; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
; SMIN (Single, x2)

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_single_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_min_single_x2_s8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: smin { z4.b, z5.b }, { z4.b, z5.b }, z3.b
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_single_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_min_single_x2_s16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: smin { z4.h, z5.h }, { z4.h, z5.h }, z3.h
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smin.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_single_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_min_single_x2_s32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: smin { z4.s, z5.s }, { z4.s, z5.s }, z3.s
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smin.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_single_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_min_single_x2_s64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: smin { z4.d, z5.d }, { z4.d, z5.d }, z3.d
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}
; UMIN (Single, x2)

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_single_x2_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_min_single_x2_u8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: umin { z4.b, z5.b }, { z4.b, z5.b }, z3.b
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_single_x2_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_min_single_x2_u16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: umin { z4.h, z5.h }, { z4.h, z5.h }, z3.h
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umin.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_single_x2_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_min_single_x2_u32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: umin { z4.s, z5.s }, { z4.s, z5.s }, z3.s
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_single_x2_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_min_single_x2_u64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: umin { z4.d, z5.d }, { z4.d, z5.d }, z3.d
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}
; FMIN (Single, x2)

define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_min_single_x2_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm) {
; CHECK-LABEL: multi_vec_min_single_x2_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: fmin { z4.h, z5.h }, { z4.h, z5.h }, z3.h
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmin.single.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm)
  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_min_single_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm) {
; CHECK-LABEL: multi_vec_min_single_x2_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: fmin { z4.s, z5.s }, { z4.s, z5.s }, z3.s
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmin.single.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm)
  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_single_x2_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm) {
; CHECK-LABEL: multi_vec_min_single_x2_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: fmin { z4.d, z5.d }, { z4.d, z5.d }, z3.d
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmin.single.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm)
  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
}
; SMIN (Single, x4)

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
    @multi_vec_min_single_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_min_single_x4_s8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: smin { z24.b - z27.b }, { z24.b - z27.b }, z5.b
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
    @llvm.aarch64.sve.smin.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
    @multi_vec_min_single_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_min_single_x4_s16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: smin { z24.h - z27.h }, { z24.h - z27.h }, z5.h
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
    @llvm.aarch64.sve.smin.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
    @multi_vec_min_single_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_min_single_x4_s32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: smin { z24.s - z27.s }, { z24.s - z27.s }, z5.s
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
    @llvm.aarch64.sve.smin.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
    @multi_vec_min_single_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_min_single_x4_s64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: smin { z24.d - z27.d }, { z24.d - z27.d }, z5.d
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
    @llvm.aarch64.sve.smin.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}
; UMIN (Single, x4)

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
    @multi_vec_min_single_x4_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_min_single_x4_u8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: umin { z24.b - z27.b }, { z24.b - z27.b }, z5.b
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
    @llvm.aarch64.sve.umin.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
    @multi_vec_min_single_x4_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_min_single_x4_u16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: umin { z24.h - z27.h }, { z24.h - z27.h }, z5.h
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
    @llvm.aarch64.sve.umin.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
    @multi_vec_min_single_x4_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_min_single_x4_u32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: umin { z24.s - z27.s }, { z24.s - z27.s }, z5.s
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
    @llvm.aarch64.sve.umin.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
    @multi_vec_min_single_x4_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_min_single_x4_u64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: umin { z24.d - z27.d }, { z24.d - z27.d }, z5.d
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
    @llvm.aarch64.sve.umin.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}
; FMIN (Single, x4)

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
    @multi_vec_min_single_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm) {
; CHECK-LABEL: multi_vec_min_single_x4_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: fmin { z24.h - z27.h }, { z24.h - z27.h }, z5.h
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
    @llvm.aarch64.sve.fmin.single.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
    @multi_vec_min_single_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm) {
; CHECK-LABEL: multi_vec_min_single_x4_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: fmin { z24.s - z27.s }, { z24.s - z27.s }, z5.s
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
    @llvm.aarch64.sve.fmin.single.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
    @multi_vec_min_single_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm) {
; CHECK-LABEL: multi_vec_min_single_x4_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: fmin { z24.d - z27.d }, { z24.d - z27.d }, z5.d
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
    @llvm.aarch64.sve.fmin.single.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}
; SMIN (Multi, x2)

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) {
; CHECK-LABEL: multi_vec_min_multi_x2_s8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z7.d, z4.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: smin { z4.b, z5.b }, { z4.b, z5.b }, { z6.b, z7.b }
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
; CHECK-LABEL: multi_vec_min_multi_x2_s16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z7.d, z4.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: smin { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smin.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) {
; CHECK-LABEL: multi_vec_min_multi_x2_s32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z7.d, z4.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: smin { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smin.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
; CHECK-LABEL: multi_vec_min_multi_x2_s64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z7.d, z4.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: smin { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}
; UMIN (Multi, x2)

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x2_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) {
; CHECK-LABEL: multi_vec_min_multi_x2_u8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z7.d, z4.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: umin { z4.b, z5.b }, { z4.b, z5.b }, { z6.b, z7.b }
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x2_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
; CHECK-LABEL: multi_vec_min_multi_x2_u16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z7.d, z4.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: umin { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umin.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x2_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) {
; CHECK-LABEL: multi_vec_min_multi_x2_u32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z7.d, z4.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: umin { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x2_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
; CHECK-LABEL: multi_vec_min_multi_x2_u64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z7.d, z4.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: umin { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}
; FMIN (Multi, x2)

define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_min_multi_x2_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) {
; CHECK-LABEL: multi_vec_min_multi_x2_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z7.d, z4.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: fmin { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmin.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2)
  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_min_multi_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) {
; CHECK-LABEL: multi_vec_min_multi_x2_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z7.d, z4.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: fmin { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmin.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2)
  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_multi_x2_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) {
; CHECK-LABEL: multi_vec_min_multi_x2_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z7.d, z4.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: fmin { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmin.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2)
  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
}
; SMIN (Multi, x4)

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
    @multi_vec_min_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
    <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
; CHECK-LABEL: multi_vec_min_multi_x4_s8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0]
; CHECK-NEXT: smin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
    @llvm.aarch64.sve.smin.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
    <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
    @multi_vec_min_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
    <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
; CHECK-LABEL: multi_vec_min_multi_x4_s16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
; CHECK-NEXT: smin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
    @llvm.aarch64.sve.smin.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
    <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
    @multi_vec_min_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
    <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
; CHECK-LABEL: multi_vec_min_multi_x4_s32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
; CHECK-NEXT: smin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
    @llvm.aarch64.sve.smin.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
    <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
    @multi_vec_min_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
    <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
; CHECK-LABEL: multi_vec_min_multi_x4_s64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
; CHECK-NEXT: smin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
    @llvm.aarch64.sve.smin.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
    <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}
; UMIN (Multi, x4)

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
    @multi_vec_min_multi_x4_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
    <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
; CHECK-LABEL: multi_vec_min_multi_x4_u8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0]
; CHECK-NEXT: umin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
    @llvm.aarch64.sve.umin.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
    <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
    @multi_vec_min_multi_x4_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
    <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
; CHECK-LABEL: multi_vec_min_multi_x4_u16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
; CHECK-NEXT: umin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
    @llvm.aarch64.sve.umin.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
    <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
    @multi_vec_min_multi_x4_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
    <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
; CHECK-LABEL: multi_vec_min_multi_x4_u32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
; CHECK-NEXT: umin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
    @llvm.aarch64.sve.umin.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
    <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
    @multi_vec_min_multi_x4_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
    <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
; CHECK-LABEL: multi_vec_min_multi_x4_u64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
; CHECK-NEXT: umin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
    @llvm.aarch64.sve.umin.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
    <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}
; FMIN (Multi, x4)

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
    @multi_vec_min_multi_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
    <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
; CHECK-LABEL: multi_vec_min_multi_x4_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
; CHECK-NEXT: fmin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
    @llvm.aarch64.sve.fmin.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
    <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
    @multi_vec_min_multi_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
    <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
; CHECK-LABEL: multi_vec_min_multi_x4_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
; CHECK-NEXT: fmin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
    @llvm.aarch64.sve.fmin.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
    <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
    @multi_vec_min_multi_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
    <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
; CHECK-LABEL: multi_vec_min_multi_x4_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
; CHECK-NEXT: fmin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
    @llvm.aarch64.sve.fmin.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
    <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}
; FMINNM (Single, x2)

define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_minnm_single_x2_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm) {
; CHECK-LABEL: multi_vec_minnm_single_x2_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: fminnm { z4.h, z5.h }, { z4.h, z5.h }, z3.h
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm)
  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_minnm_single_x2_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm) {
; CHECK-LABEL: multi_vec_minnm_single_x2_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: fminnm { z4.s, z5.s }, { z4.s, z5.s }, z3.s
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm)
  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_single_x2_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm) {
; CHECK-LABEL: multi_vec_minnm_single_x2_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: fminnm { z4.d, z5.d }, { z4.d, z5.d }, z3.d
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm)
  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
}
; FMINNM (Single, x4)

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
    @multi_vec_minnm_single_x4_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm) {
; CHECK-LABEL: multi_vec_minnm_single_x4_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: fminnm { z24.h - z27.h }, { z24.h - z27.h }, z5.h
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
    @llvm.aarch64.sve.fminnm.single.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
    @multi_vec_minnm_single_x4_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm) {
; CHECK-LABEL: multi_vec_minnm_single_x4_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: fminnm { z24.s - z27.s }, { z24.s - z27.s }, z5.s
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
    @llvm.aarch64.sve.fminnm.single.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
    @multi_vec_minnm_single_x4_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm) {
; CHECK-LABEL: multi_vec_minnm_single_x4_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: fminnm { z24.d - z27.d }, { z24.d - z27.d }, z5.d
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
    @llvm.aarch64.sve.fminnm.single.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}
; FMINNM (Multi, x2)

define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_minnm_x2_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) {
; CHECK-LABEL: multi_vec_minnm_x2_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z7.d, z4.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: fminnm { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2)
  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_minnm_x2_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) {
; CHECK-LABEL: multi_vec_minnm_x2_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z7.d, z4.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: fminnm { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fminnm.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2)
  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_x2_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) {
; CHECK-LABEL: multi_vec_minnm_x2_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z7.d, z4.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: mov z6.d, z3.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: fminnm { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fminnm.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2)
  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
}
; FMINNM (Multi, x4)

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
    @multi_vec_minnm_x4_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
; CHECK-LABEL: multi_vec_minnm_x4_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
; CHECK-NEXT: fminnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
    @llvm.aarch64.sve.fminnm.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
    <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
    @multi_vec_minnm_x4_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
; CHECK-LABEL: multi_vec_minnm_x4_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
; CHECK-NEXT: fminnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
    @llvm.aarch64.sve.fminnm.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
    <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
    @multi_vec_minnm_x4_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
; CHECK-LABEL: multi_vec_minnm_x4_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z30.d, z7.d
; CHECK-NEXT: mov z27.d, z4.d
; CHECK-NEXT: mov z29.d, z6.d
; CHECK-NEXT: mov z26.d, z3.d
; CHECK-NEXT: mov z28.d, z5.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
; CHECK-NEXT: fminnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
; CHECK-NEXT: mov z0.d, z24.d
; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z27.d
; CHECK-NEXT: ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
    @llvm.aarch64.sve.fminnm.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
    <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}
1054 declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
1055 declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smin.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
1056 declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smin.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1057 declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
1059 declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
1060 declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umin.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
1061 declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1062 declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
1064 declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmin.single.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
1065 declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmin.single.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
1066 declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmin.single.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smin.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smin.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umin.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
 @llvm.aarch64.sve.fmin.single.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
 @llvm.aarch64.sve.fmin.single.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
 @llvm.aarch64.sve.fmin.single.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
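
; The *.single.* declarations above take the N destination/source vectors plus
; one shared zm operand; the plain x2/x4 declarations below take N zdn and N zm
; vectors, hence the longer parameter lists.
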
declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smin.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smin.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umin.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmin.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmin.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmin.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
 @llvm.aarch64.sve.smin.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
 @llvm.aarch64.sve.smin.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
 @llvm.aarch64.sve.smin.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
 @llvm.aarch64.sve.smin.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
 @llvm.aarch64.sve.umin.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
 @llvm.aarch64.sve.umin.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
 @llvm.aarch64.sve.umin.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
 @llvm.aarch64.sve.umin.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
 @llvm.aarch64.sve.fmin.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
 @llvm.aarch64.sve.fmin.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
 @llvm.aarch64.sve.fmin.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
 @llvm.aarch64.sve.fminnm.single.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
 @llvm.aarch64.sve.fminnm.single.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
 @llvm.aarch64.sve.fminnm.single.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fminnm.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fminnm.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
 @llvm.aarch64.sve.fminnm.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
 @llvm.aarch64.sve.fminnm.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
 @llvm.aarch64.sve.fminnm.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)