// clang/test/CodeGen/X86/avx10_2bf16-builtins.c

// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2-256 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2-256 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s
   3
#include <immintrin.h>
   5
   6 __m256bh test_mm256_setzero_pbh() {
   7   // CHECK-LABEL: @test_mm256_setzero_pbh
   8   // CHECK: zeroinitializer
   9   return _mm256_setzero_pbh();
  10 }
  11
  12 __m128bh test_mm_setzero_pbh() {
  13   // CHECK-LABEL: @test_mm_setzero_pbh
  14   // CHECK: zeroinitializer
  15   return _mm_setzero_pbh();
  16 }
  17
  18 __m256bh test_mm256_undefined_pbh(void) {
  19   // CHECK-LABEL: @test_mm256_undefined_pbh
  20   // CHECK: ret <16 x bfloat> zeroinitializer
  21   return _mm256_undefined_pbh();
  22 }
  23
  24 __m128bh test_mm_undefined_pbh(void) {
  25   // CHECK-LABEL: @test_mm_undefined_pbh
  26   // CHECK: ret <8 x bfloat> zeroinitializer
  27   return _mm_undefined_pbh();
  28 }
  29
  30 __bf16 test_mm_cvtsbh_bf16(__m128bh __A) {
  31   // CHECK-LABEL: @test_mm_cvtsbh_bf16
  32   // CHECK: extractelement <8 x bfloat> %{{.*}}, i32 0
  33   return _mm_cvtsbh_bf16(__A);
  34 }
  35
  36 __bf16 test_mm256_cvtsbh_bf16(__m256bh __A) {
  37   // CHECK-LABEL: @test_mm256_cvtsbh_bf16
  38   // CHECK: extractelement <16 x bfloat> %{{.*}}, i32 0
  39   return _mm256_cvtsbh_bf16(__A);
  40 }
  41
  42 __m128bh test_mm_set_sbh(__bf16 h) {
  43   // CHECK-LABEL: @test_mm_set_sbh
  44   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 0
  45   // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 1
  46   // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 2
  47   // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 3
  48   // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 4
  49   // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 5
  50   // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 6
  51   // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 7
  52   return _mm_set_sbh(h);
  53 }
  54
  55 __m128bh test_mm_set1_pbh(__bf16 h) {
  56   // CHECK-LABEL: @test_mm_set1_pbh
  57   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 0
  58   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 1
  59   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 2
  60   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 3
  61   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 4
  62   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 5
  63   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 6
  64   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 7
  65   return _mm_set1_pbh(h);
  66 }
  67
  68 __m256bh test_mm256_set1_pbh(__bf16 h) {
  69   // CHECK-LABEL: @test_mm256_set1_pbh
  70   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 0
  71   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 1
  72   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 2
  73   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 3
  74   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 4
  75   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 5
  76   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 6
  77   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 7
  78   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 8
  79   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 9
  80   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 10
  81   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 11
  82   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 12
  83   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 13
  84   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 14
  85   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 15
  86   return _mm256_set1_pbh(h);
  87 }
  88
  89 __m128bh test_mm_set_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4,
  90                        __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8) {
  91   // CHECK-LABEL: @test_mm_set_pbh
  92   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 0
  93   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 1
  94   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 2
  95   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 3
  96   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 4
  97   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 5
  98   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 6
  99   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 7
 100   return _mm_set_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8);
 101 }
 102
 103 __m256bh test_mm256_set_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4,
 104                           __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8,
 105                           __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12,
 106                           __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16) {
 107   // CHECK-LABEL: @test_mm256_set_pbh
 108   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 0
 109   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 1
 110   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 2
 111   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 3
 112   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 4
 113   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 5
 114   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 6
 115   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 7
 116   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 8
 117   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 9
 118   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 10
 119   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 11
 120   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 12
 121   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 13
 122   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 14
 123   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 15
 124   return _mm256_set_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
 125                        bf9, bf10, bf11, bf12, bf13, bf14, bf15, bf16);
 126 }
 127
 128 __m128bh test_mm_setr_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4,
 129                         __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8) {
 130   // CHECK-LABEL: @test_mm_setr_pbh
 131   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 0
 132   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 1
 133   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 2
 134   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 3
 135   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 4
 136   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 5
 137   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 6
 138   // CHECK: insertelement <8 x bfloat> {{.*}}, i32 7
 139   return _mm_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8);
 140 }
 141
 142 __m256bh test_mm256_setr_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4,
 143                            __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8,
 144                            __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12,
 145                            __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16) {
 146   // CHECK-LABEL: @test_mm256_setr_pbh
 147   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 0
 148   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 1
 149   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 2
 150   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 3
 151   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 4
 152   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 5
 153   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 6
 154   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 7
 155   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 8
 156   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 9
 157   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 10
 158   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 11
 159   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 12
 160   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 13
 161   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 14
 162   // CHECK: insertelement <16 x bfloat> {{.*}}, i32 15
 163   return _mm256_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
 164                         bf9, bf10, bf11, bf12, bf13, bf14, bf15, bf16);
 165 }
 166
 167 __m128 test_mm_castpbf16_ps(__m128bh A) {
 168   // CHECK-LABEL: test_mm_castpbf16_ps
 169   // CHECK: bitcast <8 x bfloat> %{{.*}} to <4 x float>
 170   return _mm_castpbf16_ps(A);
 171 }
 172
 173 __m256 test_mm256_castpbf16_ps(__m256bh A) {
 174   // CHECK-LABEL: test_mm256_castpbf16_ps
 175   // CHECK: bitcast <16 x bfloat> %{{.*}} to <8 x float>
 176   return _mm256_castpbf16_ps(A);
 177 }
 178
 179 __m128i test_mm_castpbf16_si128(__m128bh A) {
 180   // CHECK-LABEL: test_mm_castpbf16_si128
 181   // CHECK: bitcast <8 x bfloat> %{{.*}} to <2 x i64>
 182   return _mm_castpbf16_si128(A);
 183 }
 184
 185 __m256i test_mm256_castpbf16_si256(__m256bh A) {
 186   // CHECK-LABEL: test_mm256_castpbf16_si256
 187   // CHECK: bitcast <16 x bfloat> %{{.*}} to <4 x i64>
 188   return _mm256_castpbf16_si256(A);
 189 }
 190
 191 __m128bh test_mm_castps_pbh(__m128 A) {
 192   // CHECK-LABEL: test_mm_castps_pbh
 193   // CHECK: bitcast <4 x float> %{{.*}} to <8 x bfloat>
 194   return _mm_castps_pbh(A);
 195 }
 196
 197 __m256bh test_mm256_castps_pbh(__m256 A) {
 198   // CHECK-LABEL: test_mm256_castps_pbh
 199   // CHECK: bitcast <8 x float> %{{.*}} to <16 x bfloat>
 200   return _mm256_castps_pbh(A);
 201 }
 202
 203 __m128bh test_mm_castpd_pbh(__m128d A) {
 204   // CHECK-LABEL: test_mm_castpd_pbh
 205   // CHECK: bitcast <2 x double> %{{.*}} to <8 x bfloat>
 206   return _mm_castpd_pbh(A);
 207 }
 208
 209 __m256bh test_mm256_castpd_pbh(__m256d A) {
 210   // CHECK-LABEL: test_mm256_castpd_pbh
 211   // CHECK: bitcast <4 x double> %{{.*}} to <16 x bfloat>
 212   return _mm256_castpd_pbh(A);
 213 }
 214
 215 __m128bh test_mm_castsi128_pbh(__m128i A) {
 216   // CHECK-LABEL: test_mm_castsi128_pbh
 217   // CHECK: bitcast <2 x i64> %{{.*}} to <8 x bfloat>
 218   return _mm_castsi128_pbh(A);
 219 }
 220
 221 __m256bh test_mm256_castsi256_pbh(__m256i A) {
 222   // CHECK-LABEL: test_mm256_castsi256_pbh
 223   // CHECK: bitcast <4 x i64> %{{.*}} to <16 x bfloat>
 224   return _mm256_castsi256_pbh(A);
 225 }
 226
 227 __m128d test_mm_castpbf16_pd(__m128bh A) {
 228   // CHECK-LABEL: test_mm_castpbf16_pd
 229   // CHECK: bitcast <8 x bfloat> %{{.*}} to <2 x double>
 230   return _mm_castpbf16_pd(A);
 231 }
 232
 233 __m128bh test_mm256_castpbf16256_pbh128(__m256bh __a) {
 234   // CHECK-LABEL: test_mm256_castpbf16256_pbh128
 235   // CHECK: shufflevector <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 236   return _mm256_castpbf16256_pbh128(__a);
 237 }
 238
 239 __m256bh test_mm256_castpbf16128_pbh256(__m128bh __a) {
 240   // CHECK-LABEL: test_mm256_castpbf16128_pbh256
 241   // CHECK: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 242   return _mm256_castpbf16128_pbh256(__a);
 243 }
 244
 245 __m256d test_mm256_castpbf16_pd(__m256bh A) {
 246   // CHECK-LABEL: test_mm256_castpbf16_pd
 247   // CHECK: bitcast <16 x bfloat> %{{.*}} to <4 x double>
 248   return _mm256_castpbf16_pd(A);
 249 }
 250
 251 __m256bh test_mm256_zextpbf16128_pbh256(__m128bh __a) {
 252   // CHECK-LABEL: test_mm256_zextpbf16128_pbh256
 253   // CHECK: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> {{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 254   return _mm256_zextpbf16128_pbh256(__a);
 255 }
 256
 257 __m128bh test_mm_abs_pbh(__m128bh a) {
 258   // CHECK-LABEL: @test_mm_abs_pbh
 259   // CHECK: and <4 x i32>
 260   return _mm_abs_pbh(a);
 261 }
 262
 263 __m256bh test_mm256_abs_pbh(__m256bh a) {
 264   // CHECK-LABEL: @test_mm256_abs_pbh
 265   // CHECK: and <8 x i32>
 266   return _mm256_abs_pbh(a);
 267 }
 268
 269 __m256bh test_mm256_loadu_pbh(void *p) {
 270   // CHECK-LABEL: @test_mm256_loadu_pbh
 271   // CHECK: load <16 x bfloat>, ptr {{.*}}, align 1{{$}}
 272   return _mm256_loadu_pbh(p);
 273 }
 274
 275 __m128bh test_mm_load_sbh(void const *A) {
 276   // CHECK-LABEL: test_mm_load_sbh
 277   // CHECK: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr %{{.*}}, i32 1, <8 x i1> bitcast (<1 x i8> splat (i8 1) to <8 x i1>), <8 x bfloat> %{{.*}})
 278   return _mm_load_sbh(A);
 279 }
 280
 281 __m256bh test_mm256_load_pbh(void *p) {
 282   // CHECK-LABEL: @test_mm256_load_pbh
 283   // CHECK: load <16 x bfloat>, ptr %{{.*}}, align 32
 284   return _mm256_load_pbh(p);
 285 }
 286
 287 __m128bh test_mm_load_pbh(void *p) {
 288   // CHECK-LABEL: @test_mm_load_pbh
 289   // CHECK: load <8 x bfloat>, ptr %{{.*}}, align 16
 290   return _mm_load_pbh(p);
 291 }
 292
 293 __m128bh test_mm_loadu_pbh(void *p) {
 294   // CHECK-LABEL: @test_mm_loadu_pbh
 295   // CHECK: load <8 x bfloat>, ptr {{.*}}, align 1{{$}}
 296   return _mm_loadu_pbh(p);
 297 }
 298
 299 void test_mm_store_sbh(void *A, __m128bh B) {
 300   // CHECK-LABEL: test_mm_store_sbh
 301   // CHECK: extractelement <8 x bfloat> %{{.*}}, i32 0
 302   // CHECK: store bfloat %{{.*}}, ptr %{{.*}}, align 1{{$}}
 303   _mm_store_sbh(A, B);
 304 }
 305
 306 void test_mm_mask_store_sbh(void *__P, __mmask8 __U, __m128bh __A) {
 307   // CHECK-LABEL: @test_mm_mask_store_sbh
 308   // CHECK: call void @llvm.masked.store.v8bf16.p0(<8 x bfloat> %{{.*}}, ptr %{{.*}}, i32 1, <8 x i1> %{{.*}})
 309   _mm_mask_store_sbh(__P, __U, __A);
 310 }
 311
 312 void test_mm256_store_pbh(void *p, __m256bh a) {
 313   // CHECK-LABEL: @test_mm256_store_pbh
 314   // CHECK: store <16 x bfloat> %{{.*}}, ptr %{{.*}}, align 32
 315   _mm256_store_pbh(p, a);
 316 }
 317
 318 void test_mm_store_pbh(void *p, __m128bh a) {
 319   // CHECK-LABEL: @test_mm_store_pbh
 320   // CHECK: store <8 x bfloat> %{{.*}}, ptr %{{.*}}, align 16
 321   _mm_store_pbh(p, a);
 322 }
 323
 324 __m128bh test_mm_mask_load_sbh(__m128bh __A, __mmask8 __U, const void *__W) {
 325   // CHECK-LABEL: @test_mm_mask_load_sbh
 326   // CHECK: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}})
 327   return _mm_mask_load_sbh(__A, __U, __W);
 328 }
 329
 330 __m128bh test_mm_maskz_load_sbh(__mmask8 __U, const void *__W) {
 331   // CHECK-LABEL: @test_mm_maskz_load_sbh
 332   // CHECK: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}})
 333   return _mm_maskz_load_sbh(__U, __W);
 334 }
 335
 336 void test_mm256_storeu_pbh(void *p, __m256bh a) {
 337   // CHECK-LABEL: @test_mm256_storeu_pbh
 338   // CHECK: store <16 x bfloat> %{{.*}}, ptr %{{.*}}, align 1{{$}}
 339   // CHECK-NEXT: ret void
 340   _mm256_storeu_pbh(p, a);
 341 }
 342
 343 void test_mm_storeu_pbh(void *p, __m128bh a) {
 344   // CHECK-LABEL: @test_mm_storeu_pbh
 345   // CHECK: store <8 x bfloat> %{{.*}}, ptr %{{.*}}, align 1{{$}}
 346   // CHECK-NEXT: ret void
 347   _mm_storeu_pbh(p, a);
 348 }
 349
 350 __m128bh test_mm_move_sbh(__m128bh A, __m128bh B) {
 351   // CHECK-LABEL: test_mm_move_sbh
 352   // CHECK: extractelement <8 x bfloat> %{{.*}}, i32 0
 353   // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 0
 354   return _mm_move_sbh(A, B);
 355 }
 356
 357 __m128bh test_mm_mask_move_sbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
 358   // CHECK-LABEL: @test_mm_mask_move_sbh
 359   // CHECK: [[EXT:%.*]] = extractelement <8 x bfloat> %{{.*}}, i32 0
 360   // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat [[EXT]], i32 0
 361   // CHECK: [[A:%.*]] = extractelement <8 x bfloat> [[VEC:%.*]], i64 0
 362   // CHECK-NEXT: [[B:%.*]] = extractelement <8 x bfloat> %{{.*}}, i64 0
 363   // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
 364   // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
 365   // CHECK-NEXT: [[SEL:%.*]] = select i1 %{{.*}}, bfloat [[A]], bfloat [[B]]
 366   // CHECK-NEXT: insertelement <8 x bfloat> [[VEC]], bfloat [[SEL]], i64 0
 367   return _mm_mask_move_sbh(__W, __U, __A, __B);
 368 }
 369
 370 __m128bh test_mm_maskz_move_sbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
 371   // CHECK-LABEL: @test_mm_maskz_move_sbh
 372   // CHECK: [[EXT:%.*]] = extractelement <8 x bfloat> %{{.*}}, i32 0
 373   // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat [[EXT]], i32 0
 374   // CHECK: [[A:%.*]] = extractelement <8 x bfloat> [[VEC:%.*]], i64 0
 375   // CHECK-NEXT: [[B:%.*]] = extractelement <8 x bfloat> %{{.*}}, i64 0
 376   // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
 377   // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
 378   // CHECK-NEXT: [[SEL:%.*]] = select i1 %{{.*}}, bfloat [[A]], bfloat [[B]]
 379   // CHECK-NEXT: insertelement <8 x bfloat> [[VEC]], bfloat [[SEL]], i64 0
 380   return _mm_maskz_move_sbh(__U, __A, __B);
 381 }
 382
 383 __m128bh test_mm_mask_blend_pbh(__mmask8 __U, __m128bh __A, __m128bh __W) {
 384   // CHECK-LABEL: @test_mm_mask_blend_pbh
 385   // CHECK:  %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
 386   // CHECK:  %{{.*}} = select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
 387   return _mm_mask_blend_pbh(__U, __A, __W);
 388 }
 389
 390 __m256bh test_mm256_mask_blend_pbh(__mmask16 __U, __m256bh __A, __m256bh __W) {
 391   // CHECK-LABEL: @test_mm256_mask_blend_pbh
 392   // CHECK:  %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
 393   // CHECK:  %{{.*}} = select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
 394   return _mm256_mask_blend_pbh(__U, __A, __W);
 395 }
 396
 397 __m128bh test_mm_permutex2var_pbh(__m128bh __A, __m128i __I, __m128bh __B) {
 398   // CHECK-LABEL: @test_mm_permutex2var_pbh
 399   // CHECK:  %{{.*}} = bitcast <8 x bfloat> %{{.*}} to <8 x i16>
 400   // CHECK:  %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
 401   // CHECK:  %{{.*}} = bitcast <8 x bfloat> %{{.*}} to <8 x i16>
 402   // CHECK:  %{{.*}} = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
 403   // CHECK:  %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x bfloat>
 404   return _mm_permutex2var_pbh(__A, __I, __B);
 405 }
 406
 407 __m256bh test_mm256_permutex2var_pbh(__m256bh __A, __m256i __I, __m256bh __B) {
 408   // CHECK-LABEL: @test_mm256_permutex2var_pbh
 409   // CHECK:  %{{.*}} = bitcast <16 x bfloat> %{{.*}} to <16 x i16>
 410   // CHECK:  %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
 411   // CHECK:  %{{.*}} = bitcast <16 x bfloat> %{{.*}} to <16 x i16>
 412   // CHECK:  %{{.*}} = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
 413   // CHECK:  %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x bfloat>
 414   return _mm256_permutex2var_pbh(__A, __I, __B);
 415 }
 416
 417 __m128bh test_mm_permutexvar_pbh(__m128i __A, __m128bh __B) {
 418   // CHECK-LABEL: @test_mm_permutexvar_pbh
 419   // CHECK:  %{{.*}} = bitcast <8 x bfloat> %{{.*}} to <8 x i16>
 420   // CHECK:  %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
 421   // CHECK:  %{{.*}} = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
 422   // CHECK:  %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x bfloat>
 423   return _mm_permutexvar_pbh(__A, __B);
 424 }
 425
 426 __m256bh test_mm256_permutexvar_pbh(__m256i __A, __m256bh __B) {
 427   // CHECK-LABEL: @test_mm256_permutexvar_pbh
 428   // CHECK:  %{{.*}} = bitcast <16 x bfloat> %{{.*}} to <16 x i16>
 429   // CHECK:  %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
 430   // CHECK:  %{{.*}} = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
 431   // CHECK:  %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x bfloat>
 432   return _mm256_permutexvar_pbh(__A, __B);
 433 }
 434
 435 __m256bh test_mm256_addne_pbh(__m256bh __A, __m256bh __B) {
 436   // CHECK-LABEL: @test_mm256_addne_pbh
 437   // CHECK: %{{.*}} = fadd <16 x bfloat> %{{.*}}, %{{.*}}
 438   return _mm256_addne_pbh(__A, __B);
 439 }
 440
 441 __m256bh test_mm256_mask_addne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
 442   // CHECK: %{{.*}} = fadd <16 x bfloat> %{{.*}}, %{{.*}}
 443   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
 444   return (__m256bh)_mm256_mask_addne_pbh(__W, __U, __A, __B);
 445 }
 446
 447 __m256bh test_mm256_maskz_addne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
 448   // CHECK: %{{.*}} = fadd <16 x bfloat> %{{.*}}, %{{.*}}
 449   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
 450   return _mm256_maskz_addne_pbh(__U, __A, __B);
 451 }
 452
 453 __m128bh test_mm_addne_pbh(__m128bh __A, __m128bh __B) {
 454   // CHECK-LABEL: @test_mm_addne_pbh
 455   // CHECK: %{{.*}} = fadd <8 x bfloat> %{{.*}}, %{{.*}}
 456   return _mm_addne_pbh(__A, __B);
 457 }
 458
 459 __m128bh test_mm_mask_addne_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) {
 460   // CHECK: %{{.*}} = fadd <8 x bfloat> %{{.*}}, %{{.*}}
 461   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
 462   return (__m128bh)_mm_mask_addne_pbh(__W, __U, __A, __B);
 463 }
 464
 465 __m128bh test_mm_maskz_addne_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) {
 466   // CHECK: %{{.*}} = fadd <8 x bfloat> %{{.*}}, %{{.*}}
 467   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
 468   return _mm_maskz_addne_pbh(__U, __A, __B);
 469 }
 470
 471 __m256bh test_mm256_subne_pbh(__m256bh __A, __m256bh __B) {
 472   // CHECK-LABEL: @test_mm256_subne_pbh
 473   // CHECK: %{{.*}} = fsub <16 x bfloat> %{{.*}}, %{{.*}}
 474   return _mm256_subne_pbh(__A, __B);
 475 }
 476
 477 __m256bh test_mm256_mask_subne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
 478   // CHECK: %{{.*}} = fsub <16 x bfloat> %{{.*}}, %{{.*}}
 479   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
 480   return (__m256bh)_mm256_mask_subne_pbh(__W, __U, __A, __B);
 481 }
 482
 483 __m256bh test_mm256_maskz_subne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
 484   // CHECK: %{{.*}} = fsub <16 x bfloat> %{{.*}}, %{{.*}}
 485   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
 486   return _mm256_maskz_subne_pbh(__U, __A, __B);
 487 }
 488
 489 __m128bh test_mm_subne_pbh(__m128bh __A, __m128bh __B) {
 490   // CHECK-LABEL: @test_mm_subne_pbh
 491   // CHECK: %{{.*}} = fsub <8 x bfloat> %{{.*}}, %{{.*}}
 492   return _mm_subne_pbh(__A, __B);
 493 }
 494
 495 __m128bh test_mm_mask_subne_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) {
 496   // CHECK: %{{.*}} = fsub <8 x bfloat> %{{.*}}, %{{.*}}
 497   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
 498   return (__m128bh)_mm_mask_subne_pbh(__W, __U, __A, __B);
 499 }
 500
 501 __m128bh test_mm_maskz_subne_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) {
 502   // CHECK: %{{.*}} = fsub <8 x bfloat> %{{.*}}, %{{.*}}
 503   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
 504   return _mm_maskz_subne_pbh(__U, __A, __B);
 505 }
 506
 507 __m256bh test_mm256_mulne_pbh(__m256bh __A, __m256bh __B) {
 508   // CHECK-LABEL: @test_mm256_mulne_pbh
 509   // CHECK: %{{.*}} = fmul <16 x bfloat> %{{.*}}, %{{.*}}
 510   return _mm256_mulne_pbh(__A, __B);
 511 }
 512
 513 __m256bh test_mm256_mask_mulne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
 514   // CHECK: %{{.*}} = fmul <16 x bfloat> %{{.*}}, %{{.*}}
 515   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
 516   return (__m256bh)_mm256_mask_mulne_pbh(__W, __U, __A, __B);
 517 }
 518
 519 __m256bh test_mm256_maskz_mulne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
 520   // CHECK: %{{.*}} = fmul <16 x bfloat> %{{.*}}, %{{.*}}
 521   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
 522   return _mm256_maskz_mulne_pbh(__U, __A, __B);
 523 }
 524
 525 __m128bh test_mm_mulne_pbh(__m128bh __A, __m128bh __B) {
 526   // CHECK-LABEL: @test_mm_mulne_pbh
 527   // CHECK: %{{.*}} = fmul <8 x bfloat> %{{.*}}, %{{.*}}
 528   return _mm_mulne_pbh(__A, __B);
 529 }
 530
 531 __m128bh test_mm_mask_mulne_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) {
 532   // CHECK: %{{.*}} = fmul <8 x bfloat> %{{.*}}, %{{.*}}
 533   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
 534   return (__m128bh)_mm_mask_mulne_pbh(__W, __U, __A, __B);
 535 }
 536
 537 __m128bh test_mm_maskz_mulne_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) {
 538   // CHECK: %{{.*}} = fmul <8 x bfloat> %{{.*}}, %{{.*}}
 539   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
 540   return _mm_maskz_mulne_pbh(__U, __A, __B);
 541 }
 542
 543 __m256bh test_mm256_divne_pbh(__m256bh __A, __m256bh __B) {
 544   // CHECK-LABEL: @test_mm256_divne_pbh
 545   // CHECK: %{{.*}} = fdiv <16 x bfloat> %{{.*}}, %{{.*}}
 546   return _mm256_divne_pbh(__A, __B);
 547 }
 548
 549 __m256bh test_mm256_mask_divne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
 550   // CHECK: %{{.*}} = fdiv <16 x bfloat> %{{.*}}, %{{.*}}
 551   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
 552   return (__m256bh)_mm256_mask_divne_pbh(__W, __U, __A, __B);
 553 }
 554
 555 __m256bh test_mm256_maskz_divne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
 556   // CHECK: %{{.*}} = fdiv <16 x bfloat> %{{.*}}, %{{.*}}
 557   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
 558   return _mm256_maskz_divne_pbh(__U, __A, __B);
 559 }
 560
 561 __m128bh test_mm_divne_pbh(__m128bh __A, __m128bh __B) {
 562   // CHECK-LABEL: @test_mm_divne_pbh
 563   // CHECK: %{{.*}} = fdiv <8 x bfloat> %{{.*}}, %{{.*}}
 564   return _mm_divne_pbh(__A, __B);
 565 }
 566
 567 __m128bh test_mm_mask_divne_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) {
 568   // CHECK: %{{.*}} = fdiv <8 x bfloat> %{{.*}}, %{{.*}}
 569   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
 570   return (__m128bh)_mm_mask_divne_pbh(__W, __U, __A, __B);
 571 }
 572
 573 __m128bh test_mm_maskz_divne_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) {
 574   // CHECK: %{{.*}} = fdiv <8 x bfloat> %{{.*}}, %{{.*}}
 575   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
 576   return _mm_maskz_divne_pbh(__U, __A, __B);
 577 }
 578
 579 __m256bh test_mm256_max_pbh(__m256bh __A, __m256bh __B) {
 580   // CHECK-LABEL: @test_mm256_max_pbh
 581   // CHECK: @llvm.x86.avx10.vmaxpbf16256(
 582   return _mm256_max_pbh(__A, __B);
 583 }
 584
 585 __m256bh test_mm256_mask_max_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
 586   // CHECK: @llvm.x86.avx10.vmaxpbf16256
 587   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
 588   return (__m256bh)_mm256_mask_max_pbh(__W, __U, __A, __B);
 589 }
 590
 591 __m256bh test_mm256_maskz_max_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
 592   // CHECK: @llvm.x86.avx10.vmaxpbf16256
 593   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
 594   return _mm256_maskz_max_pbh(__U, __A, __B);
 595 }
 596
 597 __m128bh test_mm_max_pbh(__m128bh __A, __m128bh __B) {
 598   // CHECK-LABEL: @test_mm_max_pbh
 599   // CHECK: @llvm.x86.avx10.vmaxpbf16128(
 600   return _mm_max_pbh(__A, __B);
 601 }
 602
 603 __m128bh test_mm_mask_max_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) {
 604   // CHECK: @llvm.x86.avx10.vmaxpbf16128
 605   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
 606   return (__m128bh)_mm_mask_max_pbh(__W, __U, __A, __B);
 607 }
 608
 609 __m128bh test_mm_maskz_max_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) {
 610   // CHECK: @llvm.x86.avx10.vmaxpbf16128
 611   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
 612   return _mm_maskz_max_pbh(__U, __A, __B);
 613 }
 614
 615 __m256bh test_mm256_min_pbh(__m256bh __A, __m256bh __B) {
 616   // CHECK-LABEL: @test_mm256_min_pbh
 617   // CHECK: @llvm.x86.avx10.vminpbf16256(
 618   return _mm256_min_pbh(__A, __B);
 619 }
 620
 621 __m256bh test_mm256_mask_min_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
 622   // CHECK: @llvm.x86.avx10.vminpbf16256
 623   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
 624   return (__m256bh)_mm256_mask_min_pbh(__W, __U, __A, __B);
 625 }
 626
 627 __m256bh test_mm256_maskz_min_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
 628   // CHECK: @llvm.x86.avx10.vminpbf16256
 629   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
 630   return _mm256_maskz_min_pbh(__U, __A, __B);
 631 }
 632
 633 __m128bh test_mm_min_pbh(__m128bh __A, __m128bh __B) {
 634   // CHECK-LABEL: @test_mm_min_pbh
 635   // CHECK: @llvm.x86.avx10.vminpbf16128(
 636   return _mm_min_pbh(__A, __B);
 637 }
 638
 639 __m128bh test_mm_mask_min_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) {
 640   // CHECK: @llvm.x86.avx10.vminpbf16128
 641   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
 642   return (__m128bh)_mm_mask_min_pbh(__W, __U, __A, __B);
 643 }
 644
 645 __m128bh test_mm_maskz_min_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) {
 646   // CHECK: @llvm.x86.avx10.vminpbf16128
 647   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
 648   return _mm_maskz_min_pbh(__U, __A, __B);
 649 }
 650
 651 int test_mm_comeqsbh(__m128bh __A, __m128bh __B) {
 652   // CHECK-LABEL: test_mm_comeqsbh
 653   // CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomsbf16eq(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}})
 654   return _mm_comeqsbh(__A, __B);
 655 }
 656
 657 int test_mm_comltsbh(__m128bh __A, __m128bh __B) {
 658   // CHECK-LABEL: test_mm_comltsbh
 659   // CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomsbf16lt(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}})
 660   return _mm_comltsbh(__A, __B);
 661 }
 662
 663 int test_mm_comlesbh(__m128bh __A, __m128bh __B) {
 664   // CHECK-LABEL: test_mm_comlesbh
 665   // CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomsbf16le(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}})
 666   return _mm_comlesbh(__A, __B);
 667 }
 668
 669 int test_mm_comgtsbh(__m128bh __A, __m128bh __B) {
 670   // CHECK-LABEL: test_mm_comgtsbh
 671   // CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomsbf16gt(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}})
 672   return _mm_comgtsbh(__A, __B);
 673 }
 674
 675 int test_mm_comgesbh(__m128bh __A, __m128bh __B) {
 676   // CHECK-LABEL: test_mm_comgesbh
 677   // CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomsbf16ge(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}})
 678   return _mm_comgesbh(__A, __B);
 679 }
 680
 681 int test_mm_comneqsbh(__m128bh __A, __m128bh __B) {
 682   // CHECK-LABEL: test_mm_comneqsbh
 683   // CHECK: %{{.}} = call i32 @llvm.x86.avx10.vcomsbf16neq(<8 x bfloat> %{{.}}, <8 x bfloat> %{{.}})
 684   return _mm_comneqsbh(__A, __B);
 685 }
 686
 687 __mmask16 test_mm256_cmp_pbh_mask_eq_oq(__m256bh a, __m256bh b) {
 688   // CHECK-LABEL: @test_mm256_cmp_pbh_mask_eq_oq
 689   // CHECK: fcmp oeq <16 x bfloat> %{{.*}}, %{{.*}}
 690   return _mm256_cmp_pbh_mask(a, b, _CMP_EQ_OQ);
 691 }
 692
 693 __mmask16 test_mm256_cmp_pbh_mask_lt_os(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_LT_OS to be emitted as an
       // `fcmp olt` on <16 x bfloat> operands.
 694   // CHECK-LABEL: test_mm256_cmp_pbh_mask_lt_os
 695   // CHECK: fcmp olt <16 x bfloat> %{{.*}}, %{{.*}}
 696   return _mm256_cmp_pbh_mask(a, b, _CMP_LT_OS);
 697 }
 698
 699 __mmask16 test_mm256_cmp_pbh_mask_le_os(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_LE_OS to be emitted as an
       // `fcmp ole` on <16 x bfloat> operands.
 700   // CHECK-LABEL: test_mm256_cmp_pbh_mask_le_os
 701   // CHECK: fcmp ole <16 x bfloat> %{{.*}}, %{{.*}}
 702   return _mm256_cmp_pbh_mask(a, b, _CMP_LE_OS);
 703 }
 704
 705 __mmask16 test_mm256_cmp_pbh_mask_unord_q(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_UNORD_Q to be emitted as an
       // `fcmp uno` on <16 x bfloat> operands.
 706   // CHECK-LABEL: test_mm256_cmp_pbh_mask_unord_q
 707   // CHECK: fcmp uno <16 x bfloat> %{{.*}}, %{{.*}}
 708   return _mm256_cmp_pbh_mask(a, b, _CMP_UNORD_Q);
 709 }
 710
 711 __mmask16 test_mm256_cmp_pbh_mask_neq_uq(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_NEQ_UQ to be emitted as an
       // `fcmp une` on <16 x bfloat> operands.
 712   // CHECK-LABEL: test_mm256_cmp_pbh_mask_neq_uq
 713   // CHECK: fcmp une <16 x bfloat> %{{.*}}, %{{.*}}
 714   return _mm256_cmp_pbh_mask(a, b, _CMP_NEQ_UQ);
 715 }
 716
 717 __mmask16 test_mm256_cmp_pbh_mask_nlt_us(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_NLT_US to be emitted as an
       // `fcmp uge` on <16 x bfloat> operands.
 718   // CHECK-LABEL: test_mm256_cmp_pbh_mask_nlt_us
 719   // CHECK: fcmp uge <16 x bfloat> %{{.*}}, %{{.*}}
 720   return _mm256_cmp_pbh_mask(a, b, _CMP_NLT_US);
 721 }
 722
 723 __mmask16 test_mm256_cmp_pbh_mask_nle_us(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_NLE_US to be emitted as an
       // `fcmp ugt` on <16 x bfloat> operands.
 724   // CHECK-LABEL: test_mm256_cmp_pbh_mask_nle_us
 725   // CHECK: fcmp ugt <16 x bfloat> %{{.*}}, %{{.*}}
 726   return _mm256_cmp_pbh_mask(a, b, _CMP_NLE_US);
 727 }
 728
 729 __mmask16 test_mm256_cmp_pbh_mask_ord_q(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_ORD_Q to be emitted as an
       // `fcmp ord` on <16 x bfloat> operands.
 730   // CHECK-LABEL: test_mm256_cmp_pbh_mask_ord_q
 731   // CHECK: fcmp ord <16 x bfloat> %{{.*}}, %{{.*}}
 732   return _mm256_cmp_pbh_mask(a, b, _CMP_ORD_Q);
 733 }
 734
 735 __mmask16 test_mm256_cmp_pbh_mask_eq_uq(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_EQ_UQ to be emitted as an
       // `fcmp ueq` on <16 x bfloat> operands.
 736   // CHECK-LABEL: test_mm256_cmp_pbh_mask_eq_uq
 737   // CHECK: fcmp ueq <16 x bfloat> %{{.*}}, %{{.*}}
 738   return _mm256_cmp_pbh_mask(a, b, _CMP_EQ_UQ);
 739 }
 740
 741 __mmask16 test_mm256_cmp_pbh_mask_nge_us(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_NGE_US to be emitted as an
       // `fcmp ult` on <16 x bfloat> operands.
 742   // CHECK-LABEL: test_mm256_cmp_pbh_mask_nge_us
 743   // CHECK: fcmp ult <16 x bfloat> %{{.*}}, %{{.*}}
 744   return _mm256_cmp_pbh_mask(a, b, _CMP_NGE_US);
 745 }
 746
 747 __mmask16 test_mm256_cmp_pbh_mask_ngt_us(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_NGT_US to be emitted as an
       // `fcmp ule` on <16 x bfloat> operands.
 748   // CHECK-LABEL: test_mm256_cmp_pbh_mask_ngt_us
 749   // CHECK: fcmp ule <16 x bfloat> %{{.*}}, %{{.*}}
 750   return _mm256_cmp_pbh_mask(a, b, _CMP_NGT_US);
 751 }
 752
 753 __mmask16 test_mm256_cmp_pbh_mask_false_oq(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_FALSE_OQ to be emitted as an
       // `fcmp false` on <16 x bfloat> operands.
 754   // CHECK-LABEL: test_mm256_cmp_pbh_mask_false_oq
 755   // CHECK: fcmp false <16 x bfloat> %{{.*}}, %{{.*}}
 756   return _mm256_cmp_pbh_mask(a, b, _CMP_FALSE_OQ);
 757 }
 758
 759 __mmask16 test_mm256_cmp_pbh_mask_neq_oq(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_NEQ_OQ to be emitted as an
       // `fcmp one` on <16 x bfloat> operands.
 760   // CHECK-LABEL: test_mm256_cmp_pbh_mask_neq_oq
 761   // CHECK: fcmp one <16 x bfloat> %{{.*}}, %{{.*}}
 762   return _mm256_cmp_pbh_mask(a, b, _CMP_NEQ_OQ);
 763 }
 764
 765 __mmask16 test_mm256_cmp_pbh_mask_ge_os(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_GE_OS to be emitted as an
       // `fcmp oge` on <16 x bfloat> operands.
 766   // CHECK-LABEL: test_mm256_cmp_pbh_mask_ge_os
 767   // CHECK: fcmp oge <16 x bfloat> %{{.*}}, %{{.*}}
 768   return _mm256_cmp_pbh_mask(a, b, _CMP_GE_OS);
 769 }
 770
 771 __mmask16 test_mm256_cmp_pbh_mask_gt_os(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_GT_OS to be emitted as an
       // `fcmp ogt` on <16 x bfloat> operands.
 772   // CHECK-LABEL: test_mm256_cmp_pbh_mask_gt_os
 773   // CHECK: fcmp ogt <16 x bfloat> %{{.*}}, %{{.*}}
 774   return _mm256_cmp_pbh_mask(a, b, _CMP_GT_OS);
 775 }
 776
 777 __mmask16 test_mm256_cmp_pbh_mask_true_uq(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_TRUE_UQ to be emitted as an
       // `fcmp true` on <16 x bfloat> operands.
 778   // CHECK-LABEL: test_mm256_cmp_pbh_mask_true_uq
 779   // CHECK: fcmp true <16 x bfloat> %{{.*}}, %{{.*}}
 780   return _mm256_cmp_pbh_mask(a, b, _CMP_TRUE_UQ);
 781 }
 782
 783 __mmask16 test_mm256_cmp_pbh_mask_eq_os(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_EQ_OS to be emitted as an
       // `fcmp oeq` on <16 x bfloat> operands.
 784   // CHECK-LABEL: test_mm256_cmp_pbh_mask_eq_os
 785   // CHECK: fcmp oeq <16 x bfloat> %{{.*}}, %{{.*}}
 786   return _mm256_cmp_pbh_mask(a, b, _CMP_EQ_OS);
 787 }
 788
 789 __mmask16 test_mm256_cmp_pbh_mask_lt_oq(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_LT_OQ to be emitted as an
       // `fcmp olt` on <16 x bfloat> operands.
 790   // CHECK-LABEL: test_mm256_cmp_pbh_mask_lt_oq
 791   // CHECK: fcmp olt <16 x bfloat> %{{.*}}, %{{.*}}
 792   return _mm256_cmp_pbh_mask(a, b, _CMP_LT_OQ);
 793 }
 794
 795 __mmask16 test_mm256_cmp_pbh_mask_le_oq(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_LE_OQ to be emitted as an
       // `fcmp ole` on <16 x bfloat> operands.
 796   // CHECK-LABEL: test_mm256_cmp_pbh_mask_le_oq
 797   // CHECK: fcmp ole <16 x bfloat> %{{.*}}, %{{.*}}
 798   return _mm256_cmp_pbh_mask(a, b, _CMP_LE_OQ);
 799 }
 800
 801 __mmask16 test_mm256_cmp_pbh_mask_unord_s(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_UNORD_S to be emitted as an
       // `fcmp uno` on <16 x bfloat> operands.
 802   // CHECK-LABEL: test_mm256_cmp_pbh_mask_unord_s
 803   // CHECK: fcmp uno <16 x bfloat> %{{.*}}, %{{.*}}
 804   return _mm256_cmp_pbh_mask(a, b, _CMP_UNORD_S);
 805 }
 806
 807 __mmask16 test_mm256_cmp_pbh_mask_neq_us(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_NEQ_US to be emitted as an
       // `fcmp une` on <16 x bfloat> operands.
 808   // CHECK-LABEL: test_mm256_cmp_pbh_mask_neq_us
 809   // CHECK: fcmp une <16 x bfloat> %{{.*}}, %{{.*}}
 810   return _mm256_cmp_pbh_mask(a, b, _CMP_NEQ_US);
 811 }
 812
 813 __mmask16 test_mm256_cmp_pbh_mask_nlt_uq(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_NLT_UQ to be emitted as an
       // `fcmp uge` on <16 x bfloat> operands.
 814   // CHECK-LABEL: test_mm256_cmp_pbh_mask_nlt_uq
 815   // CHECK: fcmp uge <16 x bfloat> %{{.*}}, %{{.*}}
 816   return _mm256_cmp_pbh_mask(a, b, _CMP_NLT_UQ);
 817 }
 818
 819 __mmask16 test_mm256_cmp_pbh_mask_nle_uq(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_NLE_UQ to be emitted as an
       // `fcmp ugt` on <16 x bfloat> operands.
 820   // CHECK-LABEL: test_mm256_cmp_pbh_mask_nle_uq
 821   // CHECK: fcmp ugt <16 x bfloat> %{{.*}}, %{{.*}}
 822   return _mm256_cmp_pbh_mask(a, b, _CMP_NLE_UQ);
 823 }
 824
 825 __mmask16 test_mm256_cmp_pbh_mask_ord_s(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_ORD_S to be emitted as an
       // `fcmp ord` on <16 x bfloat> operands.
 826   // CHECK-LABEL: test_mm256_cmp_pbh_mask_ord_s
 827   // CHECK: fcmp ord <16 x bfloat> %{{.*}}, %{{.*}}
 828   return _mm256_cmp_pbh_mask(a, b, _CMP_ORD_S);
 829 }
 830
 831 __mmask16 test_mm256_cmp_pbh_mask_eq_us(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_EQ_US to be emitted as an
       // `fcmp ueq` on <16 x bfloat> operands.
 832   // CHECK-LABEL: test_mm256_cmp_pbh_mask_eq_us
 833   // CHECK: fcmp ueq <16 x bfloat> %{{.*}}, %{{.*}}
 834   return _mm256_cmp_pbh_mask(a, b, _CMP_EQ_US);
 835 }
 836
 837 __mmask16 test_mm256_cmp_pbh_mask_nge_uq(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_NGE_UQ to be emitted as an
       // `fcmp ult` on <16 x bfloat> operands.
 838   // CHECK-LABEL: test_mm256_cmp_pbh_mask_nge_uq
 839   // CHECK: fcmp ult <16 x bfloat> %{{.*}}, %{{.*}}
 840   return _mm256_cmp_pbh_mask(a, b, _CMP_NGE_UQ);
 841 }
 842
 843 __mmask16 test_mm256_cmp_pbh_mask_ngt_uq(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_NGT_UQ to be emitted as an
       // `fcmp ule` on <16 x bfloat> operands.
 844   // CHECK-LABEL: test_mm256_cmp_pbh_mask_ngt_uq
 845   // CHECK: fcmp ule <16 x bfloat> %{{.*}}, %{{.*}}
 846   return _mm256_cmp_pbh_mask(a, b, _CMP_NGT_UQ);
 847 }
 848
 849 __mmask16 test_mm256_cmp_pbh_mask_false_os(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_FALSE_OS to be emitted as an
       // `fcmp false` on <16 x bfloat> operands.
 850   // CHECK-LABEL: test_mm256_cmp_pbh_mask_false_os
 851   // CHECK: fcmp false <16 x bfloat> %{{.*}}, %{{.*}}
 852   return _mm256_cmp_pbh_mask(a, b, _CMP_FALSE_OS);
 853 }
 854
 855 __mmask16 test_mm256_cmp_pbh_mask_neq_os(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_NEQ_OS to be emitted as an
       // `fcmp one` on <16 x bfloat> operands.
 856   // CHECK-LABEL: test_mm256_cmp_pbh_mask_neq_os
 857   // CHECK: fcmp one <16 x bfloat> %{{.*}}, %{{.*}}
 858   return _mm256_cmp_pbh_mask(a, b, _CMP_NEQ_OS);
 859 }
 860
 861 __mmask16 test_mm256_cmp_pbh_mask_ge_oq(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_GE_OQ to be emitted as an
       // `fcmp oge` on <16 x bfloat> operands.
 862   // CHECK-LABEL: test_mm256_cmp_pbh_mask_ge_oq
 863   // CHECK: fcmp oge <16 x bfloat> %{{.*}}, %{{.*}}
 864   return _mm256_cmp_pbh_mask(a, b, _CMP_GE_OQ);
 865 }
 866
 867 __mmask16 test_mm256_cmp_pbh_mask_gt_oq(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_GT_OQ to be emitted as an
       // `fcmp ogt` on <16 x bfloat> operands.
 868   // CHECK-LABEL: test_mm256_cmp_pbh_mask_gt_oq
 869   // CHECK: fcmp ogt <16 x bfloat> %{{.*}}, %{{.*}}
 870   return _mm256_cmp_pbh_mask(a, b, _CMP_GT_OQ);
 871 }
 872
 873 __mmask16 test_mm256_cmp_pbh_mask_true_us(__m256bh a, __m256bh b) {
       // Expects _mm256_cmp_pbh_mask with _CMP_TRUE_US to be emitted as an
       // `fcmp true` on <16 x bfloat> operands.
 874   // CHECK-LABEL: test_mm256_cmp_pbh_mask_true_us
 875   // CHECK: fcmp true <16 x bfloat> %{{.*}}, %{{.*}}
 876   return _mm256_cmp_pbh_mask(a, b, _CMP_TRUE_US);
 877 }
 878
 879 __mmask16 test_mm256_mask_cmp_pbh_mask_eq_oq(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_EQ_OQ to be emitted as an
       // `fcmp oeq` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
 880   // CHECK-LABEL: @test_mm256_mask_cmp_pbh_mask_eq_oq
 881   // CHECK: fcmp oeq <16 x bfloat> %{{.*}}, %{{.*}}
 882   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
 883   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OQ);
 884 }
 885
 886 __mmask16 test_mm256_mask_cmp_pbh_mask_lt_os(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_LT_OS to be emitted as an
       // `fcmp olt` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
 887   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_lt_os
 888   // CHECK: fcmp olt <16 x bfloat> %{{.*}}, %{{.*}}
 889   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
 890   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OS);
 891 }
 892
 893 __mmask16 test_mm256_mask_cmp_pbh_mask_le_os(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_LE_OS to be emitted as an
       // `fcmp ole` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
 894   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_le_os
 895   // CHECK: fcmp ole <16 x bfloat> %{{.*}}, %{{.*}}
 896   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
 897   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OS);
 898 }
 899
 900 __mmask16 test_mm256_mask_cmp_pbh_mask_unord_q(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_UNORD_Q to be emitted as an
       // `fcmp uno` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
 901   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_unord_q
 902   // CHECK: fcmp uno <16 x bfloat> %{{.*}}, %{{.*}}
 903   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
 904   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_Q);
 905 }
 906
 907 __mmask16 test_mm256_mask_cmp_pbh_mask_neq_uq(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_NEQ_UQ to be emitted as an
       // `fcmp une` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
 908   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_uq
 909   // CHECK: fcmp une <16 x bfloat> %{{.*}}, %{{.*}}
 910   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
 911   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_UQ);
 912 }
 913
 914 __mmask16 test_mm256_mask_cmp_pbh_mask_nlt_us(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_NLT_US to be emitted as an
       // `fcmp uge` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
 915   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nlt_us
 916   // CHECK: fcmp uge <16 x bfloat> %{{.*}}, %{{.*}}
 917   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
 918   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_US);
 919 }
 920
 921 __mmask16 test_mm256_mask_cmp_pbh_mask_nle_us(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_NLE_US to be emitted as an
       // `fcmp ugt` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
 922   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nle_us
 923   // CHECK: fcmp ugt <16 x bfloat> %{{.*}}, %{{.*}}
 924   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
 925   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_US);
 926 }
 927
 928 __mmask16 test_mm256_mask_cmp_pbh_mask_ord_q(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_ORD_Q to be emitted as an
       // `fcmp ord` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
 929   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ord_q
 930   // CHECK: fcmp ord <16 x bfloat> %{{.*}}, %{{.*}}
 931   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
 932   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_Q);
 933 }
 934
 935 __mmask16 test_mm256_mask_cmp_pbh_mask_eq_uq(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_EQ_UQ to be emitted as an
       // `fcmp ueq` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
 936   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_eq_uq
 937   // CHECK: fcmp ueq <16 x bfloat> %{{.*}}, %{{.*}}
 938   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
 939   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_UQ);
 940 }
 941
 942 __mmask16 test_mm256_mask_cmp_pbh_mask_nge_us(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_NGE_US to be emitted as an
       // `fcmp ult` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
 943   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nge_us
 944   // CHECK: fcmp ult <16 x bfloat> %{{.*}}, %{{.*}}
 945   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
 946   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_US);
 947 }
 948
 949 __mmask16 test_mm256_mask_cmp_pbh_mask_ngt_us(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_NGT_US to be emitted as an
       // `fcmp ule` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
 950   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ngt_us
 951   // CHECK: fcmp ule <16 x bfloat> %{{.*}}, %{{.*}}
 952   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
 953   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_US);
 954 }
 955
 956 __mmask16 test_mm256_mask_cmp_pbh_mask_false_oq(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_FALSE_OQ to be emitted as an
       // `fcmp false` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
 957   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_false_oq
 958   // CHECK: fcmp false <16 x bfloat> %{{.*}}, %{{.*}}
 959   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
 960   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OQ);
 961 }
 962
 963 __mmask16 test_mm256_mask_cmp_pbh_mask_neq_oq(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_NEQ_OQ to be emitted as an
       // `fcmp one` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
 964   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_oq
 965   // CHECK: fcmp one <16 x bfloat> %{{.*}}, %{{.*}}
 966   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
 967   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OQ);
 968 }
 969
 970 __mmask16 test_mm256_mask_cmp_pbh_mask_ge_os(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_GE_OS to be emitted as an
       // `fcmp oge` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
 971   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ge_os
 972   // CHECK: fcmp oge <16 x bfloat> %{{.*}}, %{{.*}}
 973   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
 974   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OS);
 975 }
 976
 977 __mmask16 test_mm256_mask_cmp_pbh_mask_gt_os(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_GT_OS to be emitted as an
       // `fcmp ogt` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
 978   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_gt_os
 979   // CHECK: fcmp ogt <16 x bfloat> %{{.*}}, %{{.*}}
 980   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
 981   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OS);
 982 }
 983
 984 __mmask16 test_mm256_mask_cmp_pbh_mask_true_uq(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_TRUE_UQ to be emitted as an
       // `fcmp true` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
 985   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_true_uq
 986   // CHECK: fcmp true <16 x bfloat> %{{.*}}, %{{.*}}
 987   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
 988   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_UQ);
 989 }
 990
 991 __mmask16 test_mm256_mask_cmp_pbh_mask_eq_os(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_EQ_OS to be emitted as an
       // `fcmp oeq` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
 992   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_eq_os
 993   // CHECK: fcmp oeq <16 x bfloat> %{{.*}}, %{{.*}}
 994   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
 995   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OS);
 996 }
 997
 998 __mmask16 test_mm256_mask_cmp_pbh_mask_lt_oq(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_LT_OQ to be emitted as an
       // `fcmp olt` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
 999   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_lt_oq
1000   // CHECK: fcmp olt <16 x bfloat> %{{.*}}, %{{.*}}
1001   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
1002   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OQ);
1003 }
1004
1005 __mmask16 test_mm256_mask_cmp_pbh_mask_le_oq(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_LE_OQ to be emitted as an
       // `fcmp ole` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
1006   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_le_oq
1007   // CHECK: fcmp ole <16 x bfloat> %{{.*}}, %{{.*}}
1008   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
1009   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OQ);
1010 }
1011
1012 __mmask16 test_mm256_mask_cmp_pbh_mask_unord_s(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_UNORD_S to be emitted as an
       // `fcmp uno` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
1013   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_unord_s
1014   // CHECK: fcmp uno <16 x bfloat> %{{.*}}, %{{.*}}
1015   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
1016   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_S);
1017 }
1018
1019 __mmask16 test_mm256_mask_cmp_pbh_mask_neq_us(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_NEQ_US to be emitted as an
       // `fcmp une` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
1020   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_us
1021   // CHECK: fcmp une <16 x bfloat> %{{.*}}, %{{.*}}
1022   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
1023   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_US);
1024 }
1025
1026 __mmask16 test_mm256_mask_cmp_pbh_mask_nlt_uq(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_NLT_UQ to be emitted as an
       // `fcmp uge` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
1027   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nlt_uq
1028   // CHECK: fcmp uge <16 x bfloat> %{{.*}}, %{{.*}}
1029   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
1030   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_UQ);
1031 }
1032
1033 __mmask16 test_mm256_mask_cmp_pbh_mask_nle_uq(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_NLE_UQ to be emitted as an
       // `fcmp ugt` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
1034   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nle_uq
1035   // CHECK: fcmp ugt <16 x bfloat> %{{.*}}, %{{.*}}
1036   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
1037   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_UQ);
1038 }
1039
1040 __mmask16 test_mm256_mask_cmp_pbh_mask_ord_s(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_ORD_S to be emitted as an
       // `fcmp ord` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
1041   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ord_s
1042   // CHECK: fcmp ord <16 x bfloat> %{{.*}}, %{{.*}}
1043   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
1044   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_S);
1045 }
1046
1047 __mmask16 test_mm256_mask_cmp_pbh_mask_eq_us(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_EQ_US to be emitted as an
       // `fcmp ueq` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
1048   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_eq_us
1049   // CHECK: fcmp ueq <16 x bfloat> %{{.*}}, %{{.*}}
1050   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
1051   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_US);
1052 }
1053
1054 __mmask16 test_mm256_mask_cmp_pbh_mask_nge_uq(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_NGE_UQ to be emitted as an
       // `fcmp ult` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
1055   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nge_uq
1056   // CHECK: fcmp ult <16 x bfloat> %{{.*}}, %{{.*}}
1057   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
1058   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_UQ);
1059 }
1060
1061 __mmask16 test_mm256_mask_cmp_pbh_mask_ngt_uq(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_NGT_UQ to be emitted as an
       // `fcmp ule` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
1062   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ngt_uq
1063   // CHECK: fcmp ule <16 x bfloat> %{{.*}}, %{{.*}}
1064   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
1065   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_UQ);
1066 }
1067
1068 __mmask16 test_mm256_mask_cmp_pbh_mask_false_os(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_FALSE_OS to be emitted as an
       // `fcmp false` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
1069   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_false_os
1070   // CHECK: fcmp false <16 x bfloat> %{{.*}}, %{{.*}}
1071   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
1072   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OS);
1073 }
1074
1075 __mmask16 test_mm256_mask_cmp_pbh_mask_neq_os(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_NEQ_OS to be emitted as an
       // `fcmp one` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
1076   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_os
1077   // CHECK: fcmp one <16 x bfloat> %{{.*}}, %{{.*}}
1078   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
1079   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OS);
1080 }
1081
1082 __mmask16 test_mm256_mask_cmp_pbh_mask_ge_oq(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_GE_OQ to be emitted as an
       // `fcmp oge` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
1083   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ge_oq
1084   // CHECK: fcmp oge <16 x bfloat> %{{.*}}, %{{.*}}
1085   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
1086   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OQ);
1087 }
1088
1089 __mmask16 test_mm256_mask_cmp_pbh_mask_gt_oq(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_GT_OQ to be emitted as an
       // `fcmp ogt` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
1090   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_gt_oq
1091   // CHECK: fcmp ogt <16 x bfloat> %{{.*}}, %{{.*}}
1092   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
1093   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OQ);
1094 }
1095
1096 __mmask16 test_mm256_mask_cmp_pbh_mask_true_us(__mmask16 m, __m256bh a, __m256bh b) {
       // Expects _mm256_mask_cmp_pbh_mask with _CMP_TRUE_US to be emitted as an
       // `fcmp true` on <16 x bfloat>, followed by an `and` on <16 x i1> values.
1097   // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_true_us
1098   // CHECK: fcmp true <16 x bfloat> %{{.*}}, %{{.*}}
1099   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
1100   return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_US);
1101 }
1102
1103 __mmask8 test_mm_cmp_pbh_mask_eq_oq(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_EQ_OQ to be emitted as an
       // `fcmp oeq` on <8 x bfloat> operands.
1104   // CHECK-LABEL: @test_mm_cmp_pbh_mask_eq_oq
1105   // CHECK: fcmp oeq <8 x bfloat> %{{.*}}, %{{.*}}
1106   return _mm_cmp_pbh_mask(a, b, _CMP_EQ_OQ);
1107 }
1108
1109 __mmask8 test_mm_cmp_pbh_mask_lt_os(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_LT_OS to be emitted as an
       // `fcmp olt` on <8 x bfloat> operands.
1110   // CHECK-LABEL: test_mm_cmp_pbh_mask_lt_os
1111   // CHECK: fcmp olt <8 x bfloat> %{{.*}}, %{{.*}}
1112   return _mm_cmp_pbh_mask(a, b, _CMP_LT_OS);
1113 }
1114
1115 __mmask8 test_mm_cmp_pbh_mask_le_os(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_LE_OS to be emitted as an
       // `fcmp ole` on <8 x bfloat> operands.
1116   // CHECK-LABEL: test_mm_cmp_pbh_mask_le_os
1117   // CHECK: fcmp ole <8 x bfloat> %{{.*}}, %{{.*}}
1118   return _mm_cmp_pbh_mask(a, b, _CMP_LE_OS);
1119 }
1120
1121 __mmask8 test_mm_cmp_pbh_mask_unord_q(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_UNORD_Q to be emitted as an
       // `fcmp uno` on <8 x bfloat> operands.
1122   // CHECK-LABEL: test_mm_cmp_pbh_mask_unord_q
1123   // CHECK: fcmp uno <8 x bfloat> %{{.*}}, %{{.*}}
1124   return _mm_cmp_pbh_mask(a, b, _CMP_UNORD_Q);
1125 }
1126
1127 __mmask8 test_mm_cmp_pbh_mask_neq_uq(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_NEQ_UQ to be emitted as an
       // `fcmp une` on <8 x bfloat> operands.
1128   // CHECK-LABEL: test_mm_cmp_pbh_mask_neq_uq
1129   // CHECK: fcmp une <8 x bfloat> %{{.*}}, %{{.*}}
1130   return _mm_cmp_pbh_mask(a, b, _CMP_NEQ_UQ);
1131 }
1132
1133 __mmask8 test_mm_cmp_pbh_mask_nlt_us(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_NLT_US to be emitted as an
       // `fcmp uge` on <8 x bfloat> operands.
1134   // CHECK-LABEL: test_mm_cmp_pbh_mask_nlt_us
1135   // CHECK: fcmp uge <8 x bfloat> %{{.*}}, %{{.*}}
1136   return _mm_cmp_pbh_mask(a, b, _CMP_NLT_US);
1137 }
1138
1139 __mmask8 test_mm_cmp_pbh_mask_nle_us(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_NLE_US to be emitted as an
       // `fcmp ugt` on <8 x bfloat> operands.
1140   // CHECK-LABEL: test_mm_cmp_pbh_mask_nle_us
1141   // CHECK: fcmp ugt <8 x bfloat> %{{.*}}, %{{.*}}
1142   return _mm_cmp_pbh_mask(a, b, _CMP_NLE_US);
1143 }
1144
1145 __mmask8 test_mm_cmp_pbh_mask_ord_q(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_ORD_Q to be emitted as an
       // `fcmp ord` on <8 x bfloat> operands.
1146   // CHECK-LABEL: test_mm_cmp_pbh_mask_ord_q
1147   // CHECK: fcmp ord <8 x bfloat> %{{.*}}, %{{.*}}
1148   return _mm_cmp_pbh_mask(a, b, _CMP_ORD_Q);
1149 }
1150
1151 __mmask8 test_mm_cmp_pbh_mask_eq_uq(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_EQ_UQ to be emitted as an
       // `fcmp ueq` on <8 x bfloat> operands.
1152   // CHECK-LABEL: test_mm_cmp_pbh_mask_eq_uq
1153   // CHECK: fcmp ueq <8 x bfloat> %{{.*}}, %{{.*}}
1154   return _mm_cmp_pbh_mask(a, b, _CMP_EQ_UQ);
1155 }
1156
1157 __mmask8 test_mm_cmp_pbh_mask_nge_us(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_NGE_US to be emitted as an
       // `fcmp ult` on <8 x bfloat> operands.
1158   // CHECK-LABEL: test_mm_cmp_pbh_mask_nge_us
1159   // CHECK: fcmp ult <8 x bfloat> %{{.*}}, %{{.*}}
1160   return _mm_cmp_pbh_mask(a, b, _CMP_NGE_US);
1161 }
1162
1163 __mmask8 test_mm_cmp_pbh_mask_ngt_us(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_NGT_US to be emitted as an
       // `fcmp ule` on <8 x bfloat> operands.
1164   // CHECK-LABEL: test_mm_cmp_pbh_mask_ngt_us
1165   // CHECK: fcmp ule <8 x bfloat> %{{.*}}, %{{.*}}
1166   return _mm_cmp_pbh_mask(a, b, _CMP_NGT_US);
1167 }
1168
1169 __mmask8 test_mm_cmp_pbh_mask_false_oq(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_FALSE_OQ to be emitted as an
       // `fcmp false` on <8 x bfloat> operands.
1170   // CHECK-LABEL: test_mm_cmp_pbh_mask_false_oq
1171   // CHECK: fcmp false <8 x bfloat> %{{.*}}, %{{.*}}
1172   return _mm_cmp_pbh_mask(a, b, _CMP_FALSE_OQ);
1173 }
1174
1175 __mmask8 test_mm_cmp_pbh_mask_neq_oq(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_NEQ_OQ to be emitted as an
       // `fcmp one` on <8 x bfloat> operands.
1176   // CHECK-LABEL: test_mm_cmp_pbh_mask_neq_oq
1177   // CHECK: fcmp one <8 x bfloat> %{{.*}}, %{{.*}}
1178   return _mm_cmp_pbh_mask(a, b, _CMP_NEQ_OQ);
1179 }
1180
1181 __mmask8 test_mm_cmp_pbh_mask_ge_os(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_GE_OS to be emitted as an
       // `fcmp oge` on <8 x bfloat> operands.
1182   // CHECK-LABEL: test_mm_cmp_pbh_mask_ge_os
1183   // CHECK: fcmp oge <8 x bfloat> %{{.*}}, %{{.*}}
1184   return _mm_cmp_pbh_mask(a, b, _CMP_GE_OS);
1185 }
1186
1187 __mmask8 test_mm_cmp_pbh_mask_gt_os(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_GT_OS to be emitted as an
       // `fcmp ogt` on <8 x bfloat> operands.
1188   // CHECK-LABEL: test_mm_cmp_pbh_mask_gt_os
1189   // CHECK: fcmp ogt <8 x bfloat> %{{.*}}, %{{.*}}
1190   return _mm_cmp_pbh_mask(a, b, _CMP_GT_OS);
1191 }
1192
1193 __mmask8 test_mm_cmp_pbh_mask_true_uq(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_TRUE_UQ to be emitted as an
       // `fcmp true` on <8 x bfloat> operands.
1194   // CHECK-LABEL: test_mm_cmp_pbh_mask_true_uq
1195   // CHECK: fcmp true <8 x bfloat> %{{.*}}, %{{.*}}
1196   return _mm_cmp_pbh_mask(a, b, _CMP_TRUE_UQ);
1197 }
1198
1199 __mmask8 test_mm_cmp_pbh_mask_eq_os(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_EQ_OS to be emitted as an
       // `fcmp oeq` on <8 x bfloat> operands.
1200   // CHECK-LABEL: test_mm_cmp_pbh_mask_eq_os
1201   // CHECK: fcmp oeq <8 x bfloat> %{{.*}}, %{{.*}}
1202   return _mm_cmp_pbh_mask(a, b, _CMP_EQ_OS);
1203 }
1204
1205 __mmask8 test_mm_cmp_pbh_mask_lt_oq(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_LT_OQ to be emitted as an
       // `fcmp olt` on <8 x bfloat> operands.
1206   // CHECK-LABEL: test_mm_cmp_pbh_mask_lt_oq
1207   // CHECK: fcmp olt <8 x bfloat> %{{.*}}, %{{.*}}
1208   return _mm_cmp_pbh_mask(a, b, _CMP_LT_OQ);
1209 }
1210
1211 __mmask8 test_mm_cmp_pbh_mask_le_oq(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_LE_OQ to be emitted as an
       // `fcmp ole` on <8 x bfloat> operands.
1212   // CHECK-LABEL: test_mm_cmp_pbh_mask_le_oq
1213   // CHECK: fcmp ole <8 x bfloat> %{{.*}}, %{{.*}}
1214   return _mm_cmp_pbh_mask(a, b, _CMP_LE_OQ);
1215 }
1216
1217 __mmask8 test_mm_cmp_pbh_mask_unord_s(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_UNORD_S to be emitted as an
       // `fcmp uno` on <8 x bfloat> operands.
1218   // CHECK-LABEL: test_mm_cmp_pbh_mask_unord_s
1219   // CHECK: fcmp uno <8 x bfloat> %{{.*}}, %{{.*}}
1220   return _mm_cmp_pbh_mask(a, b, _CMP_UNORD_S);
1221 }
1222
1223 __mmask8 test_mm_cmp_pbh_mask_neq_us(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_NEQ_US to be emitted as an
       // `fcmp une` on <8 x bfloat> operands.
1224   // CHECK-LABEL: test_mm_cmp_pbh_mask_neq_us
1225   // CHECK: fcmp une <8 x bfloat> %{{.*}}, %{{.*}}
1226   return _mm_cmp_pbh_mask(a, b, _CMP_NEQ_US);
1227 }
1228
1229 __mmask8 test_mm_cmp_pbh_mask_nlt_uq(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_NLT_UQ to be emitted as an
       // `fcmp uge` on <8 x bfloat> operands.
1230   // CHECK-LABEL: test_mm_cmp_pbh_mask_nlt_uq
1231   // CHECK: fcmp uge <8 x bfloat> %{{.*}}, %{{.*}}
1232   return _mm_cmp_pbh_mask(a, b, _CMP_NLT_UQ);
1233 }
1234
1235 __mmask8 test_mm_cmp_pbh_mask_nle_uq(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_NLE_UQ to be emitted as an
       // `fcmp ugt` on <8 x bfloat> operands.
1236   // CHECK-LABEL: test_mm_cmp_pbh_mask_nle_uq
1237   // CHECK: fcmp ugt <8 x bfloat> %{{.*}}, %{{.*}}
1238   return _mm_cmp_pbh_mask(a, b, _CMP_NLE_UQ);
1239 }
1240
1241 __mmask8 test_mm_cmp_pbh_mask_ord_s(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_ORD_S to be emitted as an
       // `fcmp ord` on <8 x bfloat> operands.
1242   // CHECK-LABEL: test_mm_cmp_pbh_mask_ord_s
1243   // CHECK: fcmp ord <8 x bfloat> %{{.*}}, %{{.*}}
1244   return _mm_cmp_pbh_mask(a, b, _CMP_ORD_S);
1245 }
1246
1247 __mmask8 test_mm_cmp_pbh_mask_eq_us(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_EQ_US to be emitted as an
       // `fcmp ueq` on <8 x bfloat> operands.
1248   // CHECK-LABEL: test_mm_cmp_pbh_mask_eq_us
1249   // CHECK: fcmp ueq <8 x bfloat> %{{.*}}, %{{.*}}
1250   return _mm_cmp_pbh_mask(a, b, _CMP_EQ_US);
1251 }
1252
1253 __mmask8 test_mm_cmp_pbh_mask_nge_uq(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_NGE_UQ to be emitted as an
       // `fcmp ult` on <8 x bfloat> operands.
1254   // CHECK-LABEL: test_mm_cmp_pbh_mask_nge_uq
1255   // CHECK: fcmp ult <8 x bfloat> %{{.*}}, %{{.*}}
1256   return _mm_cmp_pbh_mask(a, b, _CMP_NGE_UQ);
1257 }
1258
1259 __mmask8 test_mm_cmp_pbh_mask_ngt_uq(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_NGT_UQ to be emitted as an
       // `fcmp ule` on <8 x bfloat> operands.
1260   // CHECK-LABEL: test_mm_cmp_pbh_mask_ngt_uq
1261   // CHECK: fcmp ule <8 x bfloat> %{{.*}}, %{{.*}}
1262   return _mm_cmp_pbh_mask(a, b, _CMP_NGT_UQ);
1263 }
1264
1265 __mmask8 test_mm_cmp_pbh_mask_false_os(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_FALSE_OS to be emitted as an
       // `fcmp false` on <8 x bfloat> operands.
1266   // CHECK-LABEL: test_mm_cmp_pbh_mask_false_os
1267   // CHECK: fcmp false <8 x bfloat> %{{.*}}, %{{.*}}
1268   return _mm_cmp_pbh_mask(a, b, _CMP_FALSE_OS);
1269 }
1270
1271 __mmask8 test_mm_cmp_pbh_mask_neq_os(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_NEQ_OS to be emitted as an
       // `fcmp one` on <8 x bfloat> operands.
1272   // CHECK-LABEL: test_mm_cmp_pbh_mask_neq_os
1273   // CHECK: fcmp one <8 x bfloat> %{{.*}}, %{{.*}}
1274   return _mm_cmp_pbh_mask(a, b, _CMP_NEQ_OS);
1275 }
1276
1277 __mmask8 test_mm_cmp_pbh_mask_ge_oq(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_GE_OQ to be emitted as an
       // `fcmp oge` on <8 x bfloat> operands.
1278   // CHECK-LABEL: test_mm_cmp_pbh_mask_ge_oq
1279   // CHECK: fcmp oge <8 x bfloat> %{{.*}}, %{{.*}}
1280   return _mm_cmp_pbh_mask(a, b, _CMP_GE_OQ);
1281 }
1282
1283 __mmask8 test_mm_cmp_pbh_mask_gt_oq(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_GT_OQ to be emitted as an
       // `fcmp ogt` on <8 x bfloat> operands.
1284   // CHECK-LABEL: test_mm_cmp_pbh_mask_gt_oq
1285   // CHECK: fcmp ogt <8 x bfloat> %{{.*}}, %{{.*}}
1286   return _mm_cmp_pbh_mask(a, b, _CMP_GT_OQ);
1287 }
1288
1289 __mmask8 test_mm_cmp_pbh_mask_true_us(__m128bh a, __m128bh b) {
       // Expects _mm_cmp_pbh_mask with _CMP_TRUE_US to be emitted as an
       // `fcmp true` on <8 x bfloat> operands.
1290   // CHECK-LABEL: test_mm_cmp_pbh_mask_true_us
1291   // CHECK: fcmp true <8 x bfloat> %{{.*}}, %{{.*}}
1292   return _mm_cmp_pbh_mask(a, b, _CMP_TRUE_US);
1293 }
1294
1295 __mmask8 test_mm_mask_cmp_pbh_mask_eq_oq(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_EQ_OQ to be emitted as an
       // `fcmp oeq` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1296   // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_eq_oq
1297   // CHECK: fcmp oeq <8 x bfloat> %{{.*}}, %{{.*}}
1298   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1299   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OQ);
1300 }
1301
1302 __mmask8 test_mm_mask_cmp_pbh_mask_lt_os(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_LT_OS to be emitted as an
       // `fcmp olt` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1303   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_lt_os
1304   // CHECK: fcmp olt <8 x bfloat> %{{.*}}, %{{.*}}
1305   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1306   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OS);
1307 }
1308
1309 __mmask8 test_mm_mask_cmp_pbh_mask_le_os(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_LE_OS to be emitted as an
       // `fcmp ole` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1310   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_le_os
1311   // CHECK: fcmp ole <8 x bfloat> %{{.*}}, %{{.*}}
1312   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1313   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OS);
1314 }
1315
1316 __mmask8 test_mm_mask_cmp_pbh_mask_unord_q(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_UNORD_Q to be emitted as an
       // `fcmp uno` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1317   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_unord_q
1318   // CHECK: fcmp uno <8 x bfloat> %{{.*}}, %{{.*}}
1319   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1320   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_Q);
1321 }
1322
1323 __mmask8 test_mm_mask_cmp_pbh_mask_neq_uq(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_NEQ_UQ to be emitted as an
       // `fcmp une` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1324   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_neq_uq
1325   // CHECK: fcmp une <8 x bfloat> %{{.*}}, %{{.*}}
1326   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1327   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_UQ);
1328 }
1329
1330 __mmask8 test_mm_mask_cmp_pbh_mask_nlt_us(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_NLT_US to be emitted as an
       // `fcmp uge` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1331   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nlt_us
1332   // CHECK: fcmp uge <8 x bfloat> %{{.*}}, %{{.*}}
1333   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1334   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_US);
1335 }
1336
1337 __mmask8 test_mm_mask_cmp_pbh_mask_nle_us(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_NLE_US to be emitted as an
       // `fcmp ugt` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1338   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nle_us
1339   // CHECK: fcmp ugt <8 x bfloat> %{{.*}}, %{{.*}}
1340   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1341   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_US);
1342 }
1343
1344 __mmask8 test_mm_mask_cmp_pbh_mask_ord_q(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_ORD_Q to be emitted as an
       // `fcmp ord` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1345   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ord_q
1346   // CHECK: fcmp ord <8 x bfloat> %{{.*}}, %{{.*}}
1347   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1348   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_Q);
1349 }
1350
1351 __mmask8 test_mm_mask_cmp_pbh_mask_eq_uq(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_EQ_UQ to be emitted as an
       // `fcmp ueq` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1352   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_eq_uq
1353   // CHECK: fcmp ueq <8 x bfloat> %{{.*}}, %{{.*}}
1354   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1355   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_UQ);
1356 }
1357
1358 __mmask8 test_mm_mask_cmp_pbh_mask_nge_us(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_NGE_US to be emitted as an
       // `fcmp ult` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1359   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nge_us
1360   // CHECK: fcmp ult <8 x bfloat> %{{.*}}, %{{.*}}
1361   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1362   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_US);
1363 }
1364
1365 __mmask8 test_mm_mask_cmp_pbh_mask_ngt_us(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_NGT_US to be emitted as an
       // `fcmp ule` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1366   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ngt_us
1367   // CHECK: fcmp ule <8 x bfloat> %{{.*}}, %{{.*}}
1368   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1369   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_US);
1370 }
1371
1372 __mmask8 test_mm_mask_cmp_pbh_mask_false_oq(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_FALSE_OQ to be emitted as an
       // `fcmp false` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1373   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_false_oq
1374   // CHECK: fcmp false <8 x bfloat> %{{.*}}, %{{.*}}
1375   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1376   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OQ);
1377 }
1378
1379 __mmask8 test_mm_mask_cmp_pbh_mask_neq_oq(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_NEQ_OQ to be emitted as an
       // `fcmp one` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1380   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_neq_oq
1381   // CHECK: fcmp one <8 x bfloat> %{{.*}}, %{{.*}}
1382   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1383   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OQ);
1384 }
1385
1386 __mmask8 test_mm_mask_cmp_pbh_mask_ge_os(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_GE_OS to be emitted as an
       // `fcmp oge` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1387   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ge_os
1388   // CHECK: fcmp oge <8 x bfloat> %{{.*}}, %{{.*}}
1389   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1390   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OS);
1391 }
1392
1393 __mmask8 test_mm_mask_cmp_pbh_mask_gt_os(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_GT_OS to be emitted as an
       // `fcmp ogt` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1394   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_gt_os
1395   // CHECK: fcmp ogt <8 x bfloat> %{{.*}}, %{{.*}}
1396   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1397   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OS);
1398 }
1399
1400 __mmask8 test_mm_mask_cmp_pbh_mask_true_uq(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_TRUE_UQ to be emitted as an
       // `fcmp true` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1401   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_true_uq
1402   // CHECK: fcmp true <8 x bfloat> %{{.*}}, %{{.*}}
1403   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1404   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_UQ);
1405 }
1406
1407 __mmask8 test_mm_mask_cmp_pbh_mask_eq_os(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_EQ_OS to be emitted as an
       // `fcmp oeq` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1408   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_eq_os
1409   // CHECK: fcmp oeq <8 x bfloat> %{{.*}}, %{{.*}}
1410   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1411   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OS);
1412 }
1413
1414 __mmask8 test_mm_mask_cmp_pbh_mask_lt_oq(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_LT_OQ to be emitted as an
       // `fcmp olt` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1415   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_lt_oq
1416   // CHECK: fcmp olt <8 x bfloat> %{{.*}}, %{{.*}}
1417   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1418   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OQ);
1419 }
1420
1421 __mmask8 test_mm_mask_cmp_pbh_mask_le_oq(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_LE_OQ to be emitted as an
       // `fcmp ole` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1422   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_le_oq
1423   // CHECK: fcmp ole <8 x bfloat> %{{.*}}, %{{.*}}
1424   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1425   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OQ);
1426 }
1427
1428 __mmask8 test_mm_mask_cmp_pbh_mask_unord_s(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_UNORD_S to be emitted as an
       // `fcmp uno` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1429   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_unord_s
1430   // CHECK: fcmp uno <8 x bfloat> %{{.*}}, %{{.*}}
1431   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1432   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_S);
1433 }
1434
1435 __mmask8 test_mm_mask_cmp_pbh_mask_neq_us(__mmask8 m, __m128bh a, __m128bh b) {
       // Expects _mm_mask_cmp_pbh_mask with _CMP_NEQ_US to be emitted as an
       // `fcmp une` on <8 x bfloat>, followed by an `and` on <8 x i1> values.
1436   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_neq_us
1437   // CHECK: fcmp une <8 x bfloat> %{{.*}}, %{{.*}}
1438   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1439   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_US);
1440 }
1441
// Codegen test: _mm_mask_cmp_pbh_mask with _CMP_NLT_UQ is expected to emit `fcmp uge` on
// <8 x bfloat>, then an `and` on <8 x i1> (presumably folding in mask m; operands are wildcarded).
1442 __mmask8 test_mm_mask_cmp_pbh_mask_nlt_uq(__mmask8 m, __m128bh a, __m128bh b) {
1443   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nlt_uq
1444   // CHECK: fcmp uge <8 x bfloat> %{{.*}}, %{{.*}}
1445   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1446   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_UQ);
1447 }
1448
// Codegen test: _mm_mask_cmp_pbh_mask with _CMP_NLE_UQ is expected to emit `fcmp ugt` on
// <8 x bfloat>, then an `and` on <8 x i1> (presumably folding in mask m; operands are wildcarded).
1449 __mmask8 test_mm_mask_cmp_pbh_mask_nle_uq(__mmask8 m, __m128bh a, __m128bh b) {
1450   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nle_uq
1451   // CHECK: fcmp ugt <8 x bfloat> %{{.*}}, %{{.*}}
1452   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1453   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_UQ);
1454 }
1455
// Codegen test: _mm_mask_cmp_pbh_mask with _CMP_ORD_S is expected to emit `fcmp ord` on
// <8 x bfloat>, then an `and` on <8 x i1> (presumably folding in mask m; operands are wildcarded).
1456 __mmask8 test_mm_mask_cmp_pbh_mask_ord_s(__mmask8 m, __m128bh a, __m128bh b) {
1457   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ord_s
1458   // CHECK: fcmp ord <8 x bfloat> %{{.*}}, %{{.*}}
1459   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1460   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_S);
1461 }
1462
// Codegen test: _mm_mask_cmp_pbh_mask with _CMP_EQ_US is expected to emit `fcmp ueq` on
// <8 x bfloat>, then an `and` on <8 x i1> (presumably folding in mask m; operands are wildcarded).
1463 __mmask8 test_mm_mask_cmp_pbh_mask_eq_us(__mmask8 m, __m128bh a, __m128bh b) {
1464   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_eq_us
1465   // CHECK: fcmp ueq <8 x bfloat> %{{.*}}, %{{.*}}
1466   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1467   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_US);
1468 }
1469
// Codegen test: _mm_mask_cmp_pbh_mask with _CMP_NGE_UQ is expected to emit `fcmp ult` on
// <8 x bfloat>, then an `and` on <8 x i1> (presumably folding in mask m; operands are wildcarded).
1470 __mmask8 test_mm_mask_cmp_pbh_mask_nge_uq(__mmask8 m, __m128bh a, __m128bh b) {
1471   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nge_uq
1472   // CHECK: fcmp ult <8 x bfloat> %{{.*}}, %{{.*}}
1473   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1474   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_UQ);
1475 }
1476
// Codegen test: _mm_mask_cmp_pbh_mask with _CMP_NGT_UQ is expected to emit `fcmp ule` on
// <8 x bfloat>, then an `and` on <8 x i1> (presumably folding in mask m; operands are wildcarded).
1477 __mmask8 test_mm_mask_cmp_pbh_mask_ngt_uq(__mmask8 m, __m128bh a, __m128bh b) {
1478   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ngt_uq
1479   // CHECK: fcmp ule <8 x bfloat> %{{.*}}, %{{.*}}
1480   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1481   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_UQ);
1482 }
1483
// Codegen test: _mm_mask_cmp_pbh_mask with _CMP_FALSE_OS is expected to emit `fcmp false` on
// <8 x bfloat>, then an `and` on <8 x i1> (presumably folding in mask m; operands are wildcarded).
1484 __mmask8 test_mm_mask_cmp_pbh_mask_false_os(__mmask8 m, __m128bh a, __m128bh b) {
1485   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_false_os
1486   // CHECK: fcmp false <8 x bfloat> %{{.*}}, %{{.*}}
1487   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1488   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OS);
1489 }
1490
// Codegen test: _mm_mask_cmp_pbh_mask with _CMP_NEQ_OS is expected to emit `fcmp one` on
// <8 x bfloat>, then an `and` on <8 x i1> (presumably folding in mask m; operands are wildcarded).
1491 __mmask8 test_mm_mask_cmp_pbh_mask_neq_os(__mmask8 m, __m128bh a, __m128bh b) {
1492   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_neq_os
1493   // CHECK: fcmp one <8 x bfloat> %{{.*}}, %{{.*}}
1494   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1495   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OS);
1496 }
1497
// Codegen test: _mm_mask_cmp_pbh_mask with _CMP_GE_OQ is expected to emit `fcmp oge` on
// <8 x bfloat>, then an `and` on <8 x i1> (presumably folding in mask m; operands are wildcarded).
1498 __mmask8 test_mm_mask_cmp_pbh_mask_ge_oq(__mmask8 m, __m128bh a, __m128bh b) {
1499   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ge_oq
1500   // CHECK: fcmp oge <8 x bfloat> %{{.*}}, %{{.*}}
1501   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1502   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OQ);
1503 }
1504
// Codegen test: _mm_mask_cmp_pbh_mask with _CMP_GT_OQ is expected to emit `fcmp ogt` on
// <8 x bfloat>, then an `and` on <8 x i1> (presumably folding in mask m; operands are wildcarded).
1505 __mmask8 test_mm_mask_cmp_pbh_mask_gt_oq(__mmask8 m, __m128bh a, __m128bh b) {
1506   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_gt_oq
1507   // CHECK: fcmp ogt <8 x bfloat> %{{.*}}, %{{.*}}
1508   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1509   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OQ);
1510 }
1511
// Codegen test: _mm_mask_cmp_pbh_mask with _CMP_TRUE_US is expected to emit `fcmp true` on
// <8 x bfloat>, then an `and` on <8 x i1> (presumably folding in mask m; operands are wildcarded).
1512 __mmask8 test_mm_mask_cmp_pbh_mask_true_us(__mmask8 m, __m128bh a, __m128bh b) {
1513   // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_true_us
1514   // CHECK: fcmp true <8 x bfloat> %{{.*}}, %{{.*}}
1515   // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
1516   return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_US);
1517 }
1518
1519
// Codegen test: _mm256_mask_fpclass_pbh_mask (immediate 4) is expected to reference
// @llvm.x86.avx10.fpclass.nepbf16.256. Only the intrinsic name is matched; how __U is used is not verified.
1520 __mmask16 test_mm256_mask_fpclass_pbh_mask(__mmask16 __U, __m256bh __A) {
1521   // CHECK-LABEL: @test_mm256_mask_fpclass_pbh_mask
1522   // CHECK: @llvm.x86.avx10.fpclass.nepbf16.256
1523   return _mm256_mask_fpclass_pbh_mask(__U, __A, 4);
1524 }
1525
// Codegen test: _mm256_fpclass_pbh_mask (immediate 4) is expected to reference
// @llvm.x86.avx10.fpclass.nepbf16.256; only the intrinsic name is matched.
1526 __mmask16 test_mm256_fpclass_pbh_mask(__m256bh __A) {
1527   // CHECK-LABEL: @test_mm256_fpclass_pbh_mask
1528   // CHECK: @llvm.x86.avx10.fpclass.nepbf16.256
1529   return _mm256_fpclass_pbh_mask(__A, 4);
1530 }
1531
// Codegen test: _mm_mask_fpclass_pbh_mask (immediate 4) is expected to reference
// @llvm.x86.avx10.fpclass.nepbf16.128. Only the intrinsic name is matched; how __U is used is not verified.
1532 __mmask8 test_mm_mask_fpclass_pbh_mask(__mmask8 __U, __m128bh __A) {
1533   // CHECK-LABEL: @test_mm_mask_fpclass_pbh_mask
1534   // CHECK: @llvm.x86.avx10.fpclass.nepbf16.128
1535   return _mm_mask_fpclass_pbh_mask(__U, __A, 4);
1536 }
1537
// Codegen test: _mm_fpclass_pbh_mask (immediate 4) is expected to reference
// @llvm.x86.avx10.fpclass.nepbf16.128; only the intrinsic name is matched.
1538 __mmask8 test_mm_fpclass_pbh_mask(__m128bh __A) {
1539   // CHECK-LABEL: @test_mm_fpclass_pbh_mask
1540   // CHECK: @llvm.x86.avx10.fpclass.nepbf16.128
1541   return _mm_fpclass_pbh_mask(__A, 4);
1542 }
1543
// Codegen test: _mm256_scalef_pbh is expected to reference @llvm.x86.avx10.mask.scalef.nepbf16.256
// in the emitted IR; only the intrinsic name is matched.
1544 __m256bh test_mm256_scalef_pbh(__m256bh __A, __m256bh __B) {
1545   // CHECK-LABEL: @test_mm256_scalef_pbh
1546   // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.256
1547   return _mm256_scalef_pbh(__A, __B);
1548 }
1549
// Codegen test: _mm256_mask_scalef_pbh is expected to reference @llvm.x86.avx10.mask.scalef.nepbf16.256.
// Only the intrinsic name is matched, so the handling of __W and __U is not verified.
1550 __m256bh test_mm256_mask_scalef_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
1551   // CHECK-LABEL: @test_mm256_mask_scalef_pbh
1552   // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.256
1553   return _mm256_mask_scalef_pbh(__W, __U, __A, __B);
1554 }
1555
// Codegen test: _mm256_maskz_scalef_pbh is expected to reference @llvm.x86.avx10.mask.scalef.nepbf16.256.
// Only the intrinsic name is matched, so the handling of mask __U is not verified.
1556 __m256bh test_mm256_maskz_scalef_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
1557   // CHECK-LABEL: @test_mm256_maskz_scalef_pbh
1558   // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.256
1559   return _mm256_maskz_scalef_pbh(__U, __A, __B);
1560 }
1561
// Codegen test: _mm256_rcp_pbh is expected to reference @llvm.x86.avx10.mask.rcp.nepbf16.256
// in the emitted IR; only the intrinsic name is matched.
1562 __m256bh test_mm256_rcp_pbh(__m256bh __A) {
1563   // CHECK-LABEL: @test_mm256_rcp_pbh
1564   // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.256
1565   return _mm256_rcp_pbh(__A);
1566 }
1567
// Codegen test: _mm256_mask_rcp_pbh is expected to reference @llvm.x86.avx10.mask.rcp.nepbf16.256.
// Only the intrinsic name is matched, so the handling of __W and __U is not verified.
// NOTE(review): the (__m256bh) cast on the return looks redundant -- confirm against the intrinsic's declared type.
1568 __m256bh test_mm256_mask_rcp_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
1569   // CHECK-LABEL: @test_mm256_mask_rcp_pbh
1570   // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.256
1571   return (__m256bh)_mm256_mask_rcp_pbh(__W, __U, __A);
1572 }
1573
// Codegen test: _mm256_maskz_rcp_pbh is expected to reference @llvm.x86.avx10.mask.rcp.nepbf16.256.
// Only the intrinsic name is matched, so the handling of mask __U is not verified.
1574 __m256bh test_mm256_maskz_rcp_pbh(__mmask16 __U, __m256bh __A) {
1575   // CHECK-LABEL: @test_mm256_maskz_rcp_pbh
1576   // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.256
1577   return _mm256_maskz_rcp_pbh(__U, __A);
1578 }
1579
// Codegen test: _mm256_getexp_pbh is expected to reference @llvm.x86.avx10.mask.getexp.nepbf16.256
// in the emitted IR; only the intrinsic name is matched.
1580 __m256bh test_mm256_getexp_pbh(__m256bh __A) {
1581   // CHECK-LABEL: @test_mm256_getexp_pbh
1582   // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.256
1583   return _mm256_getexp_pbh(__A);
1584 }
1585
// Codegen test: _mm256_mask_getexp_pbh is expected to reference @llvm.x86.avx10.mask.getexp.nepbf16.256.
// Only the intrinsic name is matched, so the handling of __W and __U is not verified.
1586 __m256bh test_mm256_mask_getexp_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
1587   // CHECK-LABEL: @test_mm256_mask_getexp_pbh
1588   // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.256
1589   return _mm256_mask_getexp_pbh(__W, __U, __A);
1590 }
1591
// Codegen test: _mm256_maskz_getexp_pbh is expected to reference @llvm.x86.avx10.mask.getexp.nepbf16.256.
// Only the intrinsic name is matched, so the handling of mask __U is not verified.
1592 __m256bh test_mm256_maskz_getexp_pbh(__mmask16 __U, __m256bh __A) {
1593   // CHECK-LABEL: @test_mm256_maskz_getexp_pbh
1594   // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.256
1595   return _mm256_maskz_getexp_pbh(__U, __A);
1596 }
1597
// Codegen test: _mm256_rsqrt_pbh is expected to reference @llvm.x86.avx10.mask.rsqrt.nepbf16.256
// in the emitted IR; only the intrinsic name is matched.
1598 __m256bh test_mm256_rsqrt_pbh(__m256bh __A) {
1599   // CHECK-LABEL: @test_mm256_rsqrt_pbh
1600   // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.256
1601   return _mm256_rsqrt_pbh(__A);
1602 }
1603
// Codegen test: _mm256_mask_rsqrt_pbh is expected to reference @llvm.x86.avx10.mask.rsqrt.nepbf16.256.
// Only the intrinsic name is matched, so the handling of __W and __U is not verified.
// NOTE(review): the (__m256bh) cast on the return looks redundant -- confirm against the intrinsic's declared type.
1604 __m256bh test_mm256_mask_rsqrt_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
1605   // CHECK-LABEL: @test_mm256_mask_rsqrt_pbh
1606   // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.256
1607   return (__m256bh)_mm256_mask_rsqrt_pbh(__W, __U, __A);
1608 }
1609
// Codegen test: _mm256_maskz_rsqrt_pbh is expected to reference @llvm.x86.avx10.mask.rsqrt.nepbf16.256.
// Only the intrinsic name is matched, so the handling of mask __U is not verified.
1610 __m256bh test_mm256_maskz_rsqrt_pbh(__mmask16 __U, __m256bh __A) {
1611   // CHECK-LABEL: @test_mm256_maskz_rsqrt_pbh
1612   // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.256
1613   return _mm256_maskz_rsqrt_pbh(__U, __A);
1614 }
1615
// Codegen test: _mm256_reducene_pbh (immediate 3) is expected to reference
// @llvm.x86.avx10.mask.reduce.nepbf16.256; the immediate itself is not matched.
1616 __m256bh test_mm256_reducene_pbh(__m256bh __A) {
1617   // CHECK-LABEL: @test_mm256_reducene_pbh
1618   // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.256
1619   return _mm256_reducene_pbh(__A, 3);
1620 }
1621
// Codegen test: _mm256_mask_reducene_pbh (immediate 1) is expected to reference
// @llvm.x86.avx10.mask.reduce.nepbf16.256; the immediate and the use of __W/__U are not matched.
1622 __m256bh test_mm256_mask_reducene_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
1623   // CHECK-LABEL: @test_mm256_mask_reducene_pbh
1624   // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.256
1625   return _mm256_mask_reducene_pbh(__W, __U, __A, 1);
1626 }
1627
// Codegen test: _mm256_maskz_reducene_pbh (immediate 1) is expected to reference
// @llvm.x86.avx10.mask.reduce.nepbf16.256; the immediate and the use of __U are not matched.
1628 __m256bh test_mm256_maskz_reducene_pbh(__mmask16 __U, __m256bh __A) {
1629   // CHECK-LABEL: @test_mm256_maskz_reducene_pbh
1630   // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.256
1631   return _mm256_maskz_reducene_pbh(__U, __A, 1);
1632 }
1633
// Codegen test: _mm256_roundscalene_pbh (immediate 3) is expected to reference
// @llvm.x86.avx10.mask.rndscale.nepbf16.256; the immediate itself is not matched.
1634 __m256bh test_mm256_roundscalene_pbh(__m256bh __A) {
1635   // CHECK-LABEL: @test_mm256_roundscalene_pbh
1636   // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.256
1637   return _mm256_roundscalene_pbh(__A, 3);
1638 }
1639
// Codegen test: _mm256_mask_roundscalene_pbh (immediate 1) is expected to reference
// @llvm.x86.avx10.mask.rndscale.nepbf16.256; the immediate and the use of __W/__U are not matched.
1640 __m256bh test_mm256_mask_roundscalene_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
1641   // CHECK-LABEL: @test_mm256_mask_roundscalene_pbh
1642   // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.256
1643   return _mm256_mask_roundscalene_pbh(__W, __U, __A, 1);
1644 }
1645
// Codegen test: _mm256_maskz_roundscalene_pbh (immediate 1) is expected to reference
// @llvm.x86.avx10.mask.rndscale.nepbf16.256; the immediate and the use of __U are not matched.
1646 __m256bh test_mm256_maskz_roundscalene_pbh(__mmask16 __U, __m256bh __A) {
1647   // CHECK-LABEL: @test_mm256_maskz_roundscalene_pbh
1648   // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.256
1649   return _mm256_maskz_roundscalene_pbh(__U, __A, 1 );
1650 }
1651
// Codegen test: _mm256_getmant_pbh with _MM_MANT_NORM_p5_2 / _MM_MANT_SIGN_nan is expected to
// reference @llvm.x86.avx10.mask.getmant.nepbf16.256; the encoded immediate is not matched.
1652 __m256bh test_mm256_getmant_pbh(__m256bh __A) {
1653   // CHECK-LABEL: @test_mm256_getmant_pbh
1654   // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.256
1655   return _mm256_getmant_pbh(__A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
1656 }
1657
// Codegen test: _mm256_mask_getmant_pbh with _MM_MANT_NORM_p5_2 / _MM_MANT_SIGN_nan is expected to
// reference @llvm.x86.avx10.mask.getmant.nepbf16.256; the immediate and the use of __W/__U are not matched.
1658 __m256bh test_mm256_mask_getmant_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
1659   // CHECK-LABEL: @test_mm256_mask_getmant_pbh
1660   // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.256
1661   return _mm256_mask_getmant_pbh(__W, __U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
1662 }
1663
// Codegen test: _mm256_maskz_getmant_pbh with _MM_MANT_NORM_p5_2 / _MM_MANT_SIGN_nan is expected to
// reference @llvm.x86.avx10.mask.getmant.nepbf16.256; the immediate and the use of __U are not matched.
1664 __m256bh test_mm256_maskz_getmant_pbh(__mmask16 __U, __m256bh __A) {
1665   // CHECK-LABEL: @test_mm256_maskz_getmant_pbh
1666   // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.256
1667   return _mm256_maskz_getmant_pbh(__U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
1668 }
1669
// Codegen test: _mm256_sqrt_pbh is expected to emit a call to the generic @llvm.sqrt.v16bf16
// intrinsic taking and returning <16 x bfloat>.
1670 __m256bh test_mm256_sqrt_pbh(__m256bh __A) {
1671   // CHECK-LABEL: @test_mm256_sqrt_pbh
1672   // CHECK: call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> %{{.*}})
1673   return _mm256_sqrt_pbh(__A);
1674 }
1675
// Codegen test: _mm256_mask_sqrt_pbh is expected to reference @llvm.sqrt.v16bf16 and then emit a
// `select` on <16 x i1> between two <16 x bfloat> values (operands are wildcarded).
// NOTE(review): the (__m256bh) cast on the return looks redundant -- confirm against the intrinsic's declared type.
1676 __m256bh test_mm256_mask_sqrt_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
1677   // CHECK-LABEL: @test_mm256_mask_sqrt_pbh
1678   // CHECK: @llvm.sqrt.v16bf16
1679   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
1680   return (__m256bh)_mm256_mask_sqrt_pbh(__W, __U, __A);
1681 }
1682
// Codegen test: _mm256_maskz_sqrt_pbh is expected to reference @llvm.sqrt.v16bf16 and then emit a
// `select` on <16 x i1> between two <16 x bfloat> values (operands are wildcarded).
1683 __m256bh test_mm256_maskz_sqrt_pbh(__mmask16 __U, __m256bh __A) {
1684   // CHECK-LABEL: @test_mm256_maskz_sqrt_pbh
1685   // CHECK: @llvm.sqrt.v16bf16
1686   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
1687   return _mm256_maskz_sqrt_pbh(__U, __A);
1688 }
1689
// Codegen test: _mm_scalef_pbh is expected to reference @llvm.x86.avx10.mask.scalef.nepbf16.128
// in the emitted IR; only the intrinsic name is matched.
1690 __m128bh test_mm_scalef_pbh(__m128bh __A, __m128bh __B) {
1691   // CHECK-LABEL: @test_mm_scalef_pbh
1692   // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.128
1693   return _mm_scalef_pbh(__A, __B);
1694 }
1695
// Codegen test: _mm_mask_scalef_pbh is expected to reference @llvm.x86.avx10.mask.scalef.nepbf16.128.
// Only the intrinsic name is matched, so the handling of __W and __U is not verified.
1696 __m128bh test_mm_mask_scalef_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
1697   // CHECK-LABEL: @test_mm_mask_scalef_pbh
1698   // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.128
1699   return _mm_mask_scalef_pbh(__W, __U, __A, __B);
1700 }
1701
// Codegen test: _mm_maskz_scalef_pbh is expected to reference @llvm.x86.avx10.mask.scalef.nepbf16.128.
// Only the intrinsic name is matched, so the handling of mask __U is not verified.
1702 __m128bh test_mm_maskz_scalef_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
1703   // CHECK-LABEL: @test_mm_maskz_scalef_pbh
1704   // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.128
1705   return _mm_maskz_scalef_pbh(__U, __A, __B);
1706 }
1707
// Codegen test: _mm_rcp_pbh is expected to reference @llvm.x86.avx10.mask.rcp.nepbf16.128
// in the emitted IR; only the intrinsic name is matched.
1708 __m128bh test_mm_rcp_pbh(__m128bh __A) {
1709   // CHECK-LABEL: @test_mm_rcp_pbh
1710   // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.128
1711   return _mm_rcp_pbh(__A);
1712 }
1713
// Codegen test: _mm_mask_rcp_pbh is expected to reference @llvm.x86.avx10.mask.rcp.nepbf16.128.
// Only the intrinsic name is matched, so the handling of __W and __U is not verified.
// NOTE(review): the (__m128bh) cast on the return looks redundant -- confirm against the intrinsic's declared type.
1714 __m128bh test_mm_mask_rcp_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
1715   // CHECK-LABEL: @test_mm_mask_rcp_pbh
1716   // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.128
1717   return (__m128bh)_mm_mask_rcp_pbh(__W, __U, __A);
1718 }
1719
// Codegen test: _mm_maskz_rcp_pbh is expected to reference @llvm.x86.avx10.mask.rcp.nepbf16.128.
// Only the intrinsic name is matched, so the handling of mask __U is not verified.
1720 __m128bh test_mm_maskz_rcp_pbh(__mmask8 __U, __m128bh __A) {
1721   // CHECK-LABEL: @test_mm_maskz_rcp_pbh
1722   // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.128
1723   return _mm_maskz_rcp_pbh(__U, __A);
1724 }
1725
// Codegen test: _mm_getexp_pbh is expected to reference @llvm.x86.avx10.mask.getexp.nepbf16.128
// in the emitted IR; only the intrinsic name is matched.
1726 __m128bh test_mm_getexp_pbh(__m128bh __A) {
1727   // CHECK-LABEL: @test_mm_getexp_pbh
1728   // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.128
1729   return _mm_getexp_pbh(__A);
1730 }
1731
// Codegen test: _mm_mask_getexp_pbh is expected to reference @llvm.x86.avx10.mask.getexp.nepbf16.128.
// Only the intrinsic name is matched, so the handling of __W and __U is not verified.
1732 __m128bh test_mm_mask_getexp_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
1733   // CHECK-LABEL: @test_mm_mask_getexp_pbh
1734   // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.128
1735   return _mm_mask_getexp_pbh(__W, __U, __A);
1736 }
1737
// Codegen test: _mm_maskz_getexp_pbh is expected to reference @llvm.x86.avx10.mask.getexp.nepbf16.128.
// Only the intrinsic name is matched, so the handling of mask __U is not verified.
1738 __m128bh test_mm_maskz_getexp_pbh(__mmask8 __U, __m128bh __A) {
1739   // CHECK-LABEL: @test_mm_maskz_getexp_pbh
1740   // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.128
1741   return _mm_maskz_getexp_pbh(__U, __A);
1742 }
1743
// Codegen test: _mm_rsqrt_pbh is expected to reference @llvm.x86.avx10.mask.rsqrt.nepbf16.128
// in the emitted IR; only the intrinsic name is matched.
1744 __m128bh test_mm_rsqrt_pbh(__m128bh __A) {
1745   // CHECK-LABEL: @test_mm_rsqrt_pbh
1746   // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.128
1747   return _mm_rsqrt_pbh(__A);
1748 }
1749
// Codegen test: _mm_mask_rsqrt_pbh is expected to reference @llvm.x86.avx10.mask.rsqrt.nepbf16.128.
// Only the intrinsic name is matched, so the handling of __W and __U is not verified.
// NOTE(review): the (__m128bh) cast on the return looks redundant -- confirm against the intrinsic's declared type.
1750 __m128bh test_mm_mask_rsqrt_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
1751   // CHECK-LABEL: @test_mm_mask_rsqrt_pbh
1752   // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.128
1753   return (__m128bh)_mm_mask_rsqrt_pbh(__W, __U, __A);
1754 }
1755
// Codegen test: _mm_maskz_rsqrt_pbh is expected to reference @llvm.x86.avx10.mask.rsqrt.nepbf16.128.
// Only the intrinsic name is matched, so the handling of mask __U is not verified.
1756 __m128bh test_mm_maskz_rsqrt_pbh(__mmask8 __U, __m128bh __A) {
1757   // CHECK-LABEL: @test_mm_maskz_rsqrt_pbh
1758   // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.128
1759   return _mm_maskz_rsqrt_pbh(__U, __A);
1760 }
1761
// Codegen test: _mm_reducene_pbh (immediate 3) is expected to reference
// @llvm.x86.avx10.mask.reduce.nepbf16.128; the immediate itself is not matched.
1762 __m128bh test_mm_reducene_pbh(__m128bh __A) {
1763   // CHECK-LABEL: @test_mm_reducene_pbh
1764   // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.128
1765   return _mm_reducene_pbh(__A, 3);
1766 }
1767
// Codegen test: _mm_mask_reducene_pbh (immediate 1) is expected to reference
// @llvm.x86.avx10.mask.reduce.nepbf16.128; the immediate and the use of __W/__U are not matched.
1768 __m128bh test_mm_mask_reducene_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
1769   // CHECK-LABEL: @test_mm_mask_reducene_pbh
1770   // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.128
1771   return _mm_mask_reducene_pbh(__W, __U, __A, 1);
1772 }
1773
// Codegen test: _mm_maskz_reducene_pbh (immediate 1) is expected to reference
// @llvm.x86.avx10.mask.reduce.nepbf16.128; the immediate and the use of __U are not matched.
1774 __m128bh test_mm_maskz_reducene_pbh(__mmask8 __U, __m128bh __A) {
1775   // CHECK-LABEL: @test_mm_maskz_reducene_pbh
1776   // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.128
1777   return _mm_maskz_reducene_pbh(__U, __A, 1);
1778 }
1779
// Codegen test: _mm_roundscalene_pbh (immediate 3) is expected to reference
// @llvm.x86.avx10.mask.rndscale.nepbf16.128; the immediate itself is not matched.
1780 __m128bh test_mm_roundscalene_pbh(__m128bh __A) {
1781   // CHECK-LABEL: @test_mm_roundscalene_pbh
1782   // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.128
1783   return _mm_roundscalene_pbh(__A, 3);
1784 }
1785
// Codegen test: _mm_mask_roundscalene_pbh (immediate 1) is expected to reference
// @llvm.x86.avx10.mask.rndscale.nepbf16.128; the immediate and the use of __W/__U are not matched.
1786 __m128bh test_mm_mask_roundscalene_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
1787   // CHECK-LABEL: @test_mm_mask_roundscalene_pbh
1788   // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.128
1789   return _mm_mask_roundscalene_pbh(__W, __U, __A, 1);
1790 }
1791
// Codegen test: _mm_maskz_roundscalene_pbh (immediate 1) is expected to reference
// @llvm.x86.avx10.mask.rndscale.nepbf16.128; the immediate and the use of __U are not matched.
1792 __m128bh test_mm_maskz_roundscalene_pbh(__mmask8 __U, __m128bh __A) {
1793   // CHECK-LABEL: @test_mm_maskz_roundscalene_pbh
1794   // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.128
1795   return _mm_maskz_roundscalene_pbh(__U, __A, 1 );
1796 }
1797
// Codegen test: _mm_getmant_pbh with _MM_MANT_NORM_p5_2 / _MM_MANT_SIGN_nan is expected to
// reference @llvm.x86.avx10.mask.getmant.nepbf16.128; the encoded immediate is not matched.
1798 __m128bh test_mm_getmant_pbh(__m128bh __A) {
1799   // CHECK-LABEL: @test_mm_getmant_pbh
1800   // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.128
1801   return _mm_getmant_pbh(__A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
1802 }
1803
// Codegen test: _mm_mask_getmant_pbh with _MM_MANT_NORM_p5_2 / _MM_MANT_SIGN_nan is expected to
// reference @llvm.x86.avx10.mask.getmant.nepbf16.128; the immediate and the use of __W/__U are not matched.
1804 __m128bh test_mm_mask_getmant_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
1805   // CHECK-LABEL: @test_mm_mask_getmant_pbh
1806   // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.128
1807   return _mm_mask_getmant_pbh(__W, __U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
1808 }
1809
// Codegen test: _mm_maskz_getmant_pbh with _MM_MANT_NORM_p5_2 / _MM_MANT_SIGN_nan is expected to
// reference @llvm.x86.avx10.mask.getmant.nepbf16.128; the immediate and the use of __U are not matched.
1810 __m128bh test_mm_maskz_getmant_pbh(__mmask8 __U, __m128bh __A) {
1811   // CHECK-LABEL: @test_mm_maskz_getmant_pbh
1812   // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.128
1813   return _mm_maskz_getmant_pbh(__U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
1814 }
1815
// Codegen test: _mm_sqrt_pbh is expected to emit a call to the generic @llvm.sqrt.v8bf16
// intrinsic taking and returning <8 x bfloat>.
1816 __m128bh test_mm_sqrt_pbh(__m128bh __A) {
1817   // CHECK-LABEL: @test_mm_sqrt_pbh
1818   // CHECK: call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> {{.*}})
1819   return _mm_sqrt_pbh(__A);
1820 }
1821
// Codegen test: _mm_mask_sqrt_pbh is expected to emit a call to @llvm.sqrt.v8bf16 and then a
// `select` on <8 x i1> between two <8 x bfloat> values (operands are wildcarded).
// NOTE(review): the (__m128bh) cast on the return looks redundant -- confirm against the intrinsic's declared type.
1822 __m128bh test_mm_mask_sqrt_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
1823   // CHECK-LABEL: @test_mm_mask_sqrt_pbh
1824   // CHECK: call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> {{.*}})
1825   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
1826   return (__m128bh)_mm_mask_sqrt_pbh(__W, __U, __A);
1827 }
1828
// Codegen test: _mm_maskz_sqrt_pbh is expected to emit a call to @llvm.sqrt.v8bf16 and then a
// `select` on <8 x i1> between two <8 x bfloat> values (operands are wildcarded).
1829 __m128bh test_mm_maskz_sqrt_pbh(__mmask8 __U, __m128bh __A) {
1830   // CHECK-LABEL: @test_mm_maskz_sqrt_pbh
1831   // CHECK: call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> {{.*}})
1832   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
1833   return _mm_maskz_sqrt_pbh(__U, __A);
1834 }
1835
// Codegen test: _mm256_fmaddne_pbh is expected to emit a call to the generic @llvm.fma.v16bf16
// intrinsic with three <16 x bfloat> operands.
1836 __m256bh test_mm256_fmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
1837   // CHECK-LABEL: @test_mm256_fmaddne_pbh
1838   // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
1839   return _mm256_fmaddne_pbh(__A, __B, __C);
1840 }
1841
// Codegen test: _mm256_mask_fmaddne_pbh is expected to emit a call to @llvm.fma.v16bf16 followed by a
// `select` on <16 x i1>; the wildcards do not pin which value is the pass-through.
1842 __m256bh test_mm256_mask_fmaddne_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) {
1843   // CHECK-LABEL: @test_mm256_mask_fmaddne_pbh
1844   // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
1845   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
1846   return _mm256_mask_fmaddne_pbh(__A, __U, __B, __C);
1847 }
1848
// Codegen test: _mm256_mask3_fmaddne_pbh (mask passed last) is expected to emit a call to @llvm.fma.v16bf16
// followed by a `select` on <16 x i1>; the wildcards do not pin which value is the pass-through.
1849 __m256bh test_mm256_mask3_fmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) {
1850   // CHECK-LABEL: @test_mm256_mask3_fmaddne_pbh
1851   // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
1852   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
1853   return _mm256_mask3_fmaddne_pbh(__A, __B, __C, __U);
1854 }
1855
// Codegen test: _mm256_maskz_fmaddne_pbh (mask passed first) is expected to emit a call to @llvm.fma.v16bf16
// followed by a `select` on <16 x i1>; the wildcards do not pin the select's operands.
1856 __m256bh test_mm256_maskz_fmaddne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) {
1857   // CHECK-LABEL: @test_mm256_maskz_fmaddne_pbh
1858   // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
1859   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
1860   return _mm256_maskz_fmaddne_pbh(__U, __A, __B, __C);
1861 }
1862
// Codegen test: _mm256_fmsubne_pbh is expected to emit one `fneg` and then a call to @llvm.fma.v16bf16;
// the patterns do not pin which operand is negated.
1863 __m256bh test_mm256_fmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
1864   // CHECK-LABEL: @test_mm256_fmsubne_pbh
1865   // CHECK: fneg
1866   // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
1867   return _mm256_fmsubne_pbh(__A, __B, __C);
1868 }
1869
// Codegen test: _mm256_mask_fmsubne_pbh is expected to emit one `fneg`, a call to @llvm.fma.v16bf16, and then
// a `select` on <16 x i1>; the patterns do not pin the negated operand or the pass-through value.
1870 __m256bh test_mm256_mask_fmsubne_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) {
1871   // CHECK-LABEL: @test_mm256_mask_fmsubne_pbh
1872   // CHECK: fneg
1873   // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
1874   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
1875   return _mm256_mask_fmsubne_pbh(__A, __U, __B, __C);
1876 }
1877
// Codegen test: _mm256_mask3_fmsubne_pbh (mask passed last) is expected to emit one `fneg`, a call to
// @llvm.fma.v16bf16, and then a `select` on <16 x i1>; operands are wildcarded.
1878 __m256bh test_mm256_mask3_fmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) {
1879   // CHECK-LABEL: @test_mm256_mask3_fmsubne_pbh
1880   // CHECK: fneg
1881   // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
1882   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
1883   return _mm256_mask3_fmsubne_pbh(__A, __B, __C, __U);
1884 }
1885
// Codegen test: _mm256_maskz_fmsubne_pbh (mask passed first) is expected to emit one `fneg`, a call to
// @llvm.fma.v16bf16, and then a `select` on <16 x i1>; operands are wildcarded.
1886 __m256bh test_mm256_maskz_fmsubne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) {
1887   // CHECK-LABEL: @test_mm256_maskz_fmsubne_pbh
1888   // CHECK: fneg
1889   // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
1890   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
1891   return _mm256_maskz_fmsubne_pbh(__U, __A, __B, __C);
1892 }
1893
// Codegen test: _mm256_fnmaddne_pbh is expected to emit one `fneg` and then a call to @llvm.fma.v16bf16;
// the patterns do not pin which operand is negated.
1894 __m256bh test_mm256_fnmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
1895   // CHECK-LABEL: @test_mm256_fnmaddne_pbh
1896   // CHECK: fneg
1897   // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
1898   return _mm256_fnmaddne_pbh(__A, __B, __C);
1899 }
1900
// Codegen test: _mm256_mask_fnmaddne_pbh is expected to emit one `fneg`, a call to @llvm.fma.v16bf16, and then
// a `select` on <16 x i1>; the patterns do not pin the negated operand or the pass-through value.
1901 __m256bh test_mm256_mask_fnmaddne_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) {
1902   // CHECK-LABEL: @test_mm256_mask_fnmaddne_pbh
1903   // CHECK: fneg
1904   // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
1905   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
1906   return _mm256_mask_fnmaddne_pbh(__A, __U, __B, __C);
1907 }
1908
// Codegen test: _mm256_mask3_fnmaddne_pbh (mask passed last) is expected to emit one `fneg`, a call to
// @llvm.fma.v16bf16, and then a `select` on <16 x i1>; operands are wildcarded.
1909 __m256bh test_mm256_mask3_fnmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) {
1910   // CHECK-LABEL: @test_mm256_mask3_fnmaddne_pbh
1911   // CHECK: fneg
1912   // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
1913   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
1914   return _mm256_mask3_fnmaddne_pbh(__A, __B, __C, __U);
1915 }
1916
// Codegen test: _mm256_maskz_fnmaddne_pbh (mask passed first) is expected to emit one `fneg`, a call to
// @llvm.fma.v16bf16, and then a `select` on <16 x i1>; operands are wildcarded.
1917 __m256bh test_mm256_maskz_fnmaddne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) {
1918   // CHECK-LABEL: @test_mm256_maskz_fnmaddne_pbh
1919   // CHECK: fneg
1920   // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
1921   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
1922   return _mm256_maskz_fnmaddne_pbh(__U, __A, __B, __C);
1923 }
1924
// Codegen test: _mm256_fnmsubne_pbh is expected to emit two `fneg` instructions and then a call to
// @llvm.fma.v16bf16; the patterns do not pin which operands are negated.
1925 __m256bh test_mm256_fnmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
1926   // CHECK-LABEL: @test_mm256_fnmsubne_pbh
1927   // CHECK: fneg
1928   // CHECK: fneg
1929   // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
1930   return _mm256_fnmsubne_pbh(__A, __B, __C);
1931 }
1932
// Codegen test: _mm256_mask_fnmsubne_pbh is expected to emit two `fneg` instructions, a call to
// @llvm.fma.v16bf16, and then a `select` on <16 x i1>; operands are wildcarded.
1933 __m256bh test_mm256_mask_fnmsubne_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) {
1934   // CHECK-LABEL: @test_mm256_mask_fnmsubne_pbh
1935   // CHECK: fneg
1936   // CHECK: fneg
1937   // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
1938   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
1939   return _mm256_mask_fnmsubne_pbh(__A, __U, __B, __C);
1940 }
1941
// Codegen test: _mm256_mask3_fnmsubne_pbh (mask passed last) is expected to emit two `fneg` instructions,
// a call to @llvm.fma.v16bf16, and then a `select` on <16 x i1>; operands are wildcarded.
1942 __m256bh test_mm256_mask3_fnmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) {
1943   // CHECK-LABEL: @test_mm256_mask3_fnmsubne_pbh
1944   // CHECK: fneg
1945   // CHECK: fneg
1946   // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
1947   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
1948   return _mm256_mask3_fnmsubne_pbh(__A, __B, __C, __U);
1949 }
1950
// Codegen test: _mm256_maskz_fnmsubne_pbh (mask passed first) is expected to emit two `fneg` instructions,
// a call to @llvm.fma.v16bf16, and then a `select` on <16 x i1>; operands are wildcarded.
1951 __m256bh test_mm256_maskz_fnmsubne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) {
1952   // CHECK-LABEL: @test_mm256_maskz_fnmsubne_pbh
1953   // CHECK: fneg
1954   // CHECK: fneg
1955   // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
1956   // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
1957   return _mm256_maskz_fnmsubne_pbh(__U, __A, __B, __C);
1958 }
1959
// Codegen test: _mm_fmaddne_pbh is expected to emit a call to the generic @llvm.fma.v8bf16
// intrinsic with three <8 x bfloat> operands.
1960 __m128bh test_mm_fmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) {
1961   // CHECK-LABEL: @test_mm_fmaddne_pbh
1962   // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
1963   return _mm_fmaddne_pbh(__A, __B, __C);
1964 }
1965
// Codegen test: _mm_mask_fmaddne_pbh is expected to emit a call to @llvm.fma.v8bf16 followed by a
// `select` on <8 x i1>; the wildcards do not pin which value is the pass-through.
1966 __m128bh test_mm_mask_fmaddne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
1967   // CHECK-LABEL: @test_mm_mask_fmaddne_pbh
1968   // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
1969   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
1970   return _mm_mask_fmaddne_pbh(__A, __U, __B, __C);
1971 }
1972
// Codegen test: _mm_mask3_fmaddne_pbh (mask passed last) is expected to emit a call to @llvm.fma.v8bf16
// followed by a `select` on <8 x i1>; the wildcards do not pin which value is the pass-through.
1973 __m128bh test_mm_mask3_fmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
1974   // CHECK-LABEL: @test_mm_mask3_fmaddne_pbh
1975   // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
1976   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
1977   return _mm_mask3_fmaddne_pbh(__A, __B, __C, __U);
1978 }
1979
// Codegen test: _mm_maskz_fmaddne_pbh (mask passed first) is expected to emit a call to @llvm.fma.v8bf16
// followed by a `select` on <8 x i1>; the wildcards do not pin the select's operands.
1980 __m128bh test_mm_maskz_fmaddne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
1981   // CHECK-LABEL: @test_mm_maskz_fmaddne_pbh
1982   // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
1983   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
1984   return _mm_maskz_fmaddne_pbh(__U, __A, __B, __C);
1985 }
1986
// Codegen test: _mm_fmsubne_pbh is expected to emit one `fneg` and then a call to @llvm.fma.v8bf16;
// the patterns do not pin which operand is negated.
1987 __m128bh test_mm_fmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) {
1988   // CHECK-LABEL: @test_mm_fmsubne_pbh
1989   // CHECK: fneg
1990   // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
1991   return _mm_fmsubne_pbh(__A, __B, __C);
1992 }
1993
// Codegen test: _mm_mask_fmsubne_pbh is expected to emit one `fneg`, a call to @llvm.fma.v8bf16, and then
// a `select` on <8 x i1>; the patterns do not pin the negated operand or the pass-through value.
1994 __m128bh test_mm_mask_fmsubne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
1995   // CHECK-LABEL: @test_mm_mask_fmsubne_pbh
1996   // CHECK: fneg
1997   // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
1998   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
1999   return _mm_mask_fmsubne_pbh(__A, __U, __B, __C);
2000 }
2001
// Codegen test: _mm_mask3_fmsubne_pbh (mask passed last) is expected to emit one `fneg`, a call to
// @llvm.fma.v8bf16, and then a `select` on <8 x i1>; operands are wildcarded.
2002 __m128bh test_mm_mask3_fmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
2003   // CHECK-LABEL: @test_mm_mask3_fmsubne_pbh
2004   // CHECK: fneg
2005   // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
2006   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
2007   return _mm_mask3_fmsubne_pbh(__A, __B, __C, __U);
2008 }
2009
// Codegen test: _mm_maskz_fmsubne_pbh (mask passed first) is expected to emit one `fneg`, a call to
// @llvm.fma.v8bf16, and then a `select` on <8 x i1>; operands are wildcarded.
2010 __m128bh test_mm_maskz_fmsubne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
2011   // CHECK-LABEL: @test_mm_maskz_fmsubne_pbh
2012   // CHECK: fneg
2013   // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
2014   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
2015   return _mm_maskz_fmsubne_pbh(__U, __A, __B, __C);
2016 }
2017
// Codegen test: _mm_fnmaddne_pbh is expected to emit one `fneg` and then a call to @llvm.fma.v8bf16;
// the patterns do not pin which operand is negated.
2018 __m128bh test_mm_fnmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) {
2019   // CHECK-LABEL: @test_mm_fnmaddne_pbh
2020   // CHECK: fneg
2021   // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
2022   return _mm_fnmaddne_pbh(__A, __B, __C);
2023 }
2024
// Codegen test: _mm_mask_fnmaddne_pbh is expected to emit one `fneg`, a call to @llvm.fma.v8bf16, and then
// a `select` on <8 x i1>; the patterns do not pin the negated operand or the pass-through value.
2025 __m128bh test_mm_mask_fnmaddne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
2026   // CHECK-LABEL: @test_mm_mask_fnmaddne_pbh
2027   // CHECK: fneg
2028   // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
2029   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
2030   return _mm_mask_fnmaddne_pbh(__A, __U, __B, __C);
2031 }
2032
// Codegen test: _mm_mask3_fnmaddne_pbh (mask passed last) is expected to emit one `fneg`, a call to
// @llvm.fma.v8bf16, and then a `select` on <8 x i1>; operands are wildcarded.
2033 __m128bh test_mm_mask3_fnmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
2034   // CHECK-LABEL: @test_mm_mask3_fnmaddne_pbh
2035   // CHECK: fneg
2036   // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
2037   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
2038   return _mm_mask3_fnmaddne_pbh(__A, __B, __C, __U);
2039 }
2040
// Codegen test: _mm_maskz_fnmaddne_pbh (mask passed first) is expected to emit one `fneg`, a call to
// @llvm.fma.v8bf16, and then a `select` on <8 x i1>; operands are wildcarded.
2041 __m128bh test_mm_maskz_fnmaddne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
2042   // CHECK-LABEL: @test_mm_maskz_fnmaddne_pbh
2043   // CHECK: fneg
2044   // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
2045   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
2046   return _mm_maskz_fnmaddne_pbh(__U, __A, __B, __C);
2047 }
2048
// Codegen test: _mm_fnmsubne_pbh is expected to emit two `fneg` instructions and then a call to
// @llvm.fma.v8bf16; the patterns do not pin which operands are negated.
2049 __m128bh test_mm_fnmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) {
2050   // CHECK-LABEL: @test_mm_fnmsubne_pbh
2051   // CHECK: fneg
2052   // CHECK: fneg
2053   // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
2054   return _mm_fnmsubne_pbh(__A, __B, __C);
2055 }
2056
// Codegen test: _mm_mask_fnmsubne_pbh is expected to emit two `fneg` instructions, a call to
// @llvm.fma.v8bf16, and then a `select` on <8 x i1>; operands are wildcarded.
2057 __m128bh test_mm_mask_fnmsubne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
2058   // CHECK-LABEL: @test_mm_mask_fnmsubne_pbh
2059   // CHECK: fneg
2060   // CHECK: fneg
2061   // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
2062   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
2063   return _mm_mask_fnmsubne_pbh(__A, __U, __B, __C);
2064 }
2065
// Codegen test: _mm_mask3_fnmsubne_pbh (mask passed last) is expected to emit two `fneg` instructions,
// a call to @llvm.fma.v8bf16, and then a `select` on <8 x i1>; operands are wildcarded.
2066 __m128bh test_mm_mask3_fnmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
2067   // CHECK-LABEL: @test_mm_mask3_fnmsubne_pbh
2068   // CHECK: fneg
2069   // CHECK: fneg
2070   // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
2071   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
2072   return _mm_mask3_fnmsubne_pbh(__A, __B, __C, __U);
2073 }
2074
// Codegen test: _mm_maskz_fnmsubne_pbh (mask passed first) is expected to emit two `fneg` instructions,
// a call to @llvm.fma.v8bf16, and then a `select` on <8 x i1>; operands are wildcarded.
2075 __m128bh test_mm_maskz_fnmsubne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
2076   // CHECK-LABEL: @test_mm_maskz_fnmsubne_pbh
2077   // CHECK: fneg
2078   // CHECK: fneg
2079   // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
2080   // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
2081   return _mm_maskz_fnmsubne_pbh(__U, __A, __B, __C);
2082 }