2 #extension GL_KHR_memory_scope_semantics : enable
3 #extension GL_KHR_cooperative_matrix : enable
4 #extension GL_EXT_shader_explicit_arithmetic_types : enable
5 #extension GL_NV_cooperative_matrix2 : enable
\r
7 layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; // compute workgroup size: 64 x 1 x 1 invocations
13 uint32_t addr(const in uint32_t x, const in uint32_t y) {
\r
17 uint32_t foo() { return 124; } // always returns 124; in the lines visible here it is referenced only by the commented-out tensor load/store calls
\r
19 float16_t relu(const in uint32_t row, const in uint32_t col, const in float16_t x) { return max(x, float16_t(0)); } // callback handed to coopMatPerElementNV below: returns max(x, 0); row and col are accepted but unused
\r
21 float16_t add(const in uint32_t row, const in uint32_t col, const in float16_t x, const in float16_t y) { return x+y; } // callback handed to coopMatPerElementNV below together with one extra argument (y): returns x + y; row and col are accepted but unused
\r
23 float16_t combineSum(const in float16_t a, const in float16_t b) { return a + b; } // binary combine function passed to the coopMatReduceNV row/column reductions below: returns a + b
\r
24 float16_t combineMax(const in float16_t a, const in float16_t b) { return max(a, b); } // binary combine function passed to the 2x2 coopMatReduceNV call below: returns max(a, b)
\r
26 layout(constant_id = 0) const uint32_t Dim = 32; // specialization constant (id 0), default 32; used below as both dimensions of the float matrices li and mijm1
\r
30 coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA> A; // float16_t matrix, workgroup scope, dimensions 64 and 32, use A
\r
31 coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseB> B; // same element type, scope and dimensions, use B
\r
32 coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseAccumulator> Acc; // same element type, scope and dimensions, accumulator use; no initializer in the lines visible here
\r
34 A = coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Acc); // constructor changes only the matrix use: accumulator -> A
35 B = coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseB>(Acc); // constructor changes only the matrix use: accumulator -> B
\r
37 coopmat<float16_t, gl_ScopeWorkgroup, 32, 64, gl_MatrixUseB> tr; // use-B matrix whose dimensions (32, 64) are swapped relative to Acc (64, 32)
\r
39 coopMatTransposeNV(tr, Acc); // transpose of the accumulator Acc; NOTE(review): tr is presumably the output operand - confirm against the GL_NV_cooperative_matrix2 spec
\r
41 coopMatReduceNV(Acc, Acc, gl_CooperativeMatrixReduceRowNV, combineSum); // reduction with the Row mask and combineSum; Acc is passed as both the first and the second matrix argument
\r
42 coopMatReduceNV(Acc, Acc, gl_CooperativeMatrixReduceColumnNV, combineSum); // same call shape with the Column mask
\r
43 coopMatReduceNV(Acc, Acc, gl_CooperativeMatrixReduceRowAndColumnNV, combineSum); // same call shape with the RowAndColumn mask
\r
45 coopmat<float16_t, gl_ScopeWorkgroup, 32, 16, gl_MatrixUseAccumulator> Acc2x2; // accumulator with each dimension half of Acc's (32, 16 versus 64, 32)
\r
46 coopMatReduceNV(Acc2x2, Acc, gl_CooperativeMatrixReduce2x2NV, combineMax); // reduction with the 2x2 mask and combineMax, taking Acc and the half-sized Acc2x2 as matrix arguments
\r
48 //coopMatLoadTensorNV(A, Buf.x, foo(), addr);
\r
49 //coopMatStoreTensorNV(A, Buf.x, foo(), addr);
\r
51 coopMatPerElementNV(Acc, Acc, relu); // per-element call using relu as the callback; Acc is passed as both matrix arguments
\r
52 coopMatPerElementNV(Acc, Acc, add, float16_t(1.0)); // per-element call using add as the callback; the trailing float16_t(1.0) lines up with add's extra parameter y
\r
54 coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseAccumulator> Accf16; // float16_t accumulator; source operand for the conversion constructors below
\r
55 coopmat<float32_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseAccumulator> Accf32; // float32_t accumulator, same scope and dimensions
\r
56 coopmat<uint32_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseAccumulator> Accu32; // uint32_t accumulator, same scope and dimensions
\r
57 coopmat<int32_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseAccumulator> Accs32; // int32_t accumulator, same scope and dimensions
\r
59 coopmat<uint8_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accu32); // uint32_t accumulator -> uint8_t use-A matrix; expression statement, the constructed value is not stored
\r
60 coopmat<int8_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accu32); // uint32_t accumulator -> int8_t use-A matrix (value not stored)
\r
61 coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accu32); // uint32_t accumulator -> float16_t use-A matrix (value not stored)
\r
63 coopmat<uint8_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accs32); // int32_t accumulator -> uint8_t use-A matrix (value not stored)
\r
64 coopmat<int8_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accs32); // int32_t accumulator -> int8_t use-A matrix (value not stored)
\r
65 coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accs32); // int32_t accumulator -> float16_t use-A matrix (value not stored)
\r
67 coopmat<uint8_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accf16); // float16_t accumulator -> uint8_t use-A matrix (value not stored)
\r
68 coopmat<int8_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accf16); // float16_t accumulator -> int8_t use-A matrix (value not stored)
\r
69 coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accf16); // float16_t accumulator -> float16_t use-A matrix: only the use changes (value not stored)
\r
71 coopmat<uint8_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accf32); // float32_t accumulator -> uint8_t use-A matrix (value not stored)
\r
72 coopmat<int8_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accf32); // float32_t accumulator -> int8_t use-A matrix (value not stored)
\r
73 coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA>(Accf32); // float32_t accumulator -> float16_t use-A matrix (value not stored)
\r
75 coopmat<float, gl_ScopeWorkgroup, Dim, Dim, gl_MatrixUseAccumulator> li, mijm1; // two float accumulators sized Dim x Dim by the specialization constant; li is not used in the lines visible here
\r
76 mijm1 = coopmat<float, gl_ScopeWorkgroup, Dim, Dim, gl_MatrixUseAccumulator>(-1.0/0.0); // matrix constructed from the single scalar -1.0/0.0; NOTE(review): presumably intended as negative infinity - confirm the division-by-zero result is defined for the target
\r