/*
 * Copyright 2015-2021 Arm Limited
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * At your option, you may choose to accept this material under either:
 *  1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 *  2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#include "spirv_cross.hpp"
#include "GLSL.std.450.h"
#include "spirv_cfg.hpp"
#include "spirv_common.hpp"
#include "spirv_parser.hpp"

using namespace SPIRV_CROSS_NAMESPACE;
Compiler::Compiler(vector<uint32_t> ir_)
	Parser parser(std::move(ir_));
	set_ir(std::move(parser.get_parsed_ir()));

Compiler::Compiler(const uint32_t *ir_, size_t word_count)
	Parser parser(ir_, word_count);
	set_ir(std::move(parser.get_parsed_ir()));

Compiler::Compiler(const ParsedIR &ir_)

Compiler::Compiler(ParsedIR &&ir_)
	set_ir(std::move(ir_));

void Compiler::set_ir(ParsedIR &&ir_)

void Compiler::set_ir(const ParsedIR &ir_)

string Compiler::compile()

bool Compiler::variable_storage_is_aliased(const SPIRVariable &v)
	auto &type = get<SPIRType>(v.basetype);
	bool ssbo = v.storage == StorageClassStorageBuffer ||
	            ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
	bool image = type.basetype == SPIRType::Image;
	bool counter = type.basetype == SPIRType::AtomicCounter;
	bool buffer_reference = type.storage == StorageClassPhysicalStorageBufferEXT;
		is_restrict = ir.get_buffer_block_flags(v).get(DecorationRestrict);
		is_restrict = has_decoration(v.self, DecorationRestrict);
	return !is_restrict && (ssbo || image || counter || buffer_reference);
bool Compiler::block_is_control_dependent(const SPIRBlock &block)
	for (auto &i : block.ops)
		auto ops = stream(i);
		auto op = static_cast<Op>(i.op);
			uint32_t func = ops[2];
			if (function_is_control_dependent(get<SPIRFunction>(func)))
		// Anything implicit LOD
		case OpImageSampleImplicitLod:
		case OpImageSampleDrefImplicitLod:
		case OpImageSampleProjImplicitLod:
		case OpImageSampleProjDrefImplicitLod:
		case OpImageSparseSampleImplicitLod:
		case OpImageSparseSampleDrefImplicitLod:
		case OpImageSparseSampleProjImplicitLod:
		case OpImageSparseSampleProjDrefImplicitLod:
		case OpImageQueryLod:
		case OpImageDrefGather:
		case OpImageSparseDrefGather:
		case OpImageSparseGather:
		// Anything subgroups
		case OpGroupNonUniformElect:
		case OpGroupNonUniformAll:
		case OpGroupNonUniformAny:
		case OpGroupNonUniformAllEqual:
		case OpGroupNonUniformBroadcast:
		case OpGroupNonUniformBroadcastFirst:
		case OpGroupNonUniformBallot:
		case OpGroupNonUniformInverseBallot:
		case OpGroupNonUniformBallotBitExtract:
		case OpGroupNonUniformBallotBitCount:
		case OpGroupNonUniformBallotFindLSB:
		case OpGroupNonUniformBallotFindMSB:
		case OpGroupNonUniformShuffle:
		case OpGroupNonUniformShuffleXor:
		case OpGroupNonUniformShuffleUp:
		case OpGroupNonUniformShuffleDown:
		case OpGroupNonUniformIAdd:
		case OpGroupNonUniformFAdd:
		case OpGroupNonUniformIMul:
		case OpGroupNonUniformFMul:
		case OpGroupNonUniformSMin:
		case OpGroupNonUniformUMin:
		case OpGroupNonUniformFMin:
		case OpGroupNonUniformSMax:
		case OpGroupNonUniformUMax:
		case OpGroupNonUniformFMax:
		case OpGroupNonUniformBitwiseAnd:
		case OpGroupNonUniformBitwiseOr:
		case OpGroupNonUniformBitwiseXor:
		case OpGroupNonUniformLogicalAnd:
		case OpGroupNonUniformLogicalOr:
		case OpGroupNonUniformLogicalXor:
		case OpGroupNonUniformQuadBroadcast:
		case OpGroupNonUniformQuadSwap:
		case OpControlBarrier:

bool Compiler::block_is_pure(const SPIRBlock &block)
	// This is a global side effect of the function.
	if (block.terminator == SPIRBlock::Kill ||
	    block.terminator == SPIRBlock::TerminateRay ||
	    block.terminator == SPIRBlock::IgnoreIntersection ||
	    block.terminator == SPIRBlock::EmitMeshTasks)
	for (auto &i : block.ops)
		auto ops = stream(i);
		auto op = static_cast<Op>(i.op);
			uint32_t func = ops[2];
			if (!function_is_pure(get<SPIRFunction>(func)))
			auto &type = expression_type(ops[0]);
			if (type.storage != StorageClassFunction)
		// Atomics are impure.
		case OpAtomicExchange:
		case OpAtomicCompareExchange:
		case OpAtomicCompareExchangeWeak:
		case OpAtomicIIncrement:
		case OpAtomicIDecrement:
		// Geometry shader builtins modify global state.
		case OpEmitStreamVertex:
		case OpEndStreamPrimitive:
		// Mesh shader functions modify global state.
		// (EmitMeshTasks is a terminator).
		case OpSetMeshOutputsEXT:
		// Barriers disallow any reordering, so we should treat blocks with barrier as writing.
		case OpControlBarrier:
		case OpMemoryBarrier:
		// Ray tracing builtins are impure.
		case OpReportIntersectionKHR:
		case OpIgnoreIntersectionNV:
		case OpTerminateRayNV:
		case OpExecuteCallableNV:
		case OpExecuteCallableKHR:
		case OpRayQueryInitializeKHR:
		case OpRayQueryTerminateKHR:
		case OpRayQueryGenerateIntersectionKHR:
		case OpRayQueryConfirmIntersectionKHR:
		case OpRayQueryProceedKHR:
		// There are various getters in ray query, but they are considered pure.
		// OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure.
		case OpDemoteToHelperInvocationEXT:
			// This is a global side effect of the function.
			uint32_t extension_set = ops[2];
			if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
				auto op_450 = static_cast<GLSLstd450>(ops[3]);
				case GLSLstd450Frexp:
					auto &type = expression_type(ops[5]);
					if (type.storage != StorageClassFunction)
string Compiler::to_name(uint32_t id, bool allow_alias) const
	if (allow_alias && ir.ids[id].get_type() == TypeType)
		// If this type is a simple alias, emit the
		// name of the original type instead.
		// We don't want to override the meta alias
		// as that can be overridden by the reflection APIs after parse.
		auto &type = get<SPIRType>(id);
			// If the alias master has been specially packed, we will have emitted a clean variant as well,
			// so skip the name aliasing here.
			if (!has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
				return to_name(type.type_alias);
	auto &alias = ir.get_name(id);
		return join("_", id);

bool Compiler::function_is_pure(const SPIRFunction &func)
	for (auto block : func.blocks)
		if (!block_is_pure(get<SPIRBlock>(block)))

bool Compiler::function_is_control_dependent(const SPIRFunction &func)
	for (auto block : func.blocks)
		if (block_is_control_dependent(get<SPIRBlock>(block)))

void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_t id)
	for (auto &i : block.ops)
		auto ops = stream(i);
		auto op = static_cast<Op>(i.op);
			uint32_t func = ops[2];
			register_global_read_dependencies(get<SPIRFunction>(func), id);
			// If we're in a storage class which does not get invalidated, adding dependencies here is no big deal.
			auto *var = maybe_get_backing_variable(ops[2]);
			if (var && var->storage != StorageClassFunction)
				auto &type = get<SPIRType>(var->basetype);
				// InputTargets are immutable.
				if (type.basetype != SPIRType::Image && type.image.dim != DimSubpassData)
					var->dependees.push_back(id);

void Compiler::register_global_read_dependencies(const SPIRFunction &func, uint32_t id)
	for (auto block : func.blocks)
		register_global_read_dependencies(get<SPIRBlock>(block), id);
SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain)
	auto *var = maybe_get<SPIRVariable>(chain);
		auto *cexpr = maybe_get<SPIRExpression>(chain);
			var = maybe_get<SPIRVariable>(cexpr->loaded_from);
		auto *access_chain = maybe_get<SPIRAccessChain>(chain);
			var = maybe_get<SPIRVariable>(access_chain->loaded_from);

void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded)
	auto &e = get<SPIRExpression>(expr);
	auto *var = maybe_get_backing_variable(chain);
		e.loaded_from = var->self;
		// If the backing variable is immutable, we do not need to depend on the variable.
		if (forwarded && !is_immutable(var->self))
			var->dependees.push_back(e.self);
		// If we load from a parameter, make sure we create "inout" if we also write to the parameter.
		// The default is "in" however, so we never invalidate our compilation by reading.
		if (var && var->parameter)
			var->parameter->read_count++;
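// Note on register_read() above: loading from a function parameter only bumps read_count, so the
// parameter can keep its default "in" qualifier; it is a later register_write() on the same
// parameter (below) that upgrades it, which is why plain reads never invalidate the current
// compilation.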
void Compiler::register_write(uint32_t chain)
	auto *var = maybe_get<SPIRVariable>(chain);
		// If we're storing through an access chain, invalidate the backing variable instead.
		auto *expr = maybe_get<SPIRExpression>(chain);
		if (expr && expr->loaded_from)
			var = maybe_get<SPIRVariable>(expr->loaded_from);
		auto *access_chain = maybe_get<SPIRAccessChain>(chain);
		if (access_chain && access_chain->loaded_from)
			var = maybe_get<SPIRVariable>(access_chain->loaded_from);
	auto &chain_type = expression_type(chain);
		bool check_argument_storage_qualifier = true;
		auto &type = expression_type(chain);
		// If our variable is in a storage class which can alias with other buffers,
		// invalidate all variables which depend on aliased variables. And if this is a
		// variable pointer, then invalidate all variables regardless.
		if (get_variable_data_type(*var).pointer)
			flush_all_active_variables();
			if (type.pointer_depth == 1)
				// We have a backing variable which is a pointer-to-pointer type.
				// We are storing some data through a pointer acquired through that variable,
				// but we are not writing to the value of the variable itself,
				// i.e., we are not modifying the pointer directly.
				// If we are storing a non-pointer type (pointer_depth == 1),
				// we know that we are storing some unrelated data.
				// A case here would be
				// void foo(Foo * const *arg) {
				//   bar->unrelated = 42;
				// arg, the argument is constant.
				check_argument_storage_qualifier = false;
		if (type.storage == StorageClassPhysicalStorageBufferEXT || variable_storage_is_aliased(*var))
			flush_all_aliased_variables();
			flush_dependees(*var);
		// We tried to write to a parameter which is not marked with out qualifier, force a recompile.
		if (check_argument_storage_qualifier && var->parameter && var->parameter->write_count == 0)
			var->parameter->write_count++;
	else if (chain_type.pointer)
		// If we stored through a variable pointer, then we don't know which
		// variable we stored to. So *all* expressions after this point need to
		// FIXME: If we can prove that the variable pointer will point to
		// only certain variables, we can invalidate only those.
		flush_all_active_variables();
	// If chain_type.pointer is false, we're not writing to memory backed variables, but temporaries instead.
	// This can happen in copy_logical_type where we unroll complex reads and writes to temporaries.
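// Summary of the invalidation rules in register_write() above: a store through a variable pointer
// or into an aliased storage class (SSBO / PhysicalStorageBuffer) conservatively flushes every
// active expression, a plain store only flushes the dependees of its one backing variable, and a
// store to a parameter whose write_count is still 0 forces a recompile so the argument can be
// redeclared with an out/inout qualifier.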
void Compiler::flush_dependees(SPIRVariable &var)
	for (auto expr : var.dependees)
		invalid_expressions.insert(expr);
	var.dependees.clear();

void Compiler::flush_all_aliased_variables()
	for (auto aliased : aliased_variables)
		flush_dependees(get<SPIRVariable>(aliased));

void Compiler::flush_all_atomic_capable_variables()
	for (auto global : global_variables)
		flush_dependees(get<SPIRVariable>(global));
	flush_all_aliased_variables();

void Compiler::flush_control_dependent_expressions(uint32_t block_id)
	auto &block = get<SPIRBlock>(block_id);
	for (auto &expr : block.invalidate_expressions)
		invalid_expressions.insert(expr);
	block.invalidate_expressions.clear();

void Compiler::flush_all_active_variables()
	// Invalidate all temporaries we read from variables in this block since they were forwarded.
	// Invalidate all temporaries we read from globals.
	for (auto &v : current_function->local_variables)
		flush_dependees(get<SPIRVariable>(v));
	for (auto &arg : current_function->arguments)
		flush_dependees(get<SPIRVariable>(arg.id));
	for (auto global : global_variables)
		flush_dependees(get<SPIRVariable>(global));
	flush_all_aliased_variables();

uint32_t Compiler::expression_type_id(uint32_t id) const
	switch (ir.ids[id].get_type())
		return get<SPIRVariable>(id).basetype;
		return get<SPIRExpression>(id).expression_type;
		return get<SPIRConstant>(id).constant_type;
		return get<SPIRConstantOp>(id).basetype;
		return get<SPIRUndef>(id).basetype;
	case TypeCombinedImageSampler:
		return get<SPIRCombinedImageSampler>(id).combined_type;
	case TypeAccessChain:
		return get<SPIRAccessChain>(id).basetype;
		SPIRV_CROSS_THROW("Cannot resolve expression type.");

const SPIRType &Compiler::expression_type(uint32_t id) const
	return get<SPIRType>(expression_type_id(id));

bool Compiler::expression_is_lvalue(uint32_t id) const
	auto &type = expression_type(id);
	switch (type.basetype)
	case SPIRType::SampledImage:
	case SPIRType::Image:
	case SPIRType::Sampler:

bool Compiler::is_immutable(uint32_t id) const
	if (ir.ids[id].get_type() == TypeVariable)
		auto &var = get<SPIRVariable>(id);
		// Anything we load from the UniformConstant address space is guaranteed to be immutable.
		bool pointer_to_const = var.storage == StorageClassUniformConstant;
		return pointer_to_const || var.phi_variable || !expression_is_lvalue(id);
	else if (ir.ids[id].get_type() == TypeAccessChain)
		return get<SPIRAccessChain>(id).immutable;
	else if (ir.ids[id].get_type() == TypeExpression)
		return get<SPIRExpression>(id).immutable;
	else if (ir.ids[id].get_type() == TypeConstant || ir.ids[id].get_type() == TypeConstantOp ||
	         ir.ids[id].get_type() == TypeUndef)
static inline bool storage_class_is_interface(spv::StorageClass storage)
	case StorageClassInput:
	case StorageClassOutput:
	case StorageClassUniform:
	case StorageClassUniformConstant:
	case StorageClassAtomicCounter:
	case StorageClassPushConstant:
	case StorageClassStorageBuffer:

bool Compiler::is_hidden_variable(const SPIRVariable &var, bool include_builtins) const
	if ((is_builtin_variable(var) && !include_builtins) || var.remapped_variable)
	// Combined image samplers are always considered active as they are "magic" variables.
	if (find_if(begin(combined_image_samplers), end(combined_image_samplers), [&var](const CombinedImageSampler &samp) {
		    return samp.combined_id == var.self;
	    }) != end(combined_image_samplers))
	// In SPIR-V 1.4 and up we must also use the active variable interface to disable global variables
	// which are not part of the entry point.
	if (ir.get_spirv_version() >= 0x10400 && var.storage != spv::StorageClassGeneric &&
	    var.storage != spv::StorageClassFunction && !interface_variable_exists_in_entry_point(var.self))
	return check_active_interface_variables && storage_class_is_interface(var.storage) &&
	       active_interface_variables.find(var.self) == end(active_interface_variables);

bool Compiler::is_builtin_type(const SPIRType &type) const
	auto *type_meta = ir.find_meta(type.self);
	// We can have builtin structs as well. If one member of a struct is builtin, the struct must also be builtin.
		for (auto &m : type_meta->members)

bool Compiler::is_builtin_variable(const SPIRVariable &var) const
	auto *m = ir.find_meta(var.self);
	if (var.compat_builtin || (m && m->decoration.builtin))
		return is_builtin_type(get<SPIRType>(var.basetype));

bool Compiler::is_member_builtin(const SPIRType &type, uint32_t index, BuiltIn *builtin) const
	auto *type_meta = ir.find_meta(type.self);
		auto &memb = type_meta->members;
		if (index < memb.size() && memb[index].builtin)
			*builtin = memb[index].builtin_type;

bool Compiler::is_scalar(const SPIRType &type) const
	return type.basetype != SPIRType::Struct && type.vecsize == 1 && type.columns == 1;

bool Compiler::is_vector(const SPIRType &type) const
	return type.vecsize > 1 && type.columns == 1;

bool Compiler::is_matrix(const SPIRType &type) const
	return type.vecsize > 1 && type.columns > 1;

bool Compiler::is_array(const SPIRType &type) const
	return type.op == OpTypeArray || type.op == OpTypeRuntimeArray;

bool Compiler::is_pointer(const SPIRType &type) const
	return type.op == OpTypePointer && type.basetype != SPIRType::Unknown; // Ignore function pointers.

bool Compiler::is_physical_pointer(const SPIRType &type) const
	return type.op == OpTypePointer && type.storage == StorageClassPhysicalStorageBuffer;

bool Compiler::is_physical_pointer_to_buffer_block(const SPIRType &type) const
	return is_physical_pointer(type) && get_pointee_type(type).self == type.parent_type &&
	       (has_decoration(type.self, DecorationBlock) ||
	        has_decoration(type.self, DecorationBufferBlock));

bool Compiler::is_runtime_size_array(const SPIRType &type)
	return type.op == OpTypeRuntimeArray;
ShaderResources Compiler::get_shader_resources() const
	return get_shader_resources(nullptr);

ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> &active_variables) const
	return get_shader_resources(&active_variables);
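// Typical reflection usage of the overloads above (a sketch, assuming a compiled
// "CompilerGLSL compiler(std::move(spirv_words))" instance; error handling omitted):
//
//     ShaderResources res = compiler.get_shader_resources();
//     for (auto &ubo : res.uniform_buffers)
//     {
//         uint32_t set = compiler.get_decoration(ubo.id, spv::DecorationDescriptorSet);
//         uint32_t binding = compiler.get_decoration(ubo.id, spv::DecorationBinding);
//     }
//
// Passing an active-variable set (see get_active_interface_variables() further down) restricts the
// reflected resources to those actually referenced by the entry point.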
bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
	uint32_t variable = 0;
	// Need this first, otherwise, GCC complains about unhandled switch statements.
		uint32_t count = length - 3;
		for (uint32_t i = 0; i < count; i++)
			auto *var = compiler.maybe_get<SPIRVariable>(args[i]);
			if (var && storage_class_is_interface(var->storage))
				variables.insert(args[i]);
		uint32_t count = length - 3;
		for (uint32_t i = 0; i < count; i++)
			auto *var = compiler.maybe_get<SPIRVariable>(args[i]);
			if (var && storage_class_is_interface(var->storage))
				variables.insert(args[i]);
		uint32_t count = length - 2;
		for (uint32_t i = 0; i < count; i += 2)
			auto *var = compiler.maybe_get<SPIRVariable>(args[i]);
			if (var && storage_class_is_interface(var->storage))
				variables.insert(args[i]);
		auto *var = compiler.maybe_get<SPIRVariable>(args[0]);
		if (var && storage_class_is_interface(var->storage))
			variables.insert(args[0]);
		var = compiler.maybe_get<SPIRVariable>(args[1]);
		if (var && storage_class_is_interface(var->storage))
			variables.insert(args[1]);
		auto &extension_set = compiler.get<SPIRExtension>(args[2]);
		switch (extension_set.ext)
		case SPIRExtension::GLSL:
			auto op = static_cast<GLSLstd450>(args[3]);
			case GLSLstd450InterpolateAtCentroid:
			case GLSLstd450InterpolateAtSample:
			case GLSLstd450InterpolateAtOffset:
				auto *var = compiler.maybe_get<SPIRVariable>(args[4]);
				if (var && storage_class_is_interface(var->storage))
					variables.insert(args[4]);
			case GLSLstd450Fract:
				auto *var = compiler.maybe_get<SPIRVariable>(args[5]);
				if (var && storage_class_is_interface(var->storage))
					variables.insert(args[5]);
		case SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter:
			enum AMDShaderExplicitVertexParameter
				InterpolateAtVertexAMD = 1
			auto op = static_cast<AMDShaderExplicitVertexParameter>(args[3]);
			case InterpolateAtVertexAMD:
				auto *var = compiler.maybe_get<SPIRVariable>(args[4]);
				if (var && storage_class_is_interface(var->storage))
					variables.insert(args[4]);
	case OpInBoundsAccessChain:
	case OpPtrAccessChain:
	case OpImageTexelPointer:
	case OpAtomicExchange:
	case OpAtomicCompareExchange:
	case OpAtomicCompareExchangeWeak:
	case OpAtomicIIncrement:
	case OpAtomicIDecrement:
	auto *var = compiler.maybe_get<SPIRVariable>(variable);
	if (var && storage_class_is_interface(var->storage))
		variables.insert(variable);

unordered_set<VariableID> Compiler::get_active_interface_variables() const
	// Traverse the call graph and find all interface variables which are in use.
	unordered_set<VariableID> variables;
	InterfaceVariableAccessHandler handler(*this, variables);
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		if (var.storage != StorageClassOutput)
		if (!interface_variable_exists_in_entry_point(var.self))
		// An output variable which is just declared (but uninitialized) might be read by subsequent stages
		// so we should force-enable these outputs,
		// since compilation will fail if a subsequent stage attempts to read from the variable in question.
		// Also, make sure we preserve output variables which are only initialized, but never accessed by any code.
		if (var.initializer != ID(0) || get_execution_model() != ExecutionModelFragment)
			variables.insert(var.self);
	// If we needed to create one, we'll need it.
	if (dummy_sampler_id)
		variables.insert(dummy_sampler_id);
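// Sketch of the intended workflow for the active-variable API (names as in the public headers):
//
//     auto active = compiler.get_active_interface_variables();
//     ShaderResources res = compiler.get_shader_resources(active);
//     compiler.set_enabled_interface_variables(std::move(active));
//
// After set_enabled_interface_variables() below, interface variables outside the active set are
// treated as hidden by is_hidden_variable() above and are not emitted by the backends.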
void Compiler::set_enabled_interface_variables(std::unordered_set<VariableID> active_variables)
	active_interface_variables = std::move(active_variables);
	check_active_interface_variables = true;

ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> *active_variables) const
	bool ssbo_instance_name = reflection_ssbo_instance_name_is_significant();
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);
		// It is possible for uniform storage classes to be passed as function parameters, so detect
		// that. To detect function parameters, check if the StorageClass of the variable is function scope.
		if (var.storage == StorageClassFunction || !type.pointer)
		if (active_variables && active_variables->find(var.self) == end(*active_variables))
		// In SPIR-V 1.4 and up, every global must be present in the entry point interface list,
		// not just IO variables.
		bool active_in_entry_point = true;
		if (ir.get_spirv_version() < 0x10400)
			if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
				active_in_entry_point = interface_variable_exists_in_entry_point(var.self);
			active_in_entry_point = interface_variable_exists_in_entry_point(var.self);
		if (!active_in_entry_point)
		bool is_builtin = is_builtin_variable(var);
			if (var.storage != StorageClassInput && var.storage != StorageClassOutput)
			auto &list = var.storage == StorageClassInput ? res.builtin_inputs : res.builtin_outputs;
			BuiltInResource resource;
			if (has_decoration(type.self, DecorationBlock))
				resource.resource = { var.self, var.basetype, type.self,
				                      get_remapped_declared_block_name(var.self, false) };
				for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
					resource.value_type_id = type.member_types[i];
					resource.builtin = BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn));
					list.push_back(resource);
				    !has_decoration(var.self, DecorationPatch) && (
				        get_execution_model() == ExecutionModelTessellationControl ||
				        (get_execution_model() == ExecutionModelTessellationEvaluation &&
				         var.storage == StorageClassInput));
				resource.resource = { var.self, var.basetype, type.self, get_name(var.self) };
				if (strip_array && !type.array.empty())
					resource.value_type_id = get_variable_data_type(var).parent_type;
					resource.value_type_id = get_variable_data_type_id(var);
				assert(resource.value_type_id);
				resource.builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
				list.push_back(std::move(resource));
		if (var.storage == StorageClassInput)
			if (has_decoration(type.self, DecorationBlock))
				res.stage_inputs.push_back(
				    { var.self, var.basetype, type.self,
				      get_remapped_declared_block_name(var.self, false) });
				res.stage_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		else if (var.storage == StorageClassUniformConstant && type.image.dim == DimSubpassData)
			res.subpass_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		else if (var.storage == StorageClassOutput)
			if (has_decoration(type.self, DecorationBlock))
				res.stage_outputs.push_back(
				    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) });
				res.stage_outputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock))
			res.uniform_buffers.push_back(
			    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) });
		// Old way to declare SSBOs.
		else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock))
			res.storage_buffers.push_back(
			    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) });
		// Modern way to declare SSBOs.
		else if (type.storage == StorageClassStorageBuffer)
			res.storage_buffers.push_back(
			    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) });
		// Push constant blocks
		else if (type.storage == StorageClassPushConstant)
			// There can only be one push constant block, but keep the vector in case this restriction is lifted
			res.push_constant_buffers.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		else if (type.storage == StorageClassShaderRecordBufferKHR)
			res.shader_record_buffers.push_back({ var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) });
		else if (type.storage == StorageClassAtomicCounter)
			res.atomic_counters.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		else if (type.storage == StorageClassUniformConstant)
			if (type.basetype == SPIRType::Image)
				if (type.image.sampled == 2)
					res.storage_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
				else if (type.image.sampled == 1)
					res.separate_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
			// Separate samplers
			else if (type.basetype == SPIRType::Sampler)
				res.separate_samplers.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
			else if (type.basetype == SPIRType::SampledImage)
				res.sampled_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
			// Acceleration structures
			else if (type.basetype == SPIRType::AccelerationStructure)
				res.acceleration_structures.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
				res.gl_plain_uniforms.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
bool Compiler::type_is_top_level_block(const SPIRType &type) const
	if (type.basetype != SPIRType::Struct)
	return has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);

bool Compiler::type_is_block_like(const SPIRType &type) const
	if (type_is_top_level_block(type))
	if (type.basetype == SPIRType::Struct)
		// Block-like types may have Offset decorations.
		for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
			if (has_member_decoration(type.self, i, DecorationOffset))
void Compiler::parse_fixup()
	// Figure out specialization constants for work group sizes.
	for (auto id_ : ir.ids_for_constant_or_variable)
		auto &id = ir.ids[id_];
		if (id.get_type() == TypeConstant)
			auto &c = id.get<SPIRConstant>();
			if (has_decoration(c.self, DecorationBuiltIn) &&
			    BuiltIn(get_decoration(c.self, DecorationBuiltIn)) == BuiltInWorkgroupSize)
				// In current SPIR-V, there can be just one constant like this.
				// All entry points will receive the constant value.
				// WorkgroupSize takes precedence over LocalSizeId.
				for (auto &entry : ir.entry_points)
					entry.second.workgroup_size.constant = c.self;
					entry.second.workgroup_size.x = c.scalar(0, 0);
					entry.second.workgroup_size.y = c.scalar(0, 1);
					entry.second.workgroup_size.z = c.scalar(0, 2);
		else if (id.get_type() == TypeVariable)
			auto &var = id.get<SPIRVariable>();
			if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup ||
			    var.storage == StorageClassTaskPayloadWorkgroupEXT ||
			    var.storage == StorageClassOutput)
				global_variables.push_back(var.self);
			if (variable_storage_is_aliased(var))
				aliased_variables.push_back(var.self);
void Compiler::update_name_cache(unordered_set<string> &cache_primary, const unordered_set<string> &cache_secondary,
	const auto find_name = [&](const string &n) -> bool {
		if (cache_primary.find(n) != end(cache_primary))
		if (&cache_primary != &cache_secondary)
			if (cache_secondary.find(n) != end(cache_secondary))
	const auto insert_name = [&](const string &n) { cache_primary.insert(n); };
	if (!find_name(name))
	uint32_t counter = 0;
	auto tmpname = name;
	bool use_linked_underscore = true;
		// We cannot just append numbers, as we will end up creating internally reserved names.
		// Make it like _0_<counter> instead.
	else if (tmpname.back() == '_')
		// The last character is an underscore, so we don't need to link in underscore.
		// This would violate double underscore rules.
		use_linked_underscore = false;
	// If there is a collision (very rare),
	// keep tacking on extra identifier until it's unique.
		name = tmpname + (use_linked_underscore ? "_" : "") + convert_to_string(counter);
	} while (find_name(name));
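// Collision handling example for the loop above: a taken name such as "foo" is retried as
// "foo_<counter>" with an increasing counter until it is absent from both caches, while a name
// that already ends in '_' skips the linking underscore so no reserved double underscore is
// produced.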
void Compiler::update_name_cache(unordered_set<string> &cache, string &name)
	update_name_cache(cache, cache, name);

void Compiler::set_name(ID id, const std::string &name)
	ir.set_name(id, name);

const SPIRType &Compiler::get_type(TypeID id) const
	return get<SPIRType>(id);

const SPIRType &Compiler::get_type_from_variable(VariableID id) const
	return get<SPIRType>(get<SPIRVariable>(id).basetype);

uint32_t Compiler::get_pointee_type_id(uint32_t type_id) const
	auto *p_type = &get<SPIRType>(type_id);
	if (p_type->pointer)
		assert(p_type->parent_type);
		type_id = p_type->parent_type;

const SPIRType &Compiler::get_pointee_type(const SPIRType &type) const
	auto *p_type = &type;
	if (p_type->pointer)
		assert(p_type->parent_type);
		p_type = &get<SPIRType>(p_type->parent_type);

const SPIRType &Compiler::get_pointee_type(uint32_t type_id) const
	return get_pointee_type(get<SPIRType>(type_id));

uint32_t Compiler::get_variable_data_type_id(const SPIRVariable &var) const
	if (var.phi_variable || var.storage == spv::StorageClass::StorageClassAtomicCounter)
		return var.basetype;
	return get_pointee_type_id(var.basetype);

SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var)
	return get<SPIRType>(get_variable_data_type_id(var));

const SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var) const
	return get<SPIRType>(get_variable_data_type_id(var));

SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var)
	SPIRType *type = &get_variable_data_type(var);
	if (is_array(*type))
		type = &get<SPIRType>(type->parent_type);

const SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var) const
	const SPIRType *type = &get_variable_data_type(var);
	if (is_array(*type))
		type = &get<SPIRType>(type->parent_type);

bool Compiler::is_sampled_image_type(const SPIRType &type)
	return (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage) && type.image.sampled == 1 &&
	       type.image.dim != DimBuffer;
void Compiler::set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration,
                                            const std::string &argument)
	ir.set_member_decoration_string(id, index, decoration, argument);

void Compiler::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument)
	ir.set_member_decoration(id, index, decoration, argument);

void Compiler::set_member_name(TypeID id, uint32_t index, const std::string &name)
	ir.set_member_name(id, index, name);

const std::string &Compiler::get_member_name(TypeID id, uint32_t index) const
	return ir.get_member_name(id, index);

void Compiler::set_qualified_name(uint32_t id, const string &name)
	ir.meta[id].decoration.qualified_alias = name;

void Compiler::set_member_qualified_name(uint32_t type_id, uint32_t index, const std::string &name)
	ir.meta[type_id].members.resize(max(ir.meta[type_id].members.size(), size_t(index) + 1));
	ir.meta[type_id].members[index].qualified_alias = name;

const string &Compiler::get_member_qualified_name(TypeID type_id, uint32_t index) const
	auto *m = ir.find_meta(type_id);
	if (m && index < m->members.size())
		return m->members[index].qualified_alias;
	return ir.get_empty_string();

uint32_t Compiler::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
	return ir.get_member_decoration(id, index, decoration);

const Bitset &Compiler::get_member_decoration_bitset(TypeID id, uint32_t index) const
	return ir.get_member_decoration_bitset(id, index);

bool Compiler::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
	return ir.has_member_decoration(id, index, decoration);

void Compiler::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration)
	ir.unset_member_decoration(id, index, decoration);

void Compiler::set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument)
	ir.set_decoration_string(id, decoration, argument);

void Compiler::set_decoration(ID id, Decoration decoration, uint32_t argument)
	ir.set_decoration(id, decoration, argument);

void Compiler::set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value)
	auto &dec = ir.meta[id].decoration;
	dec.extended.flags.set(decoration);
	dec.extended.values[decoration] = value;

void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration,
	ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1));
	auto &dec = ir.meta[type].members[index];
	dec.extended.flags.set(decoration);
	dec.extended.values[decoration] = value;

static uint32_t get_default_extended_decoration(ExtendedDecorations decoration)
	case SPIRVCrossDecorationResourceIndexPrimary:
	case SPIRVCrossDecorationResourceIndexSecondary:
	case SPIRVCrossDecorationResourceIndexTertiary:
	case SPIRVCrossDecorationResourceIndexQuaternary:
	case SPIRVCrossDecorationInterfaceMemberIndex:

uint32_t Compiler::get_extended_decoration(uint32_t id, ExtendedDecorations decoration) const
	auto *m = ir.find_meta(id);
	auto &dec = m->decoration;
	if (!dec.extended.flags.get(decoration))
		return get_default_extended_decoration(decoration);
	return dec.extended.values[decoration];

uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const
	auto *m = ir.find_meta(type);
	if (index >= m->members.size())
	auto &dec = m->members[index];
	if (!dec.extended.flags.get(decoration))
		return get_default_extended_decoration(decoration);
	return dec.extended.values[decoration];

bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decoration) const
	auto *m = ir.find_meta(id);
	auto &dec = m->decoration;
	return dec.extended.flags.get(decoration);

bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const
	auto *m = ir.find_meta(type);
	if (index >= m->members.size())
	auto &dec = m->members[index];
	return dec.extended.flags.get(decoration);

void Compiler::unset_extended_decoration(uint32_t id, ExtendedDecorations decoration)
	auto &dec = ir.meta[id].decoration;
	dec.extended.flags.clear(decoration);
	dec.extended.values[decoration] = 0;

void Compiler::unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration)
	ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1));
	auto &dec = ir.meta[type].members[index];
	dec.extended.flags.clear(decoration);
	dec.extended.values[decoration] = 0;

StorageClass Compiler::get_storage_class(VariableID id) const
	return get<SPIRVariable>(id).storage;

const std::string &Compiler::get_name(ID id) const
	return ir.get_name(id);

const std::string Compiler::get_fallback_name(ID id) const
	return join("_", id);

const std::string Compiler::get_block_fallback_name(VariableID id) const
	auto &var = get<SPIRVariable>(id);
	if (get_name(id).empty())
		return join("_", get<SPIRType>(var.basetype).self, "_", id);
		return get_name(id);

const Bitset &Compiler::get_decoration_bitset(ID id) const
	return ir.get_decoration_bitset(id);

bool Compiler::has_decoration(ID id, Decoration decoration) const
	return ir.has_decoration(id, decoration);

const string &Compiler::get_decoration_string(ID id, Decoration decoration) const
	return ir.get_decoration_string(id, decoration);

const string &Compiler::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const
	return ir.get_member_decoration_string(id, index, decoration);

uint32_t Compiler::get_decoration(ID id, Decoration decoration) const
	return ir.get_decoration(id, decoration);

void Compiler::unset_decoration(ID id, Decoration decoration)
	ir.unset_decoration(id, decoration);

bool Compiler::get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const
	auto *m = ir.find_meta(id);
	auto &word_offsets = m->decoration_word_offset;
	auto itr = word_offsets.find(decoration);
	if (itr == end(word_offsets))
	word_offset = itr->second;
bool Compiler::block_is_noop(const SPIRBlock &block) const
	if (block.terminator != SPIRBlock::Direct)
	auto &child = get<SPIRBlock>(block.next_block);
	// If this block participates in PHI, the block isn't really noop.
	for (auto &phi : block.phi_variables)
		if (phi.parent == block.self || phi.parent == child.self)
	for (auto &phi : child.phi_variables)
		if (phi.parent == block.self)
	// Verify all instructions have no semantic impact.
	for (auto &i : block.ops)
		auto op = static_cast<Op>(i.op);
		// Non-Semantic instructions.
			auto *ops = stream(i);
			auto ext = get<SPIRExtension>(ops[2]).ext;
			bool ext_is_nonsemantic_only =
			    ext == SPIRExtension::NonSemanticShaderDebugInfo ||
			    ext == SPIRExtension::SPV_debug_info ||
			    ext == SPIRExtension::NonSemanticGeneric;
			if (!ext_is_nonsemantic_only)

bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const
	// Tried and failed.
	if (block.disable_block_optimization || block.complex_continue)
	if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
		// Try to detect common for loop pattern
		// which the code backend can use to create cleaner code.
		// for(;;) { if (cond) { some_body; } else { break; } }
		// is the pattern we're looking for.
		const auto *false_block = maybe_get<SPIRBlock>(block.false_block);
		const auto *true_block = maybe_get<SPIRBlock>(block.true_block);
		const auto *merge_block = maybe_get<SPIRBlock>(block.merge_block);
		bool false_block_is_merge = block.false_block == block.merge_block ||
		                            (false_block && merge_block && execution_is_noop(*false_block, *merge_block));
		bool true_block_is_merge = block.true_block == block.merge_block ||
		                           (true_block && merge_block && execution_is_noop(*true_block, *merge_block));
		bool positive_candidate =
		    block.true_block != block.merge_block && block.true_block != block.self && false_block_is_merge;
		bool negative_candidate =
		    block.false_block != block.merge_block && block.false_block != block.self && true_block_is_merge;
		bool ret = block.terminator == SPIRBlock::Select && block.merge == SPIRBlock::MergeLoop &&
		           (positive_candidate || negative_candidate);
		if (ret && positive_candidate && method == SPIRBlock::MergeToSelectContinueForLoop)
			ret = block.true_block == block.continue_block;
		else if (ret && negative_candidate && method == SPIRBlock::MergeToSelectContinueForLoop)
			ret = block.false_block == block.continue_block;
		// If we have OpPhi which depends on branches which came from our own block,
		// we need to flush phi variables in else block instead of a trivial break,
		// so we cannot assume this is a for loop candidate.
			for (auto &phi : block.phi_variables)
				if (phi.parent == block.self)
			auto *merge = maybe_get<SPIRBlock>(block.merge_block);
				for (auto &phi : merge->phi_variables)
					if (phi.parent == block.self)
	else if (method == SPIRBlock::MergeToDirectForLoop)
		// Empty loop header that just sets up merge target
		// and branches to loop body.
		bool ret = block.terminator == SPIRBlock::Direct && block.merge == SPIRBlock::MergeLoop && block_is_noop(block);
		auto &child = get<SPIRBlock>(block.next_block);
		const auto *false_block = maybe_get<SPIRBlock>(child.false_block);
		const auto *true_block = maybe_get<SPIRBlock>(child.true_block);
		const auto *merge_block = maybe_get<SPIRBlock>(block.merge_block);
		bool false_block_is_merge = child.false_block == block.merge_block ||
		                            (false_block && merge_block && execution_is_noop(*false_block, *merge_block));
		bool true_block_is_merge = child.true_block == block.merge_block ||
		                           (true_block && merge_block && execution_is_noop(*true_block, *merge_block));
		bool positive_candidate =
		    child.true_block != block.merge_block && child.true_block != block.self && false_block_is_merge;
		bool negative_candidate =
		    child.false_block != block.merge_block && child.false_block != block.self && true_block_is_merge;
		ret = child.terminator == SPIRBlock::Select && child.merge == SPIRBlock::MergeNone &&
		      (positive_candidate || negative_candidate);
			auto *merge = maybe_get<SPIRBlock>(block.merge_block);
				for (auto &phi : merge->phi_variables)
					if (phi.parent == block.self || phi.parent == child.false_block)

bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const
	if (!execution_is_branchless(from, to))
	auto *start = &from;
		if (start->self == to.self)
		if (!block_is_noop(*start))
		auto &next = get<SPIRBlock>(start->next_block);

bool Compiler::execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const
	auto *start = &from;
		if (start->self == to.self)
		if (start->terminator == SPIRBlock::Direct && start->merge == SPIRBlock::MergeNone)
			start = &get<SPIRBlock>(start->next_block);

bool Compiler::execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const
	return from.terminator == SPIRBlock::Direct && from.merge == SPIRBlock::MergeNone && from.next_block == to.self;
SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &block) const
	// The block was deemed too complex during code emit, pick conservative fallback paths.
	if (block.complex_continue)
		return SPIRBlock::ComplexLoop;
	// In older glslang output continue block can be equal to the loop header.
	// In this case, execution is clearly branchless, so just assume a while loop header here.
	if (block.merge == SPIRBlock::MergeLoop)
		return SPIRBlock::WhileLoop;
	if (block.loop_dominator == BlockID(SPIRBlock::NoDominator))
		// Continue block is never reached from CFG.
		return SPIRBlock::ComplexLoop;
	auto &dominator = get<SPIRBlock>(block.loop_dominator);
	if (execution_is_noop(block, dominator))
		return SPIRBlock::WhileLoop;
	else if (execution_is_branchless(block, dominator))
		return SPIRBlock::ForLoop;
		const auto *false_block = maybe_get<SPIRBlock>(block.false_block);
		const auto *true_block = maybe_get<SPIRBlock>(block.true_block);
		const auto *merge_block = maybe_get<SPIRBlock>(dominator.merge_block);
		// If we need to flush Phi in this block, we cannot have a DoWhile loop.
		bool flush_phi_to_false = false_block && flush_phi_required(block.self, block.false_block);
		bool flush_phi_to_true = true_block && flush_phi_required(block.self, block.true_block);
		if (flush_phi_to_false || flush_phi_to_true)
			return SPIRBlock::ComplexLoop;
		bool positive_do_while = block.true_block == dominator.self &&
		                         (block.false_block == dominator.merge_block ||
		                          (false_block && merge_block && execution_is_noop(*false_block, *merge_block)));
		bool negative_do_while = block.false_block == dominator.self &&
		                         (block.true_block == dominator.merge_block ||
		                          (true_block && merge_block && execution_is_noop(*true_block, *merge_block)));
		if (block.merge == SPIRBlock::MergeNone && block.terminator == SPIRBlock::Select &&
		    (positive_do_while || negative_do_while))
			return SPIRBlock::DoWhileLoop;
	return SPIRBlock::ComplexLoop;
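// Worked example for the do-while detection above: a continue block ending in a Select whose true
// branch jumps straight back to the loop dominator and whose false branch reaches the dominator's
// merge block (directly or through no-op blocks) matches positive_do_while, so the backend can
// emit "do { ... } while (cond);" instead of falling back to ComplexLoop.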
const SmallVector<SPIRBlock::Case> &Compiler::get_case_list(const SPIRBlock &block) const
	// First we check if we can get the type directly from the block.condition
	// since it can be a SPIRConstant or a SPIRVariable.
	if (const auto *constant = maybe_get<SPIRConstant>(block.condition))
		const auto &type = get<SPIRType>(constant->constant_type);
	else if (const auto *op = maybe_get<SPIRConstantOp>(block.condition))
		const auto &type = get<SPIRType>(op->basetype);
	else if (const auto *var = maybe_get<SPIRVariable>(block.condition))
		const auto &type = get<SPIRType>(var->basetype);
	else if (const auto *undef = maybe_get<SPIRUndef>(block.condition))
		const auto &type = get<SPIRType>(undef->basetype);
		auto search = ir.load_type_width.find(block.condition);
		if (search == ir.load_type_width.end())
			SPIRV_CROSS_THROW("Use of undeclared variable on a switch statement.");
		width = search->second;
		return block.cases_64bit;
	return block.cases_32bit;

bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const
	handler.set_current_block(block);
	handler.rearm_current_block(block);
	// Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks,
	// but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing
	// inside dead blocks ...
	for (auto &i : block.ops)
		auto ops = stream(i);
		auto op = static_cast<Op>(i.op);
		if (!handler.handle(op, ops, i.length))
		if (op == OpFunctionCall)
			auto &func = get<SPIRFunction>(ops[2]);
			if (handler.follow_function_call(func))
				if (!handler.begin_function_scope(ops, i.length))
				if (!traverse_all_reachable_opcodes(get<SPIRFunction>(ops[2]), handler))
				if (!handler.end_function_scope(ops, i.length))
				handler.rearm_current_block(block);
	if (!handler.handle_terminator(block))

bool Compiler::traverse_all_reachable_opcodes(const SPIRFunction &func, OpcodeHandler &handler) const
	for (auto block : func.blocks)
		if (!traverse_all_reachable_opcodes(get<SPIRBlock>(block), handler))

uint32_t Compiler::type_struct_member_offset(const SPIRType &type, uint32_t index) const
	auto *type_meta = ir.find_meta(type.self);
		// Decoration must be set in valid SPIR-V, otherwise throw.
		auto &dec = type_meta->members[index];
		if (dec.decoration_flags.get(DecorationOffset))
			SPIRV_CROSS_THROW("Struct member does not have Offset set.");
		SPIRV_CROSS_THROW("Struct member does not have Offset set.");

uint32_t Compiler::type_struct_member_array_stride(const SPIRType &type, uint32_t index) const
	auto *type_meta = ir.find_meta(type.member_types[index]);
		// Decoration must be set in valid SPIR-V, otherwise throw.
		// ArrayStride is part of the array type not OpMemberDecorate.
		auto &dec = type_meta->decoration;
		if (dec.decoration_flags.get(DecorationArrayStride))
			return dec.array_stride;
			SPIRV_CROSS_THROW("Struct member does not have ArrayStride set.");
		SPIRV_CROSS_THROW("Struct member does not have ArrayStride set.");

uint32_t Compiler::type_struct_member_matrix_stride(const SPIRType &type, uint32_t index) const
	auto *type_meta = ir.find_meta(type.self);
		// Decoration must be set in valid SPIR-V, otherwise throw.
		// MatrixStride is part of OpMemberDecorate.
		auto &dec = type_meta->members[index];
		if (dec.decoration_flags.get(DecorationMatrixStride))
			return dec.matrix_stride;
			SPIRV_CROSS_THROW("Struct member does not have MatrixStride set.");
		SPIRV_CROSS_THROW("Struct member does not have MatrixStride set.");
size_t Compiler::get_declared_struct_size(const SPIRType &type) const
	if (type.member_types.empty())
		SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");
	// Offsets can be declared out of order, so we need to deduce the actual size
	// based on last member instead.
	uint32_t member_index = 0;
	size_t highest_offset = 0;
	for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
		size_t offset = type_struct_member_offset(type, i);
		if (offset > highest_offset)
			highest_offset = offset;
	size_t size = get_declared_struct_member_size(type, member_index);
	return highest_offset + size;
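// Worked example: in a block whose first member is a float at Offset 16 and whose second member is
// a vec4 at Offset 0, the highest offset is 16 and the member placed there is the float, so the
// declared size is 16 + 4 = 20 bytes; padding after the last-addressed member is not counted.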
size_t Compiler::get_declared_struct_size_runtime_array(const SPIRType &type, size_t array_size) const
	if (type.member_types.empty())
		SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");
	size_t size = get_declared_struct_size(type);
	auto &last_type = get<SPIRType>(type.member_types.back());
	if (!last_type.array.empty() && last_type.array_size_literal[0] && last_type.array[0] == 0) // Runtime array
		size += array_size * type_struct_member_array_stride(type, uint32_t(type.member_types.size() - 1));
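// Example: for a block like "struct { float a; float b[]; }" with b at Offset 4 and ArrayStride 4,
// get_declared_struct_size() reports 4 (the runtime array contributes zero elements), so calling
// this with array_size = 8 yields 4 + 8 * 4 = 36 bytes.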
uint32_t Compiler::evaluate_spec_constant_u32(const SPIRConstantOp &spec) const
{
	auto &result_type = get<SPIRType>(spec.basetype);
	if (result_type.basetype != SPIRType::UInt && result_type.basetype != SPIRType::Int &&
	    result_type.basetype != SPIRType::Boolean)
	{
		SPIRV_CROSS_THROW(
		    "Only 32-bit integers and booleans are currently supported when evaluating specialization constants.\n");
	}

	if (!is_scalar(result_type))
		SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n");

	uint32_t value = 0;

	const auto eval_u32 = [&](uint32_t id) -> uint32_t {
		auto &type = expression_type(id);
		if (type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int && type.basetype != SPIRType::Boolean)
		{
			SPIRV_CROSS_THROW("Only 32-bit integers and booleans are currently supported when evaluating "
			                  "specialization constants.\n");
		}

		if (!is_scalar(type))
			SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n");
		if (const auto *c = this->maybe_get<SPIRConstant>(id))
			return c->scalar();
		else
			return evaluate_spec_constant_u32(this->get<SPIRConstantOp>(id));
	};

#define binary_spec_op(op, binary_op) \
	case Op##op: \
		value = eval_u32(spec.arguments[0]) binary_op eval_u32(spec.arguments[1]); \
		break

#define binary_spec_op_cast(op, binary_op, type) \
	case Op##op: \
		value = uint32_t(type(eval_u32(spec.arguments[0])) binary_op type(eval_u32(spec.arguments[1]))); \
		break

	// Support the basic opcodes which are typically used when computing array sizes.
	switch (spec.opcode)
	{
		binary_spec_op(IAdd, +);
		binary_spec_op(ISub, -);
		binary_spec_op(IMul, *);
		binary_spec_op(BitwiseAnd, &);
		binary_spec_op(BitwiseOr, |);
		binary_spec_op(BitwiseXor, ^);
		binary_spec_op(LogicalAnd, &);
		binary_spec_op(LogicalOr, |);
		binary_spec_op(ShiftLeftLogical, <<);
		binary_spec_op(ShiftRightLogical, >>);
		binary_spec_op_cast(ShiftRightArithmetic, >>, int32_t);
		binary_spec_op(LogicalEqual, ==);
		binary_spec_op(LogicalNotEqual, !=);
		binary_spec_op(IEqual, ==);
		binary_spec_op(INotEqual, !=);
		binary_spec_op(ULessThan, <);
		binary_spec_op(ULessThanEqual, <=);
		binary_spec_op(UGreaterThan, >);
		binary_spec_op(UGreaterThanEqual, >=);
		binary_spec_op_cast(SLessThan, <, int32_t);
		binary_spec_op_cast(SLessThanEqual, <=, int32_t);
		binary_spec_op_cast(SGreaterThan, >, int32_t);
		binary_spec_op_cast(SGreaterThanEqual, >=, int32_t);
#undef binary_spec_op
#undef binary_spec_op_cast

	case OpLogicalNot:
		value = uint32_t(!eval_u32(spec.arguments[0]));
		break;

	case OpNot:
		value = ~eval_u32(spec.arguments[0]);
		break;

	case OpSNegate:
		value = uint32_t(-int32_t(eval_u32(spec.arguments[0])));
		break;

	case OpSelect:
		value = eval_u32(spec.arguments[0]) ? eval_u32(spec.arguments[1]) : eval_u32(spec.arguments[2]);
		break;

	case OpUMod:
	{
		uint32_t a = eval_u32(spec.arguments[0]);
		uint32_t b = eval_u32(spec.arguments[1]);
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in UMod, b == 0.\n");
		value = a % b;
		break;
	}

	case OpSRem:
	{
		auto a = int32_t(eval_u32(spec.arguments[0]));
		auto b = int32_t(eval_u32(spec.arguments[1]));
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in SRem, b == 0.\n");
		value = a % b;
		break;
	}

	case OpSMod:
	{
		auto a = int32_t(eval_u32(spec.arguments[0]));
		auto b = int32_t(eval_u32(spec.arguments[1]));
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in SMod, b == 0.\n");
		auto v = a % b;

		// Makes sure we match the sign of b, not a.
		if ((b < 0 && v > 0) || (b > 0 && v < 0))
			v += b;
		value = v;
		break;
	}

	case OpUDiv:
	{
		uint32_t a = eval_u32(spec.arguments[0]);
		uint32_t b = eval_u32(spec.arguments[1]);
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in UDiv, b == 0.\n");
		value = a / b;
		break;
	}

	case OpSDiv:
	{
		auto a = int32_t(eval_u32(spec.arguments[0]));
		auto b = int32_t(eval_u32(spec.arguments[1]));
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in SDiv, b == 0.\n");
		value = a / b;
		break;
	}

	default:
		SPIRV_CROSS_THROW("Unsupported spec constant opcode for evaluation.\n");
	}

	return value;
}
uint32_t Compiler::evaluate_constant_u32(uint32_t id) const
{
	if (const auto *c = maybe_get<SPIRConstant>(id))
		return c->scalar();
	else
		return evaluate_spec_constant_u32(get<SPIRConstantOp>(id));
}
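// The two evaluators above are what allow array sizes expressed through OpSpecConstantOp
// chains to be folded to plain numbers. Internal callers (e.g. get_declared_struct_member_size
// below) use them roughly as follows:
//   uint32_t array_size = evaluate_constant_u32(type.array.back());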
size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const
{
	if (struct_type.member_types.empty())
		SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");

	auto &flags = get_member_decoration_bitset(struct_type.self, index);
	auto &type = get<SPIRType>(struct_type.member_types[index]);

	switch (type.basetype)
	{
	case SPIRType::Unknown:
	case SPIRType::Void:
	case SPIRType::Boolean: // Bools are purely logical, and cannot be used for externally visible types.
	case SPIRType::AtomicCounter:
	case SPIRType::Image:
	case SPIRType::SampledImage:
	case SPIRType::Sampler:
		SPIRV_CROSS_THROW("Querying size for object with opaque size.");

	default:
		break;
	}

	if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer)
	{
		// Check if this is a top-level pointer type, and not an array of pointers.
		if (type.pointer_depth > get<SPIRType>(type.parent_type).pointer_depth)
			return 8;
	}

	if (!type.array.empty())
	{
		// For arrays, we can use ArrayStride to get an easy check.
		bool array_size_literal = type.array_size_literal.back();
		uint32_t array_size = array_size_literal ? type.array.back() : evaluate_constant_u32(type.array.back());
		return type_struct_member_array_stride(struct_type, index) * array_size;
	}
	else if (type.basetype == SPIRType::Struct)
	{
		return get_declared_struct_size(type);
	}
	else
	{
		unsigned vecsize = type.vecsize;
		unsigned columns = type.columns;

		// Vectors.
		if (columns == 1)
		{
			size_t component_size = type.width / 8;
			return vecsize * component_size;
		}
		else
		{
			uint32_t matrix_stride = type_struct_member_matrix_stride(struct_type, index);

			// Per SPIR-V spec, matrices must be tightly packed and aligned up for vec3 accesses.
			if (flags.get(DecorationRowMajor))
				return matrix_stride * vecsize;
			else if (flags.get(DecorationColMajor))
				return matrix_stride * columns;
			else
				SPIRV_CROSS_THROW("Either row-major or column-major must be declared for matrices.");
		}
	}
}
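// Worked example for the matrix branch above: a column-major mat3 member with MatrixStride = 16
// is reported as matrix_stride * columns = 16 * 3 = 48 bytes, while a plain vec3 member is
// vecsize * (width / 8) = 3 * 4 = 12 bytes.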
bool Compiler::BufferAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	if (opcode != OpAccessChain && opcode != OpInBoundsAccessChain && opcode != OpPtrAccessChain)
		return true;

	bool ptr_chain = (opcode == OpPtrAccessChain);

	// Invalid SPIR-V.
	if (length < (ptr_chain ? 5u : 4u))
		return false;

	if (args[2] != id)
		return true;

	// Don't bother traversing the entire access chain tree yet.
	// If we access a struct member, assume we access the entire member.
	uint32_t index = compiler.get<SPIRConstant>(args[ptr_chain ? 4 : 3]).scalar();

	// Seen this index already.
	if (seen.find(index) != end(seen))
		return true;
	seen.insert(index);

	auto &type = compiler.expression_type(id);
	uint32_t offset = compiler.type_struct_member_offset(type, index);

	size_t range;
	// If we have another member in the struct, deduce the range by looking at the next member.
	// This is okay since structs in SPIR-V can have padding, but Offset decoration must be
	// monotonically increasing.
	// Of course, this doesn't take into account if the SPIR-V for some reason decided to add
	// very large amounts of padding, but that's not really a big deal.
	if (index + 1 < type.member_types.size())
	{
		range = compiler.type_struct_member_offset(type, index + 1) - offset;
	}
	else
	{
		// No padding, so just deduce it from the size of the member directly.
		range = compiler.get_declared_struct_member_size(type, index);
	}

	ranges.push_back({ index, offset, range });
	return true;
}
SmallVector<BufferRange> Compiler::get_active_buffer_ranges(VariableID id) const
{
	SmallVector<BufferRange> ranges;
	BufferAccessHandler handler(*this, ranges, id);
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
	return ranges;
}
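// Sketch of typical client usage (the compiler instance and variable ID are illustrative):
//   for (auto &range : compiler.get_active_buffer_ranges(ubo_variable_id))
//       printf("member #%u: offset %zu, %zu bytes statically accessed\n",
//              range.index, size_t(range.offset), size_t(range.range));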
bool Compiler::types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const
{
	if (a.basetype != b.basetype)
		return false;
	if (a.width != b.width)
		return false;
	if (a.vecsize != b.vecsize)
		return false;
	if (a.columns != b.columns)
		return false;
	if (a.array.size() != b.array.size())
		return false;

	size_t array_count = a.array.size();
	if (array_count && memcmp(a.array.data(), b.array.data(), array_count * sizeof(uint32_t)) != 0)
		return false;

	if (a.basetype == SPIRType::Image || a.basetype == SPIRType::SampledImage)
	{
		if (memcmp(&a.image, &b.image, sizeof(SPIRType::Image)) != 0)
			return false;
	}

	if (a.member_types.size() != b.member_types.size())
		return false;

	size_t member_types = a.member_types.size();
	for (size_t i = 0; i < member_types; i++)
	{
		if (!types_are_logically_equivalent(get<SPIRType>(a.member_types[i]), get<SPIRType>(b.member_types[i])))
			return false;
	}

	return true;
}
const Bitset &Compiler::get_execution_mode_bitset() const
{
	return get_entry_point().flags;
}

void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t arg1, uint32_t arg2)
{
	auto &execution = get_entry_point();

	execution.flags.set(mode);
	switch (mode)
	{
	case ExecutionModeLocalSize:
		execution.workgroup_size.x = arg0;
		execution.workgroup_size.y = arg1;
		execution.workgroup_size.z = arg2;
		break;

	case ExecutionModeLocalSizeId:
		execution.workgroup_size.id_x = arg0;
		execution.workgroup_size.id_y = arg1;
		execution.workgroup_size.id_z = arg2;
		break;

	case ExecutionModeInvocations:
		execution.invocations = arg0;
		break;

	case ExecutionModeOutputVertices:
		execution.output_vertices = arg0;
		break;

	case ExecutionModeOutputPrimitivesEXT:
		execution.output_primitives = arg0;
		break;

	default:
		break;
	}
}

void Compiler::unset_execution_mode(ExecutionMode mode)
{
	auto &execution = get_entry_point();
	execution.flags.clear(mode);
}
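// For example, a client can force a compute workgroup size before cross-compiling
// ("compiler" is an illustrative instance):
//   compiler.set_execution_mode(spv::ExecutionModeLocalSize, 8, 8, 1);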
uint32_t Compiler::get_work_group_size_specialization_constants(SpecializationConstant &x, SpecializationConstant &y,
                                                                SpecializationConstant &z) const
{
	auto &execution = get_entry_point();
	x = { 0, 0 };
	y = { 0, 0 };
	z = { 0, 0 };

	// WorkgroupSize builtin takes precedence over LocalSize / LocalSizeId.
	if (execution.workgroup_size.constant != 0)
	{
		auto &c = get<SPIRConstant>(execution.workgroup_size.constant);

		if (c.m.c[0].id[0] != ID(0))
		{
			x.id = c.m.c[0].id[0];
			x.constant_id = get_decoration(c.m.c[0].id[0], DecorationSpecId);
		}

		if (c.m.c[0].id[1] != ID(0))
		{
			y.id = c.m.c[0].id[1];
			y.constant_id = get_decoration(c.m.c[0].id[1], DecorationSpecId);
		}

		if (c.m.c[0].id[2] != ID(0))
		{
			z.id = c.m.c[0].id[2];
			z.constant_id = get_decoration(c.m.c[0].id[2], DecorationSpecId);
		}
	}
	else if (execution.flags.get(ExecutionModeLocalSizeId))
	{
		auto &cx = get<SPIRConstant>(execution.workgroup_size.id_x);
		if (cx.specialization)
		{
			x.id = execution.workgroup_size.id_x;
			x.constant_id = get_decoration(execution.workgroup_size.id_x, DecorationSpecId);
		}

		auto &cy = get<SPIRConstant>(execution.workgroup_size.id_y);
		if (cy.specialization)
		{
			y.id = execution.workgroup_size.id_y;
			y.constant_id = get_decoration(execution.workgroup_size.id_y, DecorationSpecId);
		}

		auto &cz = get<SPIRConstant>(execution.workgroup_size.id_z);
		if (cz.specialization)
		{
			z.id = execution.workgroup_size.id_z;
			z.constant_id = get_decoration(execution.workgroup_size.id_z, DecorationSpecId);
		}
	}

	return execution.workgroup_size.constant;
}
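// Sketch of how this query is typically consumed (illustrative names):
//   SpecializationConstant wg_x, wg_y, wg_z;
//   uint32_t builtin_id = compiler.get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
//   // A non-zero wg_x.id means the X dimension is driven by spec constant wg_x.constant_id.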
uint32_t Compiler::get_execution_mode_argument(spv::ExecutionMode mode, uint32_t index) const
{
	auto &execution = get_entry_point();
	switch (mode)
	{
	case ExecutionModeLocalSizeId:
		if (execution.flags.get(ExecutionModeLocalSizeId))
		{
			switch (index)
			{
			case 0:
				return execution.workgroup_size.id_x;
			case 1:
				return execution.workgroup_size.id_y;
			case 2:
				return execution.workgroup_size.id_z;
			default:
				return 0;
			}
		}
		else
			return 0;

	case ExecutionModeLocalSize:
		switch (index)
		{
		case 0:
			if (execution.flags.get(ExecutionModeLocalSizeId) && execution.workgroup_size.id_x != 0)
				return get<SPIRConstant>(execution.workgroup_size.id_x).scalar();
			else
				return execution.workgroup_size.x;
		case 1:
			if (execution.flags.get(ExecutionModeLocalSizeId) && execution.workgroup_size.id_y != 0)
				return get<SPIRConstant>(execution.workgroup_size.id_y).scalar();
			else
				return execution.workgroup_size.y;
		case 2:
			if (execution.flags.get(ExecutionModeLocalSizeId) && execution.workgroup_size.id_z != 0)
				return get<SPIRConstant>(execution.workgroup_size.id_z).scalar();
			else
				return execution.workgroup_size.z;
		default:
			return 0;
		}

	case ExecutionModeInvocations:
		return execution.invocations;

	case ExecutionModeOutputVertices:
		return execution.output_vertices;

	case ExecutionModeOutputPrimitivesEXT:
		return execution.output_primitives;

	default:
		return 0;
	}
}
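// E.g. querying the effective local size regardless of whether it came from LocalSize or LocalSizeId:
//   uint32_t local_x = compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 0);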
ExecutionModel Compiler::get_execution_model() const
{
	auto &execution = get_entry_point();
	return execution.model;
}

bool Compiler::is_tessellation_shader(ExecutionModel model)
{
	return model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation;
}

bool Compiler::is_vertex_like_shader() const
{
	auto model = get_execution_model();
	return model == ExecutionModelVertex || model == ExecutionModelGeometry ||
	       model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation;
}

bool Compiler::is_tessellation_shader() const
{
	return is_tessellation_shader(get_execution_model());
}

bool Compiler::is_tessellating_triangles() const
{
	return get_execution_mode_bitset().get(ExecutionModeTriangles);
}

void Compiler::set_remapped_variable_state(VariableID id, bool remap_enable)
{
	get<SPIRVariable>(id).remapped_variable = remap_enable;
}

bool Compiler::get_remapped_variable_state(VariableID id) const
{
	return get<SPIRVariable>(id).remapped_variable;
}

void Compiler::set_subpass_input_remapped_components(VariableID id, uint32_t components)
{
	get<SPIRVariable>(id).remapped_components = components;
}

uint32_t Compiler::get_subpass_input_remapped_components(VariableID id) const
{
	return get<SPIRVariable>(id).remapped_components;
}
void Compiler::add_implied_read_expression(SPIRExpression &e, uint32_t source)
{
	auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source));
	if (itr == end(e.implied_read_expressions))
		e.implied_read_expressions.push_back(source);
}

void Compiler::add_implied_read_expression(SPIRAccessChain &e, uint32_t source)
{
	auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source));
	if (itr == end(e.implied_read_expressions))
		e.implied_read_expressions.push_back(source);
}

void Compiler::add_active_interface_variable(uint32_t var_id)
{
	active_interface_variables.insert(var_id);

	// In SPIR-V 1.4 and up we must also track the interface variable in the entry point.
	if (ir.get_spirv_version() >= 0x10400)
	{
		auto &vars = get_entry_point().interface_variables;
		if (find(begin(vars), end(vars), VariableID(var_id)) == end(vars))
			vars.push_back(var_id);
	}
}
void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_expression)
{
	auto *ptr_e = maybe_get<SPIRExpression>(dst);

	if (is_position_invariant() && ptr_e && maybe_get<SPIRExpression>(source_expression))
	{
		auto &deps = ptr_e->invariance_dependencies;
		if (std::find(deps.begin(), deps.end(), source_expression) == deps.end())
			deps.push_back(source_expression);
	}

	// Don't inherit any expression dependencies if the expression in dst
	// is not a forwarded temporary.
	if (forwarded_temporaries.find(dst) == end(forwarded_temporaries) ||
	    forced_temporaries.find(dst) != end(forced_temporaries))
	{
		return;
	}

	auto &e = *ptr_e;
	auto *phi = maybe_get<SPIRVariable>(source_expression);
	if (phi && phi->phi_variable)
	{
		// We have used a phi variable, which can change at the end of the block,
		// so make sure we take a dependency on this phi variable.
		phi->dependees.push_back(dst);
	}

	auto *s = maybe_get<SPIRExpression>(source_expression);
	if (!s)
		return;

	auto &e_deps = e.expression_dependencies;
	auto &s_deps = s->expression_dependencies;

	// If we depend on a expression, we also depend on all sub-dependencies from source.
	e_deps.push_back(source_expression);
	e_deps.insert(end(e_deps), begin(s_deps), end(s_deps));

	// Eliminate duplicated dependencies.
	sort(begin(e_deps), end(e_deps));
	e_deps.erase(unique(begin(e_deps), end(e_deps)), end(e_deps));
}
SmallVector<EntryPoint> Compiler::get_entry_points_and_stages() const
{
	SmallVector<EntryPoint> entries;
	for (auto &entry : ir.entry_points)
		entries.push_back({ entry.second.orig_name, entry.second.model });
	return entries;
}

void Compiler::rename_entry_point(const std::string &old_name, const std::string &new_name, spv::ExecutionModel model)
{
	auto &entry = get_entry_point(old_name, model);
	entry.orig_name = new_name;
	entry.name = new_name;
}

void Compiler::set_entry_point(const std::string &name, spv::ExecutionModel model)
{
	auto &entry = get_entry_point(name, model);
	ir.default_entry_point = entry.self;
}
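// Sketch of selecting a particular entry point before compiling (illustrative usage):
//   for (auto &e : compiler.get_entry_points_and_stages())
//       if (e.execution_model == spv::ExecutionModelFragment)
//           compiler.set_entry_point(e.name, e.execution_model);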
SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name)
{
	auto itr = find_if(
	    begin(ir.entry_points), end(ir.entry_points),
	    [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.orig_name == name; });

	if (itr == end(ir.entry_points))
		SPIRV_CROSS_THROW("Entry point does not exist.");

	return itr->second;
}

const SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name) const
{
	auto itr = find_if(
	    begin(ir.entry_points), end(ir.entry_points),
	    [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.orig_name == name; });

	if (itr == end(ir.entry_points))
		SPIRV_CROSS_THROW("Entry point does not exist.");

	return itr->second;
}

SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model)
{
	auto itr = find_if(begin(ir.entry_points), end(ir.entry_points),
	                   [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
		                   return entry.second.orig_name == name && entry.second.model == model;
	                   });

	if (itr == end(ir.entry_points))
		SPIRV_CROSS_THROW("Entry point does not exist.");

	return itr->second;
}

const SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model) const
{
	auto itr = find_if(begin(ir.entry_points), end(ir.entry_points),
	                   [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
		                   return entry.second.orig_name == name && entry.second.model == model;
	                   });

	if (itr == end(ir.entry_points))
		SPIRV_CROSS_THROW("Entry point does not exist.");

	return itr->second;
}

const string &Compiler::get_cleansed_entry_point_name(const std::string &name, ExecutionModel model) const
{
	return get_entry_point(name, model).name;
}

const SPIREntryPoint &Compiler::get_entry_point() const
{
	return ir.entry_points.find(ir.default_entry_point)->second;
}

SPIREntryPoint &Compiler::get_entry_point()
{
	return ir.entry_points.find(ir.default_entry_point)->second;
}
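// Backends may rename the entry point to satisfy the target language (e.g. "main" becomes
// "main0" in MSL), so after compile() a client would typically query the translated name:
//   std::string translated = compiler.get_cleansed_entry_point_name("main", spv::ExecutionModelVertex);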
bool Compiler::interface_variable_exists_in_entry_point(uint32_t id) const
{
	auto &var = get<SPIRVariable>(id);

	if (ir.get_spirv_version() < 0x10400)
	{
		if (var.storage != StorageClassInput && var.storage != StorageClassOutput &&
		    var.storage != StorageClassUniformConstant)
			SPIRV_CROSS_THROW("Only Input, Output variables and Uniform constants are part of a shader linking interface.");

		// This is to avoid potential problems with very old glslang versions which did
		// not emit input/output interfaces properly.
		// We can assume they only had a single entry point, and single entry point
		// shaders could easily be assumed to use every interface variable anyways.
		if (ir.entry_points.size() <= 1)
			return true;
	}

	// In SPIR-V 1.4 and later, all global resource variables must be present.

	auto &execution = get_entry_point();
	return find(begin(execution.interface_variables), end(execution.interface_variables), VariableID(id)) !=
	       end(execution.interface_variables);
}
void Compiler::CombinedImageSamplerHandler::push_remap_parameters(const SPIRFunction &func, const uint32_t *args,
                                                                  uint32_t length)
{
	// If possible, pipe through a remapping table so that parameters know
	// which variables they actually bind to in this scope.
	unordered_map<uint32_t, uint32_t> remapping;
	for (uint32_t i = 0; i < length; i++)
		remapping[func.arguments[i].id] = remap_parameter(args[i]);
	parameter_remapping.push(std::move(remapping));
}

void Compiler::CombinedImageSamplerHandler::pop_remap_parameters()
{
	parameter_remapping.pop();
}

uint32_t Compiler::CombinedImageSamplerHandler::remap_parameter(uint32_t id)
{
	auto *var = compiler.maybe_get_backing_variable(id);
	if (var)
		id = var->self;

	if (parameter_remapping.empty())
		return id;

	auto &remapping = parameter_remapping.top();
	auto itr = remapping.find(id);
	if (itr != end(remapping))
		return itr->second;
	else
		return id;
}
bool Compiler::CombinedImageSamplerHandler::begin_function_scope(const uint32_t *args, uint32_t length)
{
	if (length < 3)
		return false;

	auto &callee = compiler.get<SPIRFunction>(args[2]);
	args += 3;
	length -= 3;
	push_remap_parameters(callee, args, length);
	functions.push(&callee);
	return true;
}

bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *args, uint32_t length)
{
	if (length < 3)
		return false;

	auto &callee = compiler.get<SPIRFunction>(args[2]);
	args += 3;

	// There are two types of cases we have to handle,
	// a callee might call sampler2D(texture2D, sampler) directly where
	// one or more parameters originate from parameters.
	// Alternatively, we need to provide combined image samplers to our callees,
	// and in this case we need to add those as well.

	pop_remap_parameters();

	// Our callee has now been processed at least once.
	// No point in doing it again.
	callee.do_combined_parameters = false;

	auto &params = functions.top()->combined_parameters;
	functions.pop();
	if (functions.empty())
		return true;

	auto &caller = *functions.top();
	if (caller.do_combined_parameters)
	{
		for (auto &param : params)
		{
			VariableID image_id = param.global_image ? param.image_id : VariableID(args[param.image_id]);
			VariableID sampler_id = param.global_sampler ? param.sampler_id : VariableID(args[param.sampler_id]);

			auto *i = compiler.maybe_get_backing_variable(image_id);
			auto *s = compiler.maybe_get_backing_variable(sampler_id);
			if (i)
				image_id = i->self;
			if (s)
				sampler_id = s->self;

			register_combined_image_sampler(caller, 0, image_id, sampler_id, param.depth);
		}
	}

	return true;
}
2818 void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction
&caller
,
2819 VariableID combined_module_id
,
2820 VariableID image_id
, VariableID sampler_id
,
2823 // We now have a texture ID and a sampler ID which will either be found as a global
2824 // or a parameter in our own function. If both are global, they will not need a parameter,
2825 // otherwise, add it to our list.
2826 SPIRFunction::CombinedImageSamplerParameter param
= {
2827 0u, image_id
, sampler_id
, true, true, depth
,
2830 auto texture_itr
= find_if(begin(caller
.arguments
), end(caller
.arguments
),
2831 [image_id
](const SPIRFunction::Parameter
&p
) { return p
.id
== image_id
; });
2832 auto sampler_itr
= find_if(begin(caller
.arguments
), end(caller
.arguments
),
2833 [sampler_id
](const SPIRFunction::Parameter
&p
) { return p
.id
== sampler_id
; });
2835 if (texture_itr
!= end(caller
.arguments
))
2837 param
.global_image
= false;
2838 param
.image_id
= uint32_t(texture_itr
- begin(caller
.arguments
));
2841 if (sampler_itr
!= end(caller
.arguments
))
2843 param
.global_sampler
= false;
2844 param
.sampler_id
= uint32_t(sampler_itr
- begin(caller
.arguments
));
2847 if (param
.global_image
&& param
.global_sampler
)
2850 auto itr
= find_if(begin(caller
.combined_parameters
), end(caller
.combined_parameters
),
2851 [¶m
](const SPIRFunction::CombinedImageSamplerParameter
&p
) {
2852 return param
.image_id
== p
.image_id
&& param
.sampler_id
== p
.sampler_id
&&
2853 param
.global_image
== p
.global_image
&& param
.global_sampler
== p
.global_sampler
;
2856 if (itr
== end(caller
.combined_parameters
))
2858 uint32_t id
= compiler
.ir
.increase_bound_by(3);
2859 auto type_id
= id
+ 0;
2860 auto ptr_type_id
= id
+ 1;
2861 auto combined_id
= id
+ 2;
2862 auto &base
= compiler
.expression_type(image_id
);
2863 auto &type
= compiler
.set
<SPIRType
>(type_id
, OpTypeSampledImage
);
2864 auto &ptr_type
= compiler
.set
<SPIRType
>(ptr_type_id
, OpTypePointer
);
2867 type
.self
= type_id
;
2868 type
.basetype
= SPIRType::SampledImage
;
2869 type
.pointer
= false;
2870 type
.storage
= StorageClassGeneric
;
2871 type
.image
.depth
= depth
;
2874 ptr_type
.pointer
= true;
2875 ptr_type
.storage
= StorageClassUniformConstant
;
2876 ptr_type
.parent_type
= type_id
;
2878 // Build new variable.
2879 compiler
.set
<SPIRVariable
>(combined_id
, ptr_type_id
, StorageClassFunction
, 0);
2881 // Inherit RelaxedPrecision.
2882 // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration.
2883 bool relaxed_precision
=
2884 compiler
.has_decoration(sampler_id
, DecorationRelaxedPrecision
) ||
2885 compiler
.has_decoration(image_id
, DecorationRelaxedPrecision
) ||
2886 (combined_module_id
&& compiler
.has_decoration(combined_module_id
, DecorationRelaxedPrecision
));
2888 if (relaxed_precision
)
2889 compiler
.set_decoration(combined_id
, DecorationRelaxedPrecision
);
2891 param
.id
= combined_id
;
2893 compiler
.set_name(combined_id
,
2894 join("SPIRV_Cross_Combined", compiler
.to_name(image_id
), compiler
.to_name(sampler_id
)));
2896 caller
.combined_parameters
.push_back(param
);
2897 caller
.shadow_arguments
.push_back({ ptr_type_id
, combined_id
, 0u, 0u, true });
2901 bool Compiler::DummySamplerForCombinedImageHandler::handle(Op opcode
, const uint32_t *args
, uint32_t length
)
2903 if (need_dummy_sampler
)
2905 // No need to traverse further, we know the result.
2916 uint32_t result_type
= args
[0];
2918 auto &type
= compiler
.get
<SPIRType
>(result_type
);
2919 bool separate_image
=
2920 type
.basetype
== SPIRType::Image
&& type
.image
.sampled
== 1 && type
.image
.dim
!= DimBuffer
;
2922 // If not separate image, don't bother.
2923 if (!separate_image
)
2926 uint32_t id
= args
[1];
2927 uint32_t ptr
= args
[2];
2928 compiler
.set
<SPIRExpression
>(id
, "", result_type
, true);
2929 compiler
.register_read(id
, ptr
, true);
2934 case OpImageQuerySizeLod
:
2935 case OpImageQuerySize
:
2936 case OpImageQueryLevels
:
2937 case OpImageQuerySamples
:
2939 // If we are fetching or querying LOD from a plain OpTypeImage, we must pre-combine with our dummy sampler.
2940 auto *var
= compiler
.maybe_get_backing_variable(args
[2]);
2943 auto &type
= compiler
.get
<SPIRType
>(var
->basetype
);
2944 if (type
.basetype
== SPIRType::Image
&& type
.image
.sampled
== 1 && type
.image
.dim
!= DimBuffer
)
2945 need_dummy_sampler
= true;
2951 case OpInBoundsAccessChain
:
2953 case OpPtrAccessChain
:
2958 uint32_t result_type
= args
[0];
2959 auto &type
= compiler
.get
<SPIRType
>(result_type
);
2960 bool separate_image
=
2961 type
.basetype
== SPIRType::Image
&& type
.image
.sampled
== 1 && type
.image
.dim
!= DimBuffer
;
2962 if (!separate_image
)
2965 uint32_t id
= args
[1];
2966 uint32_t ptr
= args
[2];
2967 compiler
.set
<SPIRExpression
>(id
, "", result_type
, true);
2968 compiler
.register_read(id
, ptr
, true);
2970 // Other backends might use SPIRAccessChain for this later.
2971 compiler
.ir
.ids
[id
].set_allow_type_rewrite();
2982 bool Compiler::CombinedImageSamplerHandler::handle(Op opcode
, const uint32_t *args
, uint32_t length
)
2984 // We need to figure out where samplers and images are loaded from, so do only the bare bones compilation we need.
2985 bool is_fetch
= false;
2994 uint32_t result_type
= args
[0];
2996 auto &type
= compiler
.get
<SPIRType
>(result_type
);
2997 bool separate_image
= type
.basetype
== SPIRType::Image
&& type
.image
.sampled
== 1;
2998 bool separate_sampler
= type
.basetype
== SPIRType::Sampler
;
3000 // If not separate image or sampler, don't bother.
3001 if (!separate_image
&& !separate_sampler
)
3004 uint32_t id
= args
[1];
3005 uint32_t ptr
= args
[2];
3006 compiler
.set
<SPIRExpression
>(id
, "", result_type
, true);
3007 compiler
.register_read(id
, ptr
, true);
3011 case OpInBoundsAccessChain
:
3013 case OpPtrAccessChain
:
3018 // Technically, it is possible to have arrays of textures and arrays of samplers and combine them, but this becomes essentially
3019 // impossible to implement, since we don't know which concrete sampler we are accessing.
3020 // One potential way is to create a combinatorial explosion where N textures and M samplers are combined into N * M sampler2Ds,
3021 // but this seems ridiculously complicated for a problem which is easy to work around.
3022 // Checking access chains like this assumes we don't have samplers or textures inside uniform structs, but this makes no sense.
3024 uint32_t result_type
= args
[0];
3026 auto &type
= compiler
.get
<SPIRType
>(result_type
);
3027 bool separate_image
= type
.basetype
== SPIRType::Image
&& type
.image
.sampled
== 1;
3028 bool separate_sampler
= type
.basetype
== SPIRType::Sampler
;
3029 if (separate_sampler
)
3031 "Attempting to use arrays or structs of separate samplers. This is not possible to statically "
3032 "remap to plain GLSL.");
3036 uint32_t id
= args
[1];
3037 uint32_t ptr
= args
[2];
3038 compiler
.set
<SPIRExpression
>(id
, "", result_type
, true);
3039 compiler
.register_read(id
, ptr
, true);
3045 case OpImageQuerySizeLod
:
3046 case OpImageQuerySize
:
3047 case OpImageQueryLevels
:
3048 case OpImageQuerySamples
:
3050 // If we are fetching from a plain OpTypeImage or querying LOD, we must pre-combine with our dummy sampler.
3051 auto *var
= compiler
.maybe_get_backing_variable(args
[2]);
3055 auto &type
= compiler
.get
<SPIRType
>(var
->basetype
);
3056 if (type
.basetype
== SPIRType::Image
&& type
.image
.sampled
== 1 && type
.image
.dim
!= DimBuffer
)
3058 if (compiler
.dummy_sampler_id
== 0)
3059 SPIRV_CROSS_THROW("texelFetch without sampler was found, but no dummy sampler has been created with "
3060 "build_dummy_sampler_for_combined_images().");
3070 case OpSampledImage
:
3078 // Registers sampler2D calls used in case they are parameters so
3079 // that their callees know which combined image samplers to propagate down the call stack.
3080 if (!functions
.empty())
3082 auto &callee
= *functions
.top();
3083 if (callee
.do_combined_parameters
)
3085 uint32_t image_id
= args
[2];
3087 auto *image
= compiler
.maybe_get_backing_variable(image_id
);
3089 image_id
= image
->self
;
3091 uint32_t sampler_id
= is_fetch
? compiler
.dummy_sampler_id
: args
[3];
3092 auto *sampler
= compiler
.maybe_get_backing_variable(sampler_id
);
3094 sampler_id
= sampler
->self
;
3096 uint32_t combined_id
= args
[1];
3098 auto &combined_type
= compiler
.get
<SPIRType
>(args
[0]);
3099 register_combined_image_sampler(callee
, combined_id
, image_id
, sampler_id
, combined_type
.image
.depth
);
3103 // For function calls, we need to remap IDs which are function parameters into global variables.
3104 // This information is statically known from the current place in the call stack.
3105 // Function parameters are not necessarily pointers, so if we don't have a backing variable, remapping will know
3106 // which backing variable the image/sample came from.
3107 VariableID image_id
= remap_parameter(args
[2]);
3108 VariableID sampler_id
= is_fetch
? compiler
.dummy_sampler_id
: remap_parameter(args
[3]);
3110 auto itr
= find_if(begin(compiler
.combined_image_samplers
), end(compiler
.combined_image_samplers
),
3111 [image_id
, sampler_id
](const CombinedImageSampler
&combined
) {
3112 return combined
.image_id
== image_id
&& combined
.sampler_id
== sampler_id
;
3115 if (itr
== end(compiler
.combined_image_samplers
))
3117 uint32_t sampled_type
;
3118 uint32_t combined_module_id
;
3121 // Have to invent the sampled image type.
3122 sampled_type
= compiler
.ir
.increase_bound_by(1);
3123 auto &type
= compiler
.set
<SPIRType
>(sampled_type
, OpTypeSampledImage
);
3124 type
= compiler
.expression_type(args
[2]);
3125 type
.self
= sampled_type
;
3126 type
.basetype
= SPIRType::SampledImage
;
3127 type
.image
.depth
= false;
3128 combined_module_id
= 0;
3132 sampled_type
= args
[0];
3133 combined_module_id
= args
[1];
3136 auto id
= compiler
.ir
.increase_bound_by(2);
3137 auto type_id
= id
+ 0;
3138 auto combined_id
= id
+ 1;
3140 // Make a new type, pointer to OpTypeSampledImage, so we can make a variable of this type.
3141 // We will probably have this type lying around, but it doesn't hurt to make duplicates for internal purposes.
3142 auto &type
= compiler
.set
<SPIRType
>(type_id
, OpTypePointer
);
3143 auto &base
= compiler
.get
<SPIRType
>(sampled_type
);
3145 type
.pointer
= true;
3146 type
.storage
= StorageClassUniformConstant
;
3147 type
.parent_type
= type_id
;
3149 // Build new variable.
3150 compiler
.set
<SPIRVariable
>(combined_id
, type_id
, StorageClassUniformConstant
, 0);
3152 // Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant).
3153 // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration.
3154 bool relaxed_precision
=
3155 (sampler_id
&& compiler
.has_decoration(sampler_id
, DecorationRelaxedPrecision
)) ||
3156 (image_id
&& compiler
.has_decoration(image_id
, DecorationRelaxedPrecision
)) ||
3157 (combined_module_id
&& compiler
.has_decoration(combined_module_id
, DecorationRelaxedPrecision
));
3159 if (relaxed_precision
)
3160 compiler
.set_decoration(combined_id
, DecorationRelaxedPrecision
);
3162 // Propagate the array type for the original image as well.
3163 auto *var
= compiler
.maybe_get_backing_variable(image_id
);
3166 auto &parent_type
= compiler
.get
<SPIRType
>(var
->basetype
);
3167 type
.array
= parent_type
.array
;
3168 type
.array_size_literal
= parent_type
.array_size_literal
;
3171 compiler
.combined_image_samplers
.push_back({ combined_id
, image_id
, sampler_id
});
VariableID Compiler::build_dummy_sampler_for_combined_images()
{
	DummySamplerForCombinedImageHandler handler(*this);
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
	if (handler.need_dummy_sampler)
	{
		uint32_t offset = ir.increase_bound_by(3);
		auto type_id = offset + 0;
		auto ptr_type_id = offset + 1;
		auto var_id = offset + 2;

		auto &sampler = set<SPIRType>(type_id, OpTypeSampler);
		sampler.basetype = SPIRType::Sampler;

		auto &ptr_sampler = set<SPIRType>(ptr_type_id, OpTypePointer);
		ptr_sampler = sampler;
		ptr_sampler.self = type_id;
		ptr_sampler.storage = StorageClassUniformConstant;
		ptr_sampler.pointer = true;
		ptr_sampler.parent_type = type_id;

		set<SPIRVariable>(var_id, ptr_type_id, StorageClassUniformConstant, 0);
		set_name(var_id, "SPIRV_Cross_DummySampler");
		dummy_sampler_id = var_id;
		return var_id;
	}
	else
		return 0;
}

void Compiler::build_combined_image_samplers()
{
	ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
		func.combined_parameters.clear();
		func.shadow_arguments.clear();
		func.do_combined_parameters = true;
	});

	combined_image_samplers.clear();
	CombinedImageSamplerHandler handler(*this);
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
}
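// Typical remapping workflow for targets without separate images and samplers
// (a sketch; the instance and naming scheme are illustrative):
//   compiler.build_dummy_sampler_for_combined_images();
//   compiler.build_combined_image_samplers();
//   for (auto &remap : compiler.get_combined_image_samplers())
//       compiler.set_name(remap.combined_id, "Combined_" + compiler.get_name(remap.image_id) +
//                                            "_" + compiler.get_name(remap.sampler_id));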
SmallVector<SpecializationConstant> Compiler::get_specialization_constants() const
{
	SmallVector<SpecializationConstant> spec_consts;
	ir.for_each_typed_id<SPIRConstant>([&](uint32_t, const SPIRConstant &c) {
		if (c.specialization && has_decoration(c.self, DecorationSpecId))
			spec_consts.push_back({ c.self, get_decoration(c.self, DecorationSpecId) });
	});
	return spec_consts;
}

SPIRConstant &Compiler::get_constant(ConstantID id)
{
	return get<SPIRConstant>(id);
}

const SPIRConstant &Compiler::get_constant(ConstantID id) const
{
	return get<SPIRConstant>(id);
}
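// Sketch of overriding a specialization constant's default value from client code
// (constant_id 3 and the value are just examples; SPIRConstant layout assumed as in spirv_common.hpp):
//   for (auto &sc : compiler.get_specialization_constants())
//       if (sc.constant_id == 3)
//           compiler.get_constant(sc.id).m.c[0].r[0].u32 = 64;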
static bool exists_unaccessed_path_to_return(const CFG &cfg, uint32_t block, const unordered_set<uint32_t> &blocks,
                                             unordered_set<uint32_t> &visit_cache)
{
	// This block accesses the variable.
	if (blocks.find(block) != end(blocks))
		return false;

	// We are at the end of the CFG.
	if (cfg.get_succeeding_edges(block).empty())
		return true;

	// If any of our successors have a path to the end, there exists a path from block.
	for (auto &succ : cfg.get_succeeding_edges(block))
	{
		if (visit_cache.count(succ) == 0)
		{
			if (exists_unaccessed_path_to_return(cfg, succ, blocks, visit_cache))
				return true;
			visit_cache.insert(succ);
		}
	}

	return false;
}
void Compiler::analyze_parameter_preservation(
    SPIRFunction &entry, const CFG &cfg, const unordered_map<uint32_t, unordered_set<uint32_t>> &variable_to_blocks,
    const unordered_map<uint32_t, unordered_set<uint32_t>> &complete_write_blocks)
{
	for (auto &arg : entry.arguments)
	{
		// Non-pointers are always inputs.
		auto &type = get<SPIRType>(arg.type);
		if (!type.pointer)
			continue;

		// Opaque argument types are always in
		bool potential_preserve;
		switch (type.basetype)
		{
		case SPIRType::Sampler:
		case SPIRType::Image:
		case SPIRType::SampledImage:
		case SPIRType::AtomicCounter:
			potential_preserve = false;
			break;

		default:
			potential_preserve = true;
			break;
		}

		if (!potential_preserve)
			continue;

		auto itr = variable_to_blocks.find(arg.id);
		if (itr == end(variable_to_blocks))
		{
			// Variable is never accessed.
			continue;
		}

		// We have accessed a variable, but there was no complete writes to that variable.
		// We deduce that we must preserve the argument.
		itr = complete_write_blocks.find(arg.id);
		if (itr == end(complete_write_blocks))
		{
			arg.read_count++;
			continue;
		}

		// If there is a path through the CFG where no block completely writes to the variable, the variable will be in an undefined state
		// when the function returns. We therefore need to implicitly preserve the variable in case there are writers in the function.
		// Major case here is if a function is
		// void foo(int &var) { if (cond) var = 10; }
		// Using read/write counts, we will think it's just an out variable, but it really needs to be inout,
		// because if we don't write anything whatever we put into the function must return back to the caller.
		unordered_set<uint32_t> visit_cache;
		if (exists_unaccessed_path_to_return(cfg, entry.entry_block, itr->second, visit_cache))
			arg.read_count++;
	}
}
3323 Compiler::AnalyzeVariableScopeAccessHandler::AnalyzeVariableScopeAccessHandler(Compiler
&compiler_
,
3324 SPIRFunction
&entry_
)
3325 : compiler(compiler_
)
3330 bool Compiler::AnalyzeVariableScopeAccessHandler::follow_function_call(const SPIRFunction
&)
3332 // Only analyze within this function.
3336 void Compiler::AnalyzeVariableScopeAccessHandler::set_current_block(const SPIRBlock
&block
)
3338 current_block
= &block
;
3340 // If we're branching to a block which uses OpPhi, in GLSL
3341 // this will be a variable write when we branch,
3342 // so we need to track access to these variables as well to
3343 // have a complete picture.
3344 const auto test_phi
= [this, &block
](uint32_t to
) {
3345 auto &next
= compiler
.get
<SPIRBlock
>(to
);
3346 for (auto &phi
: next
.phi_variables
)
3348 if (phi
.parent
== block
.self
)
3350 accessed_variables_to_block
[phi
.function_variable
].insert(block
.self
);
3351 // Phi variables are also accessed in our target branch block.
3352 accessed_variables_to_block
[phi
.function_variable
].insert(next
.self
);
3354 notify_variable_access(phi
.local_variable
, block
.self
);
3359 switch (block
.terminator
)
3361 case SPIRBlock::Direct
:
3362 notify_variable_access(block
.condition
, block
.self
);
3363 test_phi(block
.next_block
);
3366 case SPIRBlock::Select
:
3367 notify_variable_access(block
.condition
, block
.self
);
3368 test_phi(block
.true_block
);
3369 test_phi(block
.false_block
);
3372 case SPIRBlock::MultiSelect
:
3374 notify_variable_access(block
.condition
, block
.self
);
3375 auto &cases
= compiler
.get_case_list(block
);
3376 for (auto &target
: cases
)
3377 test_phi(target
.block
);
3378 if (block
.default_block
)
3379 test_phi(block
.default_block
);
void Compiler::AnalyzeVariableScopeAccessHandler::notify_variable_access(uint32_t id, uint32_t block)
{
	if (id == 0)
		return;

	// Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers.
	auto itr = rvalue_forward_children.find(id);
	if (itr != end(rvalue_forward_children))
		for (auto child_id : itr->second)
			notify_variable_access(child_id, block);

	if (id_is_phi_variable(id))
		accessed_variables_to_block[id].insert(block);
	else if (id_is_potential_temporary(id))
		accessed_temporaries_to_block[id].insert(block);
}

bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_phi_variable(uint32_t id) const
{
	if (id >= compiler.get_current_id_bound())
		return false;
	auto *var = compiler.maybe_get<SPIRVariable>(id);
	return var && var->phi_variable;
}

bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_potential_temporary(uint32_t id) const
{
	if (id >= compiler.get_current_id_bound())
		return false;

	// Temporaries are not created before we start emitting code.
	return compiler.ir.ids[id].empty() || (compiler.ir.ids[id].get_type() == TypeExpression);
}

bool Compiler::AnalyzeVariableScopeAccessHandler::handle_terminator(const SPIRBlock &block)
{
	switch (block.terminator)
	{
	case SPIRBlock::Return:
		if (block.return_value)
			notify_variable_access(block.return_value, block.self);
		break;

	case SPIRBlock::Select:
	case SPIRBlock::MultiSelect:
		notify_variable_access(block.condition, block.self);
		break;

	default:
		break;
	}

	return true;
}
3443 bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op
, const uint32_t *args
, uint32_t length
)
3445 // Keep track of the types of temporaries, so we can hoist them out as necessary.
3446 uint32_t result_type
= 0, result_id
= 0;
3447 if (compiler
.instruction_to_result_type(result_type
, result_id
, op
, args
, length
))
3449 // For some opcodes, we will need to override the result id.
3450 // If we need to hoist the temporary, the temporary type is the input, not the result.
3451 if (op
== OpConvertUToAccelerationStructureKHR
)
3453 auto itr
= result_id_to_type
.find(args
[2]);
3454 if (itr
!= result_id_to_type
.end())
3455 result_type
= itr
->second
;
3458 result_id_to_type
[result_id
] = result_type
;
3469 auto *var
= compiler
.maybe_get_backing_variable(ptr
);
3471 // If we store through an access chain, we have a partial write.
3474 accessed_variables_to_block
[var
->self
].insert(current_block
->self
);
3475 if (var
->self
== ptr
)
3476 complete_write_variables_to_block
[var
->self
].insert(current_block
->self
);
3478 partial_write_variables_to_block
[var
->self
].insert(current_block
->self
);
3481 // args[0] might be an access chain we have to track use of.
3482 notify_variable_access(args
[0], current_block
->self
);
3483 // Might try to store a Phi variable here.
3484 notify_variable_access(args
[1], current_block
->self
);
3489 case OpInBoundsAccessChain
:
3490 case OpPtrAccessChain
:
3495 // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers.
3496 uint32_t ptr
= args
[2];
3497 auto *var
= compiler
.maybe_get
<SPIRVariable
>(ptr
);
3500 accessed_variables_to_block
[var
->self
].insert(current_block
->self
);
3501 rvalue_forward_children
[args
[1]].insert(var
->self
);
3504 // args[2] might be another access chain we have to track use of.
3505 for (uint32_t i
= 2; i
< length
; i
++)
3507 notify_variable_access(args
[i
], current_block
->self
);
3508 rvalue_forward_children
[args
[1]].insert(args
[i
]);
3511 // Also keep track of the access chain pointer itself.
3512 // In exceptionally rare cases, we can end up with a case where
3513 // the access chain is generated in the loop body, but is consumed in continue block.
3514 // This means we need complex loop workarounds, and we must detect this via CFG analysis.
3515 notify_variable_access(args
[1], current_block
->self
);
3517 // The result of an access chain is a fixed expression and is not really considered a temporary.
3518 auto &e
= compiler
.set
<SPIRExpression
>(args
[1], "", args
[0], true);
3519 auto *backing_variable
= compiler
.maybe_get_backing_variable(ptr
);
3520 e
.loaded_from
= backing_variable
? VariableID(backing_variable
->self
) : VariableID(0);
3522 // Other backends might use SPIRAccessChain for this later.
3523 compiler
.ir
.ids
[args
[1]].set_allow_type_rewrite();
3524 access_chain_expressions
.insert(args
[1]);
3535 auto *var
= compiler
.maybe_get_backing_variable(lhs
);
3537 // If we store through an access chain, we have a partial write.
3540 accessed_variables_to_block
[var
->self
].insert(current_block
->self
);
3541 if (var
->self
== lhs
)
3542 complete_write_variables_to_block
[var
->self
].insert(current_block
->self
);
3544 partial_write_variables_to_block
[var
->self
].insert(current_block
->self
);
3547 // args[0:1] might be access chains we have to track use of.
3548 for (uint32_t i
= 0; i
< 2; i
++)
3549 notify_variable_access(args
[i
], current_block
->self
);
3551 var
= compiler
.maybe_get_backing_variable(rhs
);
3553 accessed_variables_to_block
[var
->self
].insert(current_block
->self
);
3559 // OpCopyObject copies the underlying non-pointer type,
3560 // so any temp variable should be declared using the underlying type.
3561 // If the type is a pointer, get its base type and overwrite the result type mapping.
3562 auto &type
= compiler
.get
<SPIRType
>(result_type
);
3564 result_id_to_type
[result_id
] = type
.parent_type
;
3569 auto *var
= compiler
.maybe_get_backing_variable(args
[2]);
3571 accessed_variables_to_block
[var
->self
].insert(current_block
->self
);
3573 // Might be an access chain which we have to keep track of.
3574 notify_variable_access(args
[1], current_block
->self
);
3575 if (access_chain_expressions
.count(args
[2]))
3576 access_chain_expressions
.insert(args
[1]);
3578 // Might try to copy a Phi variable here.
3579 notify_variable_access(args
[2], current_block
->self
);
3587 uint32_t ptr
= args
[2];
3588 auto *var
= compiler
.maybe_get_backing_variable(ptr
);
3590 accessed_variables_to_block
[var
->self
].insert(current_block
->self
);
3592 // Loaded value is a temporary.
3593 notify_variable_access(args
[1], current_block
->self
);
3595 // Might be an access chain we have to track use of.
3596 notify_variable_access(args
[2], current_block
->self
);
3598 // If we're loading an opaque type we cannot lower it to a temporary,
3599 // we must defer access of args[2] until it's used.
3600 auto &type
= compiler
.get
<SPIRType
>(args
[0]);
3601 if (compiler
.type_is_opaque_value(type
))
3602 rvalue_forward_children
[args
[1]].insert(args
[2]);
3606 case OpFunctionCall
:
3611 // Return value may be a temporary.
3612 if (compiler
.get_type(args
[0]).basetype
!= SPIRType::Void
)
3613 notify_variable_access(args
[1], current_block
->self
);
3618 for (uint32_t i
= 0; i
< length
; i
++)
3620 auto *var
= compiler
.maybe_get_backing_variable(args
[i
]);
3623 accessed_variables_to_block
[var
->self
].insert(current_block
->self
);
3624 // Assume we can get partial writes to this variable.
3625 partial_write_variables_to_block
[var
->self
].insert(current_block
->self
);
3628 // Cannot easily prove if argument we pass to a function is completely written.
3629 // Usually, functions write to a dummy variable,
3630 // which is then copied to in full to the real argument.
3632 // Might try to copy a Phi variable here.
3633 notify_variable_access(args
[i
], current_block
->self
);
3640 // In case of variable pointers, we might access a variable here.
3641 // We cannot prove anything about these accesses however.
3642 for (uint32_t i
= 1; i
< length
; i
++)
3646 auto *var
= compiler
.maybe_get_backing_variable(args
[i
]);
3649 accessed_variables_to_block
[var
->self
].insert(current_block
->self
);
3650 // Assume we can get partial writes to this variable.
3651 partial_write_variables_to_block
[var
->self
].insert(current_block
->self
);
3655 // Might try to copy a Phi variable here.
3656 notify_variable_access(args
[i
], current_block
->self
);
3663 for (uint32_t i
= 4; i
< length
; i
++)
3664 notify_variable_access(args
[i
], current_block
->self
);
3665 notify_variable_access(args
[1], current_block
->self
);
3667 uint32_t extension_set
= args
[2];
3668 if (compiler
.get
<SPIRExtension
>(extension_set
).ext
== SPIRExtension::GLSL
)
3670 auto op_450
= static_cast<GLSLstd450
>(args
[3]);
3673 case GLSLstd450Modf
:
3674 case GLSLstd450Frexp
:
3676 uint32_t ptr
= args
[5];
3677 auto *var
= compiler
.maybe_get_backing_variable(ptr
);
3680 accessed_variables_to_block
[var
->self
].insert(current_block
->self
);
3681 if (var
->self
== ptr
)
3682 complete_write_variables_to_block
[var
->self
].insert(current_block
->self
);
3684 partial_write_variables_to_block
[var
->self
].insert(current_block
->self
);
3697 // Only result is a temporary.
3698 notify_variable_access(args
[1], current_block
->self
);
3703 // Uses literals, but cannot be a phi variable or temporary, so ignore.
3706 // Atomics shouldn't be able to access function-local variables.
3707 // Some GLSL builtins access a pointer.
3709 case OpCompositeInsert
:
3710 case OpVectorShuffle
:
3711 // Specialize for opcode which contains literals.
3712 for (uint32_t i
= 1; i
< 4; i
++)
3713 notify_variable_access(args
[i
], current_block
->self
);
3716 case OpCompositeExtract
:
3717 // Specialize for opcode which contains literals.
3718 for (uint32_t i
= 1; i
< 3; i
++)
3719 notify_variable_access(args
[i
], current_block
->self
);
3723 for (uint32_t i
= 0; i
< length
; i
++)
3725 // Argument 3 is a literal.
3727 notify_variable_access(args
[i
], current_block
->self
);
3731 case OpImageSampleImplicitLod
:
3732 case OpImageSampleExplicitLod
:
3733 case OpImageSparseSampleImplicitLod
:
3734 case OpImageSparseSampleExplicitLod
:
3735 case OpImageSampleProjImplicitLod
:
3736 case OpImageSampleProjExplicitLod
:
3737 case OpImageSparseSampleProjImplicitLod
:
3738 case OpImageSparseSampleProjExplicitLod
:
3740 case OpImageSparseFetch
:
3742 case OpImageSparseRead
:
3743 for (uint32_t i
= 1; i
< length
; i
++)
3745 // Argument 4 is a literal.
3747 notify_variable_access(args
[i
], current_block
->self
);
3751 case OpImageSampleDrefImplicitLod
:
3752 case OpImageSampleDrefExplicitLod
:
3753 case OpImageSparseSampleDrefImplicitLod
:
3754 case OpImageSparseSampleDrefExplicitLod
:
3755 case OpImageSampleProjDrefImplicitLod
:
3756 case OpImageSampleProjDrefExplicitLod
:
3757 case OpImageSparseSampleProjDrefImplicitLod
:
3758 case OpImageSparseSampleProjDrefExplicitLod
:
3760 case OpImageSparseGather
:
3761 case OpImageDrefGather
:
3762 case OpImageSparseDrefGather
:
3763 for (uint32_t i
= 1; i
< length
; i
++)
3765 // Argument 5 is a literal.
3767 notify_variable_access(args
[i
], current_block
->self
);
3773 // Rather dirty way of figuring out where Phi variables are used.
3774 // As long as only IDs are used, we can scan through instructions and try to find any evidence that
3775 // the ID of a variable has been used.
3776 // There are potential false positives here where a literal is used in-place of an ID,
3777 // but worst case, it does not affect the correctness of the compile.
3778 // Exhaustive analysis would be better here, but it's not worth it for now.
3779 for (uint32_t i
= 0; i
< length
; i
++)
3780 notify_variable_access(args
[i
], current_block
->self
);
Compiler::StaticExpressionAccessHandler::StaticExpressionAccessHandler(Compiler &compiler_, uint32_t variable_id_)
    : compiler(compiler_)
    , variable_id(variable_id_)
{
}

bool Compiler::StaticExpressionAccessHandler::follow_function_call(const SPIRFunction &)
{
	return false;
}

bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length)
{
	switch (op)
	{
	case OpStore:
		if (length < 2)
			return false;
		if (args[0] == variable_id)
		{
			static_expression = args[1];
			write_count++;
		}
		break;

	case OpLoad:
		if (length < 3)
			return false;
		if (args[2] == variable_id && static_expression == 0) // Tried to read from variable before it was initialized.
			return false;
		break;

	case OpAccessChain:
	case OpInBoundsAccessChain:
	case OpPtrAccessChain:
		if (length < 3)
			return false;
		if (args[2] == variable_id) // If we try to access chain our candidate variable before we store to it, bail.
			return false;
		break;

	default:
		break;
	}

	return true;
}
3835 void Compiler::find_function_local_luts(SPIRFunction
&entry
, const AnalyzeVariableScopeAccessHandler
&handler
,
3836 bool single_function
)
3838 auto &cfg
= *function_cfgs
.find(entry
.self
)->second
;
3840 // For each variable which is statically accessed.
3841 for (auto &accessed_var
: handler
.accessed_variables_to_block
)
3843 auto &blocks
= accessed_var
.second
;
3844 auto &var
= get
<SPIRVariable
>(accessed_var
.first
);
3845 auto &type
= expression_type(accessed_var
.first
);
3847 // First check if there are writes to the variable. Later, if there are none, we'll
3848 // reconsider it as globally accessed LUT.
3849 if (!var
.is_written_to
)
3851 var
.is_written_to
= handler
.complete_write_variables_to_block
.count(var
.self
) != 0 ||
3852 handler
.partial_write_variables_to_block
.count(var
.self
) != 0;
3855 // Only consider function local variables here.
3856 // If we only have a single function in our CFG, private storage is also fine,
3857 // since it behaves like a function local variable.
3858 bool allow_lut
= var
.storage
== StorageClassFunction
|| (single_function
&& var
.storage
== StorageClassPrivate
);
3862 // We cannot be a phi variable.
3863 if (var
.phi_variable
)
3866 // Only consider arrays here.
3867 if (type
.array
.empty())
3870 // If the variable has an initializer, make sure it is a constant expression.
3871 uint32_t static_constant_expression
= 0;
3872 if (var
.initializer
)
3874 if (ir
.ids
[var
.initializer
].get_type() != TypeConstant
)
3876 static_constant_expression
= var
.initializer
;
3878 // There can be no stores to this variable, we have now proved we have a LUT.
3879 if (var
.is_written_to
)
3884 // We can have one, and only one write to the variable, and that write needs to be a constant.
3886 // No partial writes allowed.
3887 if (handler
.partial_write_variables_to_block
.count(var
.self
) != 0)
3890 auto itr
= handler
.complete_write_variables_to_block
.find(var
.self
);
3893 if (itr
== end(handler
.complete_write_variables_to_block
))
3896 // We write to the variable in more than one block.
3897 auto &write_blocks
= itr
->second
;
3898 if (write_blocks
.size() != 1)
3901 // The write needs to happen in the dominating block.
3902 DominatorBuilder
builder(cfg
);
3903 for (auto &block
: blocks
)
3904 builder
.add_block(block
);
3905 uint32_t dominator
= builder
.get_dominator();
3907 // The complete write happened in a branch or similar, cannot deduce static expression.
3908 if (write_blocks
.count(dominator
) == 0)
3911 // Find the static expression for this variable.
3912 StaticExpressionAccessHandler
static_expression_handler(*this, var
.self
);
3913 traverse_all_reachable_opcodes(get
<SPIRBlock
>(dominator
), static_expression_handler
);
3915 // We want one, and exactly one write
3916 if (static_expression_handler
.write_count
!= 1 || static_expression_handler
.static_expression
== 0)
3919 // Is it a constant expression?
3920 if (ir
.ids
[static_expression_handler
.static_expression
].get_type() != TypeConstant
)
3924 static_constant_expression
= static_expression_handler
.static_expression
;
3927 get
<SPIRConstant
>(static_constant_expression
).is_used_as_lut
= true;
3928 var
.static_expression
= static_constant_expression
;
3929 var
.statically_assigned
= true;
3930 var
.remapped_variable
= true;
3934 void Compiler::analyze_variable_scope(SPIRFunction
&entry
, AnalyzeVariableScopeAccessHandler
&handler
)
3936 // First, we map out all variable access within a function.
3937 // Essentially a map of block -> { variables accessed in the basic block }
3938 traverse_all_reachable_opcodes(entry
, handler
);
3940 auto &cfg
= *function_cfgs
.find(entry
.self
)->second
;
3942 // Analyze if there are parameters which need to be implicitly preserved with an "in" qualifier.
3943 analyze_parameter_preservation(entry
, cfg
, handler
.accessed_variables_to_block
,
3944 handler
.complete_write_variables_to_block
);
3946 unordered_map
<uint32_t, uint32_t> potential_loop_variables
;
3948 // Find the loop dominator block for each block.
3949 for (auto &block_id
: entry
.blocks
)
3951 auto &block
= get
<SPIRBlock
>(block_id
);
3953 auto itr
= ir
.continue_block_to_loop_header
.find(block_id
);
3954 if (itr
!= end(ir
.continue_block_to_loop_header
) && itr
->second
!= block_id
)
3956 // Continue block might be unreachable in the CFG, but we still like to know the loop dominator.
3957 // Edge case is when continue block is also the loop header, don't set the dominator in this case.
3958 block
.loop_dominator
= itr
->second
;
3962 uint32_t loop_dominator
= cfg
.find_loop_dominator(block_id
);
3963 if (loop_dominator
!= block_id
)
3964 block
.loop_dominator
= loop_dominator
;
3966 block
.loop_dominator
= SPIRBlock::NoDominator
;
    // For each variable which is statically accessed.
    for (auto &var : handler.accessed_variables_to_block)
    {
        // Only deal with variables which are considered local variables in this function.
        if (find(begin(entry.local_variables), end(entry.local_variables), VariableID(var.first)) ==
            end(entry.local_variables))
            continue;

        DominatorBuilder builder(cfg);
        auto &blocks = var.second;
        auto &type = expression_type(var.first);
        BlockID potential_continue_block = 0;

        // Figure out which block is dominating all accesses of those variables.
        for (auto &block : blocks)
        {
            // If we're accessing a variable inside a continue block, this variable might be a loop variable.
            // We can only use loop variables with scalars, as we cannot track static expressions for vectors.
            if (is_continue(block))
            {
                // Potentially awkward case to check for.
                // We might have a variable inside a loop, which is touched by the continue block,
                // but is not actually a loop variable.
                // The continue block is dominated by the inner part of the loop, which does not make sense in high-level
                // language output because it will be declared before the body,
                // so we will have to lift the dominator up to the relevant loop header instead.
                builder.add_block(ir.continue_block_to_loop_header[block]);

                // Arrays or structs cannot be loop variables.
                if (type.vecsize == 1 && type.columns == 1 && type.basetype != SPIRType::Struct && type.array.empty())
                {
                    // The variable is used in multiple continue blocks, this is not a loop
                    // candidate, signal that by setting block to -1u.
                    if (potential_continue_block == 0)
                        potential_continue_block = block;
                    else
                        potential_continue_block = ~(0u);
                }
            }

            builder.add_block(block);
        }

        builder.lift_continue_block_dominator();

        // Add it to a per-block list of variables.
        BlockID dominating_block = builder.get_dominator();

        if (dominating_block && potential_continue_block != 0 && potential_continue_block != ~0u)
        {
            auto &inner_block = get<SPIRBlock>(dominating_block);

            BlockID merge_candidate = 0;

            // Analyze the dominator. If it lives in a different loop scope than the candidate continue
            // block, reject the loop variable candidate.
            if (inner_block.merge == SPIRBlock::MergeLoop)
                merge_candidate = inner_block.merge_block;
            else if (inner_block.loop_dominator != SPIRBlock::NoDominator)
                merge_candidate = get<SPIRBlock>(inner_block.loop_dominator).merge_block;

            if (merge_candidate != 0 && cfg.is_reachable(merge_candidate))
            {
                // If the merge block has a higher post-visit order, we know that continue candidate
                // cannot reach the merge block, and we have two separate scopes.
                if (!cfg.is_reachable(potential_continue_block) ||
                    cfg.get_visit_order(merge_candidate) > cfg.get_visit_order(potential_continue_block))
                {
                    potential_continue_block = 0;
                }
            }
        }

        if (potential_continue_block != 0 && potential_continue_block != ~0u)
            potential_loop_variables[var.first] = potential_continue_block;

        // For variables whose dominating block is inside a loop, there is a risk that these variables
        // actually need to be preserved across loop iterations. We can express this by adding
        // a "read" access to the loop header.
        // In the dominating block, we must see an OpStore or equivalent as the first access of an OpVariable.
        // Should that fail, we look for the outermost loop header and tack on an access there.
        // Phi nodes cannot have this problem.
        if (dominating_block)
        {
            auto &variable = get<SPIRVariable>(var.first);
            if (!variable.phi_variable)
            {
                auto *block = &get<SPIRBlock>(dominating_block);
                bool preserve = may_read_undefined_variable_in_block(*block, var.first);
                if (preserve)
                {
                    // Find the outermost loop scope.
                    while (block->loop_dominator != BlockID(SPIRBlock::NoDominator))
                        block = &get<SPIRBlock>(block->loop_dominator);

                    if (block->self != dominating_block)
                    {
                        builder.add_block(block->self);
                        dominating_block = builder.get_dominator();
                    }
                }
            }
        }

        // If all blocks here are dead code, this will be 0, so the variable in question
        // will be completely eliminated.
        if (dominating_block)
        {
            auto &block = get<SPIRBlock>(dominating_block);
            block.dominated_variables.push_back(var.first);
            get<SPIRVariable>(var.first).dominator = dominating_block;
        }
    }
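
    // Same idea for temporaries (SSA results): find the block which dominates every use so the temporary
    // can be declared there, and hoist or force it when a continue block or a complex loop would otherwise
    // observe it before its declaration.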
    for (auto &var : handler.accessed_temporaries_to_block)
    {
        auto itr = handler.result_id_to_type.find(var.first);

        if (itr == end(handler.result_id_to_type))
        {
            // We found a false positive ID being used, ignore.
            // This should probably be an assert.
            continue;
        }

        // There is no point in doing domination analysis for opaque types.
        auto &type = get<SPIRType>(itr->second);
        if (type_is_opaque_value(type))
            continue;

        DominatorBuilder builder(cfg);
        bool force_temporary = false;
        bool used_in_header_hoisted_continue_block = false;

        // Figure out which block is dominating all accesses of those temporaries.
        auto &blocks = var.second;
        for (auto &block : blocks)
        {
            builder.add_block(block);

            if (blocks.size() != 1 && is_continue(block))
            {
                // The risk here is that inner loop can dominate the continue block.
                // Any temporary we access in the continue block must be declared before the loop.
                // This is moot for complex loops however.
                auto &loop_header_block = get<SPIRBlock>(ir.continue_block_to_loop_header[block]);
                assert(loop_header_block.merge == SPIRBlock::MergeLoop);
                builder.add_block(loop_header_block.self);
                used_in_header_hoisted_continue_block = true;
            }
        }

        uint32_t dominating_block = builder.get_dominator();

        if (blocks.size() != 1 && is_single_block_loop(dominating_block))
        {
            // Awkward case, because the loop header is also the continue block,
            // so hoisting to loop header does not help.
            force_temporary = true;
        }

        if (dominating_block)
        {
            // If we touch a variable in the dominating block, this is the expected setup.
            // SPIR-V normally mandates this, but we have extra cases for temporary use inside loops.
            bool first_use_is_dominator = blocks.count(dominating_block) != 0;

            if (!first_use_is_dominator || force_temporary)
            {
                if (handler.access_chain_expressions.count(var.first))
                {
                    // Exceptionally rare case.
                    // We cannot declare temporaries of access chains (except on MSL perhaps with pointers).
                    // Rather than do that, we force the indexing expressions to be declared in the right scope by
                    // tracking their usage to that end. There is no temporary to hoist.
                    // However, we still need to observe declaration order of the access chain.

                    if (used_in_header_hoisted_continue_block)
                    {
                        // For this scenario, we used an access chain inside a continue block where we also registered an access to header block.
                        // This is a problem as we need to declare an access chain properly first with full definition.
                        // We cannot use temporaries for these expressions,
                        // so we must make sure the access chain is declared ahead of time.
                        // Force a complex for loop to deal with this.
                        // TODO: Out-of-order declaring for loops where continue blocks are emitted last might be another option.
                        auto &loop_header_block = get<SPIRBlock>(dominating_block);
                        assert(loop_header_block.merge == SPIRBlock::MergeLoop);
                        loop_header_block.complex_continue = true;
                    }
                }
                else
                {
                    // This should be very rare, but if we try to declare a temporary inside a loop,
                    // and that temporary is used outside the loop as well (spirv-opt inliner likes this)
                    // we should actually emit the temporary outside the loop.
                    hoisted_temporaries.insert(var.first);
                    forced_temporaries.insert(var.first);

                    auto &block_temporaries = get<SPIRBlock>(dominating_block).declare_temporary;
                    block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first);
                }
            }
            else if (blocks.size() > 1)
            {
                // Keep track of the temporary as we might have to declare this temporary.
                // This can happen if the loop header dominates a temporary, but we have a complex fallback loop.
                // In this case, the header is actually inside the for (;;) {} block, and we have problems.
                // What we need to do is hoist the temporaries outside the for (;;) {} block in case the header block
                // declares the temporary.
                auto &block_temporaries = get<SPIRBlock>(dominating_block).potential_declare_temporary;
                block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first);
            }
        }
    }
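
    // A variable which passes the checks below can be emitted as a classic for-loop variable, roughly
    // (illustrative): "for (int i = 0; ...; i++)" rather than declaring "int i;" before the loop and
    // mutating it separately in the continue block.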
    unordered_set<uint32_t> seen_blocks;

    // Now, try to analyze whether or not these variables are actually loop variables.
    for (auto &loop_variable : potential_loop_variables)
    {
        auto &var = get<SPIRVariable>(loop_variable.first);
        auto dominator = var.dominator;
        BlockID block = loop_variable.second;

        // The variable was accessed in multiple continue blocks, ignore.
        if (block == BlockID(~(0u)) || block == BlockID(0))
            continue;

        // Dead code.
        if (dominator == ID(0))
            continue;

        BlockID header = 0;

        // Find the loop header for this block if we are a continue block.
        {
            auto itr = ir.continue_block_to_loop_header.find(block);
            if (itr != end(ir.continue_block_to_loop_header))
            {
                header = itr->second;
            }
            else if (get<SPIRBlock>(block).continue_block == block)
            {
                // Also check for self-referential continue block.
                header = block;
            }
        }

        assert(header);
        auto &header_block = get<SPIRBlock>(header);
        auto &blocks = handler.accessed_variables_to_block[loop_variable.first];

        // If a loop variable is not used before the loop, it's probably not a loop variable.
        bool has_accessed_variable = blocks.count(header) != 0;

        // Now, there are two conditions we need to meet for the variable to be a loop variable.
        // 1. The dominating block must have a branch-free path to the loop header,
        //    this way we statically know which expression should be part of the loop variable initializer.

        // Walk from the dominator, if there is one straight edge connecting
        // dominator and loop header, we statically know the loop initializer.
        bool static_loop_init = true;
        while (dominator != header)
        {
            if (blocks.count(dominator) != 0)
                has_accessed_variable = true;

            auto &succ = cfg.get_succeeding_edges(dominator);
            if (succ.size() != 1)
            {
                static_loop_init = false;
                break;
            }

            auto &pred = cfg.get_preceding_edges(succ.front());
            if (pred.size() != 1 || pred.front() != dominator)
            {
                static_loop_init = false;
                break;
            }

            dominator = succ.front();
        }

        if (!static_loop_init || !has_accessed_variable)
            continue;

        // The second condition we need to meet is that no access after the loop
        // merge can occur. Walk the CFG to see if we find anything.

        seen_blocks.clear();
        cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) -> bool {
            // We found a block which accesses the variable outside the loop.
            if (blocks.find(walk_block) != end(blocks))
                static_loop_init = false;
            return true;
        });

        if (!static_loop_init)
            continue;

        // We have a loop variable.
        header_block.loop_variables.push_back(loop_variable.first);
        // Need to sort here as variables come from an unordered container, and pushing stuff in wrong order
        // will break reproducibility in regression runs.
        sort(begin(header_block.loop_variables), end(header_block.loop_variables));
        get<SPIRVariable>(loop_variable.first).loop_variable = true;
    }
}
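
// Conservative per-block scan used above: return false only when the variable is clearly written
// (e.g. OpStore) before any potential read in the block; loads, access chains, phis, selects,
// function-call arguments, or no access at all are all treated as "might read undefined".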
bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var)
{
    for (auto &op : block.ops)
    {
        auto *ops = stream(op);
        switch (op.op)
        {
        case OpStore:
        case OpCopyMemory:
            if (ops[0] == var)
                return false;
            break;

        case OpAccessChain:
        case OpInBoundsAccessChain:
        case OpPtrAccessChain:
            // Access chains are generally used to partially read and write. It's too hard to analyze
            // if all constituents are written fully before continuing, so just assume it's preserved.
            // This is the same as the parameter preservation analysis.
            if (ops[2] == var)
                return true;
            break;

        case OpSelect:
            // Variable pointers.
            // We might read before writing.
            if (ops[3] == var || ops[4] == var)
                return true;
            break;

        case OpPhi:
        {
            // Variable pointers.
            // We might read before writing.
            if (op.length < 2)
                break;

            uint32_t count = op.length - 2;
            for (uint32_t i = 0; i < count; i += 2)
                if (ops[i + 2] == var)
                    return true;
            break;
        }

        case OpCopyObject:
        case OpLoad:
            if (ops[2] == var)
                return true;
            break;

        case OpFunctionCall:
        {
            if (op.length < 4)
                break;

            // May read before writing.
            uint32_t count = op.length - 3;
            for (uint32_t i = 0; i < count; i++)
                if (ops[i + 3] == var)
                    return true;
            break;
        }

        default:
            break;
        }
    }

    // Not accessed somehow, at least not in a usual fashion.
    // It's likely accessed in a branch, so assume we must preserve.
    return true;
}

Bitset Compiler::get_buffer_block_flags(VariableID id) const
{
    return ir.get_buffer_block_flags(get<SPIRVariable>(id));
}

bool Compiler::get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type)
{
    if (type.basetype == SPIRType::Struct)
    {
        base_type = SPIRType::Unknown;
        for (auto &member_type : type.member_types)
        {
            SPIRType::BaseType member_base;
            if (!get_common_basic_type(get<SPIRType>(member_type), member_base))
                return false;

            if (base_type == SPIRType::Unknown)
                base_type = member_base;
            else if (base_type != member_base)
                return false;
        }
        return true;
    }
    else
    {
        base_type = type.basetype;
        return true;
    }
}
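
// Example: a shader declaring "out float gl_ClipDistance[2]" (array size 2 in SPIR-V) causes
// handle_builtin() below to record clip_distance_count = 2, so backends can re-declare the
// builtin array with an explicit size.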
void Compiler::ActiveBuiltinHandler::handle_builtin(const SPIRType &type, BuiltIn builtin,
                                                    const Bitset &decoration_flags)
{
    // If used, we will need to explicitly declare a new array size for these builtins.

    if (builtin == BuiltInClipDistance)
    {
        if (!type.array_size_literal[0])
            SPIRV_CROSS_THROW("Array size for ClipDistance must be a literal.");
        uint32_t array_size = type.array[0];
        if (array_size == 0)
            SPIRV_CROSS_THROW("Array size for ClipDistance must not be unsized.");
        compiler.clip_distance_count = array_size;
    }
    else if (builtin == BuiltInCullDistance)
    {
        if (!type.array_size_literal[0])
            SPIRV_CROSS_THROW("Array size for CullDistance must be a literal.");
        uint32_t array_size = type.array[0];
        if (array_size == 0)
            SPIRV_CROSS_THROW("Array size for CullDistance must not be unsized.");
        compiler.cull_distance_count = array_size;
    }
    else if (builtin == BuiltInPosition)
    {
        if (decoration_flags.get(DecorationInvariant))
            compiler.position_invariant = true;
    }
}

void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id, bool allow_blocks)
{
    // Only handle plain variables here.
    // Builtins which are part of a block are handled in AccessChain.
    // If allow_blocks is used however, this is to handle initializers of blocks,
    // which implies that all members are written to.

    auto *var = compiler.maybe_get<SPIRVariable>(id);
    auto *m = compiler.ir.find_meta(id);

    if (var && m)
    {
        auto &type = compiler.get<SPIRType>(var->basetype);
        auto &decorations = m->decoration;
        auto &flags = type.storage == StorageClassInput ?
                      compiler.active_input_builtins : compiler.active_output_builtins;
        if (decorations.builtin)
        {
            flags.set(decorations.builtin_type);
            handle_builtin(type, decorations.builtin_type, decorations.decoration_flags);
        }
        else if (allow_blocks && compiler.has_decoration(type.self, DecorationBlock))
        {
            uint32_t member_count = uint32_t(type.member_types.size());
            for (uint32_t i = 0; i < member_count; i++)
            {
                if (compiler.has_member_decoration(type.self, i, DecorationBuiltIn))
                {
                    auto &member_type = compiler.get<SPIRType>(type.member_types[i]);
                    BuiltIn builtin = BuiltIn(compiler.get_member_decoration(type.self, i, DecorationBuiltIn));
                    flags.set(builtin);
                    handle_builtin(member_type, builtin, compiler.get_member_decoration_bitset(type.self, i));
                }
            }
        }
    }
}

void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id)
{
    add_if_builtin(id, false);
}

void Compiler::ActiveBuiltinHandler::add_if_builtin_or_block(uint32_t id)
{
    add_if_builtin(id, true);
}
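
// The handler below walks every reachable opcode and marks builtins as "active" only when they are
// actually read or written, including builtins reached through access chains into I/O blocks
// (e.g. gl_PerVertex members).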
bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t length)
{
    switch (opcode)
    {
    case OpStore:
        if (length < 1)
            return false;

        add_if_builtin(args[0]);
        break;

    case OpCopyMemory:
        if (length < 2)
            return false;

        add_if_builtin(args[0]);
        add_if_builtin(args[1]);
        break;

    case OpCopyObject:
    case OpLoad:
        if (length < 3)
            return false;

        add_if_builtin(args[2]);
        break;

    case OpSelect:
        if (length < 5)
            return false;

        add_if_builtin(args[3]);
        add_if_builtin(args[4]);
        break;

    case OpPhi:
    {
        if (length < 2)
            return false;

        uint32_t count = length - 2;
        args += 2;
        for (uint32_t i = 0; i < count; i += 2)
            add_if_builtin(args[i]);
        break;
    }

    case OpFunctionCall:
    {
        if (length < 3)
            return false;

        uint32_t count = length - 3;
        args += 3;
        for (uint32_t i = 0; i < count; i++)
            add_if_builtin(args[i]);
        break;
    }

    case OpAccessChain:
    case OpInBoundsAccessChain:
    case OpPtrAccessChain:
    {
        if (length < 4)
            return false;

        // Only consider global variables, cannot consider variables in functions yet, or other
        // access chains as they have not been created yet.
        auto *var = compiler.maybe_get<SPIRVariable>(args[2]);
        if (!var)
            break;

        // Required if we access chain into builtins like gl_GlobalInvocationID.
        add_if_builtin(args[2]);

        // Start traversing type hierarchy at the proper non-pointer types.
        auto *type = &compiler.get_variable_data_type(*var);

        auto &flags =
            var->storage == StorageClassInput ? compiler.active_input_builtins : compiler.active_output_builtins;

        uint32_t count = length - 3;
        args += 3;
        for (uint32_t i = 0; i < count; i++)
        {
            // PtrAccessChain functions more like a pointer offset. Type remains the same.
            if (opcode == OpPtrAccessChain && i == 0)
                continue;

            // Arrays
            if (!type->array.empty())
            {
                type = &compiler.get<SPIRType>(type->parent_type);
            }
            // Structs
            else if (type->basetype == SPIRType::Struct)
            {
                uint32_t index = compiler.get<SPIRConstant>(args[i]).scalar();

                if (index < uint32_t(compiler.ir.meta[type->self].members.size()))
                {
                    auto &decorations = compiler.ir.meta[type->self].members[index];
                    if (decorations.builtin)
                    {
                        flags.set(decorations.builtin_type);
                        handle_builtin(compiler.get<SPIRType>(type->member_types[index]), decorations.builtin_type,
                                       decorations.decoration_flags);
                    }
                }

                type = &compiler.get<SPIRType>(type->member_types[index]);
            }
            else
            {
                // No point in traversing further. We won't find any extra builtins.
                break;
            }
        }
        break;
    }

    default:
        break;
    }

    return true;
}

void Compiler::update_active_builtins()
{
    active_input_builtins.reset();
    active_output_builtins.reset();
    cull_distance_count = 0;
    clip_distance_count = 0;
    ActiveBuiltinHandler handler(*this);
    traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);

    ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
        if (var.storage != StorageClassOutput)
            return;
        if (!interface_variable_exists_in_entry_point(var.self))
            return;

        // Also, make sure we preserve output variables which are only initialized, but never accessed by any code.
        if (var.initializer != ID(0))
            handler.add_if_builtin_or_block(var.self);
    });
}

// Returns whether this shader uses a builtin of the storage class
bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage) const
{
    const Bitset *flags;
    switch (storage)
    {
    case StorageClassInput:
        flags = &active_input_builtins;
        break;
    case StorageClassOutput:
        flags = &active_output_builtins;
        break;

    default:
        return false;
    }
    return flags->get(builtin);
}
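
// Ties together the two handlers below: first find every sampled image which is used with a depth
// comparison (Dref), then propagate that comparison state through OpSampledImage/OpLoad dependency
// chains so separate images, separate samplers and combined image samplers all agree.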
void Compiler::analyze_image_and_sampler_usage()
{
    CombinedImageSamplerDrefHandler dref_handler(*this);
    traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), dref_handler);

    CombinedImageSamplerUsageHandler handler(*this, dref_handler.dref_combined_samplers);
    traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);

    // Need to run this traversal twice. First time, we propagate any comparison sampler usage from leaf functions
    // up to main().
    // In the second pass, we can propagate up forced depth state coming from main() up into leaf functions.
    handler.dependency_hierarchy.clear();
    traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);

    comparison_ids = std::move(handler.comparison_ids);
    need_subpass_input = handler.need_subpass_input;
    need_subpass_input_ms = handler.need_subpass_input_ms;

    // Forward information from separate images and samplers into combined image samplers.
    for (auto &combined : combined_image_samplers)
        if (comparison_ids.count(combined.sampler_id))
            comparison_ids.insert(combined.combined_id);
}

bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t)
{
    // Mark all sampled images which are used with Dref.
    switch (opcode)
    {
    case OpImageSampleDrefExplicitLod:
    case OpImageSampleDrefImplicitLod:
    case OpImageSampleProjDrefExplicitLod:
    case OpImageSampleProjDrefImplicitLod:
    case OpImageSparseSampleProjDrefImplicitLod:
    case OpImageSparseSampleDrefImplicitLod:
    case OpImageSparseSampleProjDrefExplicitLod:
    case OpImageSparseSampleDrefExplicitLod:
    case OpImageDrefGather:
    case OpImageSparseDrefGather:
        dref_combined_samplers.insert(args[2]);
        return true;

    default:
        break;
    }

    return true;
}

const CFG &Compiler::get_cfg_for_current_function() const
{
    assert(current_function);
    return get_cfg_for_function(current_function->self);
}

const CFG &Compiler::get_cfg_for_function(uint32_t id) const
{
    auto cfg_itr = function_cfgs.find(id);
    assert(cfg_itr != end(function_cfgs));
    assert(cfg_itr->second);
    return *cfg_itr->second;
}
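
// Entry point for the CFG and variable-scope analysis: build a CFG for every reachable function,
// run analyze_variable_scope() and LUT detection on each, then verify that multi-variable loop
// initializers share the same type and decorations (otherwise the loop-variable promotion is undone).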
void Compiler::build_function_control_flow_graphs_and_analyze()
{
    CFGBuilder handler(*this);
    handler.function_cfgs[ir.default_entry_point].reset(new CFG(*this, get<SPIRFunction>(ir.default_entry_point)));
    traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
    function_cfgs = std::move(handler.function_cfgs);
    bool single_function = function_cfgs.size() <= 1;

    for (auto &f : function_cfgs)
    {
        auto &func = get<SPIRFunction>(f.first);
        AnalyzeVariableScopeAccessHandler scope_handler(*this, func);
        analyze_variable_scope(func, scope_handler);
        find_function_local_luts(func, scope_handler, single_function);

        // Check if we can actually use the loop variables we found in analyze_variable_scope.
        // To use multiple initializers, we need the same type and qualifiers.
        for (auto block : func.blocks)
        {
            auto &b = get<SPIRBlock>(block);
            if (b.loop_variables.size() < 2)
                continue;

            auto &flags = get_decoration_bitset(b.loop_variables.front());
            uint32_t type = get<SPIRVariable>(b.loop_variables.front()).basetype;
            bool invalid_initializers = false;
            for (auto loop_variable : b.loop_variables)
            {
                if (flags != get_decoration_bitset(loop_variable) ||
                    type != get<SPIRVariable>(b.loop_variables.front()).basetype)
                {
                    invalid_initializers = true;
                    break;
                }
            }

            if (invalid_initializers)
            {
                for (auto loop_variable : b.loop_variables)
                    get<SPIRVariable>(loop_variable).loop_variable = false;
                b.loop_variables.clear();
            }
        }
    }

    // Find LUTs which are not function local. Only consider this case if the CFG is multi-function,
    // otherwise we treat Private as Function trivially.
    // Needs to be analyzed from the outside since we have to block the LUT optimization if at least
    // one function writes to it.
    if (!single_function)
    {
        for (auto &id : global_variables)
        {
            auto &var = get<SPIRVariable>(id);
            auto &type = get_variable_data_type(var);

            if (is_array(type) && var.storage == StorageClassPrivate &&
                var.initializer && !var.is_written_to &&
                ir.ids[var.initializer].get_type() == TypeConstant)
            {
                get<SPIRConstant>(var.initializer).is_used_as_lut = true;
                var.static_expression = var.initializer;
                var.statically_assigned = true;
                var.remapped_variable = true;
            }
        }
    }
}

Compiler::CFGBuilder::CFGBuilder(Compiler &compiler_)
    : compiler(compiler_)
{
}

bool Compiler::CFGBuilder::handle(spv::Op, const uint32_t *, uint32_t)
{
    return true;
}

bool Compiler::CFGBuilder::follow_function_call(const SPIRFunction &func)
{
    if (function_cfgs.find(func.self) == end(function_cfgs))
    {
        function_cfgs[func.self].reset(new CFG(compiler, func));
        return true;
    }
    else
        return false;
}

void Compiler::CombinedImageSamplerUsageHandler::add_dependency(uint32_t dst, uint32_t src)
{
    dependency_hierarchy[dst].insert(src);
    // Propagate up any comparison state if we're loading from one such variable.
    if (comparison_ids.count(src))
        comparison_ids.insert(dst);
}

bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint32_t *args, uint32_t length)
{
    if (length < 3)
        return false;

    auto &func = compiler.get<SPIRFunction>(args[2]);
    const auto *arg = &args[3];
    length -= 3;

    for (uint32_t i = 0; i < length; i++)
    {
        auto &argument = func.arguments[i];
        add_dependency(argument.id, arg[i]);
    }

    return true;
}

void Compiler::CombinedImageSamplerUsageHandler::add_hierarchy_to_comparison_ids(uint32_t id)
{
    // Traverse the variable dependency hierarchy and tag everything in its path with comparison ids.
    comparison_ids.insert(id);

    for (auto &dep_id : dependency_hierarchy[id])
        add_hierarchy_to_comparison_ids(dep_id);
}

bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
    switch (opcode)
    {
    case OpAccessChain:
    case OpInBoundsAccessChain:
    case OpPtrAccessChain:
    case OpLoad:
    {
        if (length < 3)
            return false;

        add_dependency(args[1], args[2]);

        // Ideally defer this to OpImageRead, but then we'd need to track loaded IDs.
        // If we load an image, we're going to use it and there is little harm in declaring an unused gl_FragCoord.
        auto &type = compiler.get<SPIRType>(args[0]);
        if (type.image.dim == DimSubpassData)
        {
            need_subpass_input = true;
            if (type.image.ms)
                need_subpass_input_ms = true;
        }

        // If we load a SampledImage and it will be used with Dref, propagate the state up.
        if (dref_combined_samplers.count(args[1]) != 0)
            add_hierarchy_to_comparison_ids(args[1]);
        break;
    }

    case OpSampledImage:
    {
        if (length < 4)
            return false;

        // If the underlying resource has been used for comparison then duplicate loads of that resource must be too.
        // This image must be a depth image.
        uint32_t result_id = args[1];
        uint32_t image = args[2];
        uint32_t sampler = args[3];

        if (dref_combined_samplers.count(result_id) != 0)
        {
            add_hierarchy_to_comparison_ids(image);

            // This sampler must be a SamplerComparisonState, and not a regular SamplerState.
            add_hierarchy_to_comparison_ids(sampler);

            // Mark the OpSampledImage itself as being comparison state.
            comparison_ids.insert(result_id);
        }
        return true;
    }

    default:
        break;
    }

    return true;
}

bool Compiler::buffer_is_hlsl_counter_buffer(VariableID id) const
{
    auto *m = ir.find_meta(id);
    return m && m->hlsl_is_magic_counter_buffer;
}

bool Compiler::buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const
{
    auto *m = ir.find_meta(id);

    // First, check for the proper decoration.
    if (m && m->hlsl_magic_counter_buffer != 0)
    {
        counter_id = m->hlsl_magic_counter_buffer;
        return true;
    }
    else
        return false;
}
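
// Builds a "null" constant recursively: pointers and plain scalars/vectors get make_null(),
// arrays reuse a single null element constant for every entry, and structs get one null
// constant per member.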
void Compiler::make_constant_null(uint32_t id, uint32_t type)
{
    auto &constant_type = get<SPIRType>(type);

    if (constant_type.pointer)
    {
        auto &constant = set<SPIRConstant>(id, type);
        constant.make_null(constant_type);
    }
    else if (!constant_type.array.empty())
    {
        assert(constant_type.parent_type);
        uint32_t parent_id = ir.increase_bound_by(1);
        make_constant_null(parent_id, constant_type.parent_type);

        if (!constant_type.array_size_literal.back())
            SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal.");

        SmallVector<uint32_t> elements(constant_type.array.back());
        for (uint32_t i = 0; i < constant_type.array.back(); i++)
            elements[i] = parent_id;
        set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false);
    }
    else if (!constant_type.member_types.empty())
    {
        uint32_t member_ids = ir.increase_bound_by(uint32_t(constant_type.member_types.size()));
        SmallVector<uint32_t> elements(constant_type.member_types.size());
        for (uint32_t i = 0; i < constant_type.member_types.size(); i++)
        {
            make_constant_null(member_ids + i, constant_type.member_types[i]);
            elements[i] = member_ids + i;
        }
        set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false);
    }
    else
    {
        auto &constant = set<SPIRConstant>(id, type);
        constant.make_null(constant_type);
    }
}

const SmallVector<spv::Capability> &Compiler::get_declared_capabilities() const
{
    return ir.declared_capabilities;
}

const SmallVector<std::string> &Compiler::get_declared_extensions() const
{
    return ir.declared_extensions;
}

std::string Compiler::get_remapped_declared_block_name(VariableID id) const
{
    return get_remapped_declared_block_name(id, false);
}

std::string Compiler::get_remapped_declared_block_name(uint32_t id, bool fallback_prefer_instance_name) const
{
    auto itr = declared_block_names.find(id);
    if (itr != end(declared_block_names))
    {
        return itr->second;
    }
    else
    {
        auto &var = get<SPIRVariable>(id);

        if (fallback_prefer_instance_name)
        {
            return to_name(var.self);
        }
        else
        {
            auto &type = get<SPIRType>(var.basetype);
            auto *type_meta = ir.find_meta(type.self);
            auto *block_name = type_meta ? &type_meta->decoration.alias : nullptr;
            return (!block_name || block_name->empty()) ? get_block_fallback_name(id) : *block_name;
        }
    }
}

bool Compiler::reflection_ssbo_instance_name_is_significant() const
{
    if (ir.source.known)
    {
        // UAVs from HLSL source tend to be declared in a way where the type is reused
        // but the instance name is significant, and that's the name we should report.
        // For GLSL, SSBOs each have their own block type as that's how GLSL is written.
        return ir.source.hlsl;
    }

    unordered_set<uint32_t> ssbo_type_ids;
    bool aliased_ssbo_types = false;

    // If we don't have any OpSource information, we need to perform some shaky heuristics.
    ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
        auto &type = this->get<SPIRType>(var.basetype);
        if (!type.pointer || var.storage == StorageClassFunction)
            return;

        bool ssbo = var.storage == StorageClassStorageBuffer ||
                    (var.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock));

        if (ssbo)
        {
            if (ssbo_type_ids.count(type.self))
                aliased_ssbo_types = true;
            ssbo_type_ids.insert(type.self);
        }
    });

    // If the block name is aliased, assume we have HLSL-style UAV declarations.
    return aliased_ssbo_types;
}

bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op,
                                          const uint32_t *args, uint32_t length)
{
    if (length < 2)
        return false;

    bool has_result_id = false, has_result_type = false;
    HasResultAndType(op, &has_result_id, &has_result_type);
    if (has_result_id && has_result_type)
    {
        result_type = args[0];
        result_id = args[1];
        return true;
    }
    else
        return false;
}
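
// Merges a struct member's decoration bitset with the decorations of any nested struct members,
// so callers can query the flattened decoration state of aggregate members in one call.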
Bitset Compiler::combined_decoration_for_member(const SPIRType &type, uint32_t index) const
{
    Bitset flags;
    auto *type_meta = ir.find_meta(type.self);

    if (type_meta)
    {
        auto &members = type_meta->members;
        if (index >= members.size())
            return flags;
        auto &dec = members[index];

        flags.merge_or(dec.decoration_flags);

        auto &member_type = get<SPIRType>(type.member_types[index]);

        // If our member type is a struct, traverse all the child members as well recursively.
        auto &member_childs = member_type.member_types;
        for (uint32_t i = 0; i < member_childs.size(); i++)
        {
            auto &child_member_type = get<SPIRType>(member_childs[i]);
            if (!child_member_type.pointer)
                flags.merge_or(combined_decoration_for_member(member_type, i));
        }
    }

    return flags;
}

bool Compiler::is_desktop_only_format(spv::ImageFormat format)
{
    switch (format)
    {
    // Desktop-only formats
    case ImageFormatR11fG11fB10f:
    case ImageFormatR16f:
    case ImageFormatRgb10A2:
    case ImageFormatR8:
    case ImageFormatRg8:
    case ImageFormatR16:
    case ImageFormatRg16:
    case ImageFormatRgba16:
    case ImageFormatR16Snorm:
    case ImageFormatRg16Snorm:
    case ImageFormatRgba16Snorm:
    case ImageFormatR8Snorm:
    case ImageFormatRg8Snorm:
    case ImageFormatR8ui:
    case ImageFormatRg8ui:
    case ImageFormatR16ui:
    case ImageFormatRgb10a2ui:
    case ImageFormatR8i:
    case ImageFormatRg8i:
    case ImageFormatR16i:
    case ImageFormatRg16i:
        return true;
    default:
        return false;
    }
}

// An image is determined to be a depth image if it is marked as a depth image and is not also
// explicitly marked with a color format, or if there are any sample/gather compare operations on it.
bool Compiler::is_depth_image(const SPIRType &type, uint32_t id) const
{
    return (type.image.depth && type.image.format == ImageFormatUnknown) || comparison_ids.count(id);
}

bool Compiler::type_is_opaque_value(const SPIRType &type) const
{
    return !type.pointer && (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Image ||
                             type.basetype == SPIRType::Sampler);
}

// Make these member functions so we can easily break on any force_recompile events.
void Compiler::force_recompile()
{
    is_force_recompile = true;
}

void Compiler::force_recompile_guarantee_forward_progress()
{
    force_recompile();
    is_force_recompile_forward_progress = true;
}

bool Compiler::is_forcing_recompilation() const
{
    return is_force_recompile;
}

void Compiler::clear_force_recompile()
{
    is_force_recompile = false;
    is_force_recompile_forward_progress = false;
}

Compiler::PhysicalStorageBufferPointerHandler::PhysicalStorageBufferPointerHandler(Compiler &compiler_)
    : compiler(compiler_)
{
}

Compiler::PhysicalBlockMeta *Compiler::PhysicalStorageBufferPointerHandler::find_block_meta(uint32_t id) const
{
    auto chain_itr = access_chain_to_physical_block.find(id);
    if (chain_itr != access_chain_to_physical_block.end())
        return chain_itr->second;
    else
        return nullptr;
}
void Compiler::PhysicalStorageBufferPointerHandler::mark_aligned_access(uint32_t id, const uint32_t *args, uint32_t length)
{
    uint32_t mask = *args;
    args++;
    length--;
    if (length && (mask & MemoryAccessVolatileMask) != 0)
    {
        args++;
        length--;
    }

    if (length && (mask & MemoryAccessAlignedMask) != 0)
    {
        uint32_t alignment = *args;
        auto *meta = find_block_meta(id);

        // This makes the assumption that the application does not rely on insane edge cases like:
        // Bind buffer with ADDR = 8, use block offset of 8 bytes, load/store with 16 byte alignment.
        // If we emit the buffer with alignment = 16 here, the first element at offset = 0 should
        // actually have alignment of 8 bytes, but this is too theoretical and awkward to support.
        // We could potentially keep track of any offset in the access chain, but it's
        // practically impossible for high level compilers to emit code like that,
        // so deducing overall alignment requirement based on maximum observed Alignment value is probably fine.
        if (meta && alignment > meta->alignment)
            meta->alignment = alignment;
    }
}

bool Compiler::PhysicalStorageBufferPointerHandler::type_is_bda_block_entry(uint32_t type_id) const
{
    auto &type = compiler.get<SPIRType>(type_id);
    return compiler.is_physical_pointer(type);
}

uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_minimum_scalar_alignment(const SPIRType &type) const
{
    if (type.storage == spv::StorageClassPhysicalStorageBufferEXT)
        return 8;
    else if (type.basetype == SPIRType::Struct)
    {
        uint32_t alignment = 0;
        for (auto &member_type : type.member_types)
        {
            uint32_t member_align = get_minimum_scalar_alignment(compiler.get<SPIRType>(member_type));
            if (member_align > alignment)
                alignment = member_align;
        }
        return alignment;
    }
    else
        return type.width / 8;
}

void Compiler::PhysicalStorageBufferPointerHandler::setup_meta_chain(uint32_t type_id, uint32_t var_id)
{
    if (type_is_bda_block_entry(type_id))
    {
        auto &meta = physical_block_type_meta[type_id];
        access_chain_to_physical_block[var_id] = &meta;

        auto &type = compiler.get<SPIRType>(type_id);

        if (!compiler.is_physical_pointer_to_buffer_block(type))
            non_block_types.insert(type_id);

        if (meta.alignment == 0)
            meta.alignment = get_minimum_scalar_alignment(compiler.get_pointee_type(type));
    }
}

bool Compiler::PhysicalStorageBufferPointerHandler::handle(Op op, const uint32_t *args, uint32_t length)
{
    // When a BDA pointer comes to life, we need to keep a mapping of SSA ID -> type ID for the pointer type.
    // For every load and store, we'll need to be able to look up the type ID being accessed and mark any alignment
    // requirements.
    switch (op)
    {
    case OpConvertUToPtr:
    case OpBitcast:
    case OpCompositeExtract:
        // Extract can begin a new chain if we had a struct or array of pointers as input.
        // We don't begin chains before we have a pure scalar pointer.
        setup_meta_chain(args[0], args[1]);
        break;

    case OpAccessChain:
    case OpInBoundsAccessChain:
    case OpPtrAccessChain:
    case OpCopyObject:
    {
        auto itr = access_chain_to_physical_block.find(args[2]);
        if (itr != access_chain_to_physical_block.end())
            access_chain_to_physical_block[args[1]] = itr->second;
        break;
    }

    case OpLoad:
    {
        setup_meta_chain(args[0], args[1]);
        if (length >= 4)
            mark_aligned_access(args[2], args + 3, length - 3);
        break;
    }

    case OpStore:
    {
        if (length >= 3)
            mark_aligned_access(args[0], args + 2, length - 2);
        break;
    }

    default:
        break;
    }

    return true;
}

uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_base_non_block_type_id(uint32_t type_id) const
{
    auto *type = &compiler.get<SPIRType>(type_id);
    while (compiler.is_physical_pointer(*type) && !type_is_bda_block_entry(type_id))
    {
        type_id = type->parent_type;
        type = &compiler.get<SPIRType>(type_id);
    }

    assert(type_is_bda_block_entry(type_id));
    return type_id;
}

void Compiler::PhysicalStorageBufferPointerHandler::analyze_non_block_types_from_block(const SPIRType &type)
{
    for (auto &member : type.member_types)
    {
        auto &subtype = compiler.get<SPIRType>(member);

        if (compiler.is_physical_pointer(subtype) && !compiler.is_physical_pointer_to_buffer_block(subtype))
            non_block_types.insert(get_base_non_block_type_id(member));
        else if (subtype.basetype == SPIRType::Struct && !compiler.is_pointer(subtype))
            analyze_non_block_types_from_block(subtype);
    }
}
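
// Physical pointers to non-block POD types are never declared through a variable, so we collect them
// here (both from observed usage and from block declarations) so that their pointer types can still
// be declared explicitly later.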
void Compiler::analyze_non_block_pointer_types()
{
    PhysicalStorageBufferPointerHandler handler(*this);
    traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);

    // Analyze any block declaration we have to make. It might contain
    // physical pointers to POD types which we never used, and thus never added to the list.
    // We'll need to add those pointer types to the set of types we declare.
    ir.for_each_typed_id<SPIRType>([&](uint32_t id, SPIRType &type) {
        // Only analyze the raw block struct, not any pointer-to-struct, since that's just redundant.
        if (type.self == id &&
            (has_decoration(type.self, DecorationBlock) ||
             has_decoration(type.self, DecorationBufferBlock)))
        {
            handler.analyze_non_block_types_from_block(type);
        }
    });

    physical_storage_non_block_pointer_types.reserve(handler.non_block_types.size());
    for (auto type : handler.non_block_types)
        physical_storage_non_block_pointer_types.push_back(type);
    sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types));
    physical_storage_type_to_alignment = std::move(handler.physical_block_type_meta);
}
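
// Fragment shader interlock analysis runs in two passes: the prepass below locates the function and
// control-flow context of Begin/EndInvocationInterlockEXT, and the main handler then records which
// buffers and images are touched inside (or potentially inside) the critical section.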
bool Compiler::InterlockedResourceAccessPrepassHandler::handle(Op op, const uint32_t *, uint32_t)
{
    if (op == OpBeginInvocationInterlockEXT || op == OpEndInvocationInterlockEXT)
    {
        if (interlock_function_id != 0 && interlock_function_id != call_stack.back())
        {
            // Most complex case, we have no sensible way of dealing with this
            // other than taking the 100% conservative approach, exit early.
            split_function_case = true;
            return false;
        }
        else
        {
            interlock_function_id = call_stack.back();
            // If this call is performed inside control flow we have a problem.
            auto &cfg = compiler.get_cfg_for_function(interlock_function_id);

            uint32_t from_block_id = compiler.get<SPIRFunction>(interlock_function_id).entry_block;
            bool outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from_block_id, current_block_id);
            if (!outside_control_flow)
                control_flow_interlock = true;
        }
    }
    return true;
}

void Compiler::InterlockedResourceAccessPrepassHandler::rearm_current_block(const SPIRBlock &block)
{
    current_block_id = block.self;
}

bool Compiler::InterlockedResourceAccessPrepassHandler::begin_function_scope(const uint32_t *args, uint32_t length)
{
    if (length < 3)
        return false;
    call_stack.push_back(args[2]);
    return true;
}

bool Compiler::InterlockedResourceAccessPrepassHandler::end_function_scope(const uint32_t *, uint32_t)
{
    call_stack.pop_back();
    return true;
}

bool Compiler::InterlockedResourceAccessHandler::begin_function_scope(const uint32_t *args, uint32_t length)
{
    if (length < 3)
        return false;

    if (args[2] == interlock_function_id)
        call_stack_is_interlocked = true;

    call_stack.push_back(args[2]);
    return true;
}

bool Compiler::InterlockedResourceAccessHandler::end_function_scope(const uint32_t *, uint32_t)
{
    if (call_stack.back() == interlock_function_id)
        call_stack_is_interlocked = false;

    call_stack.pop_back();
    return true;
}

void Compiler::InterlockedResourceAccessHandler::access_potential_resource(uint32_t id)
{
    if ((use_critical_section && in_crit_sec) || (control_flow_interlock && call_stack_is_interlocked) ||
        split_function_case)
    {
        compiler.interlocked_resources.insert(id);
    }
}

bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
    // Only care about critical section analysis if we have simple case.
    if (use_critical_section)
    {
        if (opcode == OpBeginInvocationInterlockEXT)
        {
            in_crit_sec = true;
            return true;
        }

        if (opcode == OpEndInvocationInterlockEXT)
        {
            // End critical section--nothing more to do.
            return false;
        }
    }

    // We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need.
    switch (opcode)
    {
    case OpLoad:
    {
        if (length < 3)
            return false;

        uint32_t ptr = args[2];
        auto *var = compiler.maybe_get_backing_variable(ptr);

        // We're only concerned with buffer and image memory here.
        if (!var)
            break;

        switch (var->storage)
        {
        default:
            break;

        case StorageClassUniformConstant:
        {
            uint32_t result_type = args[0];
            uint32_t id = args[1];
            compiler.set<SPIRExpression>(id, "", result_type, true);
            compiler.register_read(id, ptr, true);
            break;
        }

        case StorageClassUniform:
            // Must have BufferBlock; we only care about SSBOs.
            if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
                break;
            // fallthrough
        case StorageClassStorageBuffer:
            access_potential_resource(var->self);
            break;
        }
        break;
    }

    case OpInBoundsAccessChain:
    case OpAccessChain:
    case OpPtrAccessChain:
    {
        if (length < 3)
            return false;

        uint32_t result_type = args[0];

        auto &type = compiler.get<SPIRType>(result_type);
        if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
            type.storage == StorageClassStorageBuffer)
        {
            uint32_t id = args[1];
            uint32_t ptr = args[2];
            compiler.set<SPIRExpression>(id, "", result_type, true);
            compiler.register_read(id, ptr, true);
            compiler.ir.ids[id].set_allow_type_rewrite();
        }
        break;
    }

    case OpImageTexelPointer:
    {
        if (length < 3)
            return false;

        uint32_t result_type = args[0];
        uint32_t id = args[1];
        uint32_t ptr = args[2];
        auto &e = compiler.set<SPIRExpression>(id, "", result_type, true);
        auto *var = compiler.maybe_get_backing_variable(ptr);
        if (var)
            e.loaded_from = var->self;
        break;
    }

    case OpStore:
    case OpImageWrite:
    case OpAtomicStore:
    {
        if (length < 1)
            return false;

        uint32_t ptr = args[0];
        auto *var = compiler.maybe_get_backing_variable(ptr);
        if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
                    var->storage == StorageClassStorageBuffer))
        {
            access_potential_resource(var->self);
        }

        break;
    }

    case OpCopyMemory:
    {
        if (length < 2)
            return false;

        uint32_t dst = args[0];
        uint32_t src = args[1];
        auto *dst_var = compiler.maybe_get_backing_variable(dst);
        auto *src_var = compiler.maybe_get_backing_variable(src);

        if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer))
            access_potential_resource(dst_var->self);

        if (src_var)
        {
            if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer)
                break;

            if (src_var->storage == StorageClassUniform &&
                !compiler.has_decoration(compiler.get<SPIRType>(src_var->basetype).self, DecorationBufferBlock))
            {
                break;
            }

            access_potential_resource(src_var->self);
        }

        break;
    }

    case OpImageRead:
    case OpAtomicLoad:
    {
        if (length < 3)
            return false;

        uint32_t ptr = args[2];
        auto *var = compiler.maybe_get_backing_variable(ptr);

        // We're only concerned with buffer and image memory here.
        if (!var)
            break;

        switch (var->storage)
        {
        default:
            break;

        case StorageClassUniform:
            // Must have BufferBlock; we only care about SSBOs.
            if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
                break;
            // fallthrough
        case StorageClassUniformConstant:
        case StorageClassStorageBuffer:
            access_potential_resource(var->self);
            break;
        }
        break;
    }

    case OpAtomicExchange:
    case OpAtomicCompareExchange:
    case OpAtomicIIncrement:
    case OpAtomicIDecrement:
    case OpAtomicIAdd:
    case OpAtomicISub:
    case OpAtomicSMin:
    case OpAtomicUMin:
    case OpAtomicSMax:
    case OpAtomicUMax:
    case OpAtomicAnd:
    case OpAtomicOr:
    case OpAtomicXor:
    {
        if (length < 3)
            return false;

        uint32_t ptr = args[2];
        auto *var = compiler.maybe_get_backing_variable(ptr);
        if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
                    var->storage == StorageClassStorageBuffer))
        {
            access_potential_resource(var->self);
        }

        break;
    }

    default:
        break;
    }

    return true;
}

void Compiler::analyze_interlocked_resource_usage()
{
    if (get_execution_model() == ExecutionModelFragment &&
        (get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) ||
         get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
         get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
         get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT)))
    {
        InterlockedResourceAccessPrepassHandler prepass_handler(*this, ir.default_entry_point);
        traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), prepass_handler);

        InterlockedResourceAccessHandler handler(*this, ir.default_entry_point);
        handler.interlock_function_id = prepass_handler.interlock_function_id;
        handler.split_function_case = prepass_handler.split_function_case;
        handler.control_flow_interlock = prepass_handler.control_flow_interlock;
        handler.use_critical_section = !handler.split_function_case && !handler.control_flow_interlock;

        traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);

        // For GLSL. If we hit any of these cases, we have to fall back to conservative approach.
        interlocked_is_complex =
            !handler.use_critical_section || handler.interlock_function_id != ir.default_entry_point;
    }
}
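
// Recursive struct detection: depth-first search through member types with a visited set; meeting a
// struct type which is already on the current path means the type (indirectly) contains itself.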
bool Compiler::check_internal_recursion(const SPIRType &type, std::unordered_set<uint32_t> &checked_ids)
{
    if (type.basetype != SPIRType::Struct)
        return false;

    if (checked_ids.count(type.self))
        return true;

    // Recurse into struct members
    bool is_recursive = false;
    checked_ids.insert(type.self);
    uint32_t mbr_cnt = uint32_t(type.member_types.size());
    for (uint32_t mbr_idx = 0; !is_recursive && mbr_idx < mbr_cnt; mbr_idx++)
    {
        uint32_t mbr_type_id = type.member_types[mbr_idx];
        auto &mbr_type = get<SPIRType>(mbr_type_id);
        is_recursive |= check_internal_recursion(mbr_type, checked_ids);
    }
    checked_ids.erase(type.self);
    return is_recursive;
}

// Return whether the struct type contains a structural recursion nested somewhere within its content.
bool Compiler::type_contains_recursion(const SPIRType &type)
{
    std::unordered_set<uint32_t> checked_ids;
    return check_internal_recursion(type, checked_ids);
}

bool Compiler::type_is_array_of_pointers(const SPIRType &type) const
{
    if (!is_array(type))
        return false;

    // BDA types must have parent type hierarchy.
    if (!type.parent_type)
        return false;

    // Punch through all array layers.
    auto *parent = &get<SPIRType>(type.parent_type);
    while (is_array(*parent))
        parent = &get<SPIRType>(parent->parent_type);

    return is_pointer(*parent);
}
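
// A branch from 'from' to 'to' needs phi flushing if 'to' has any OpPhi whose incoming value comes
// from 'from'; the backend can then emit the corresponding copy before taking the branch.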
bool Compiler::flush_phi_required(BlockID from, BlockID to) const
{
    auto &child = get<SPIRBlock>(to);
    for (auto &phi : child.phi_variables)
        if (phi.parent == from)
            return true;
    return false;
}

void Compiler::add_loop_level()
{
    current_loop_level++;