/*
 * Copyright 2015-2021 Arm Limited
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * At your option, you may choose to accept this material under either:
 *  1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 *  2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#include "spirv_cross.hpp"
#include "GLSL.std.450.h"
#include "spirv_cfg.hpp"
#include "spirv_common.hpp"
#include "spirv_parser.hpp"
#include <algorithm>
#include <cstring>
#include <utility>

using namespace std;
using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;

Compiler::Compiler(vector<uint32_t> ir_)
{
	Parser parser(std::move(ir_));
	parser.parse();
	set_ir(std::move(parser.get_parsed_ir()));
}

Compiler::Compiler(const uint32_t *ir_, size_t word_count)
{
	Parser parser(ir_, word_count);
	parser.parse();
	set_ir(std::move(parser.get_parsed_ir()));
}

Compiler::Compiler(const ParsedIR &ir_)
{
	set_ir(ir_);
}

Compiler::Compiler(ParsedIR &&ir_)
{
	set_ir(std::move(ir_));
}

void Compiler::set_ir(ParsedIR &&ir_)
{
	ir = std::move(ir_);
	parse_fixup();
}

void Compiler::set_ir(const ParsedIR &ir_)
{
	ir = ir_;
	parse_fixup();
}
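// Construction sketch (illustrative only, not part of this file): the base Compiler parses the
// module and runs parse_fixup(); actual cross-compilation happens in a derived backend such as
// CompilerGLSL (spirv_glsl.hpp). A hypothetical caller might do:
//
//   std::vector<uint32_t> spirv_words = load_spirv_from_disk(); // assumed helper
//   spirv_cross::CompilerGLSL glsl(std::move(spirv_words));     // namespace may differ if SPIRV_CROSS_NAMESPACE is overridden
//   std::string source = glsl.compile();
//
// The base class's compile() below intentionally returns an empty string.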
string Compiler::compile()
{
	return "";
}

bool Compiler::variable_storage_is_aliased(const SPIRVariable &v)
{
	auto &type = get<SPIRType>(v.basetype);
	bool ssbo = v.storage == StorageClassStorageBuffer ||
	            ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
	bool image = type.basetype == SPIRType::Image;
	bool counter = type.basetype == SPIRType::AtomicCounter;
	bool buffer_reference = type.storage == StorageClassPhysicalStorageBufferEXT;

	bool is_restrict;
	if (ssbo)
		is_restrict = ir.get_buffer_block_flags(v).get(DecorationRestrict);
	else
		is_restrict = has_decoration(v.self, DecorationRestrict);

	return !is_restrict && (ssbo || image || counter || buffer_reference);
}
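// Note on aliasing: SSBOs, storage images, atomic counters and buffer-device-address pointers
// may be reachable through more than one name, so a write through any of them can change what
// another one reads. Unless the variable is marked Restrict, writes to such storage invalidate
// forwarded expressions for all aliased variables (see flush_all_aliased_variables() below).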
bool Compiler::block_is_control_dependent(const SPIRBlock &block)
{
	for (auto &i : block.ops)
	{
		auto ops = stream(i);
		auto op = static_cast<Op>(i.op);

		switch (op)
		{
		case OpFunctionCall:
		{
			uint32_t func = ops[2];
			if (function_is_control_dependent(get<SPIRFunction>(func)))
				return true;
			break;
		}

		// Derivatives
		case OpDPdx:
		case OpDPdxCoarse:
		case OpDPdxFine:
		case OpDPdy:
		case OpDPdyCoarse:
		case OpDPdyFine:
		case OpFwidth:
		case OpFwidthCoarse:
		case OpFwidthFine:

		// Anything implicit LOD
		case OpImageSampleImplicitLod:
		case OpImageSampleDrefImplicitLod:
		case OpImageSampleProjImplicitLod:
		case OpImageSampleProjDrefImplicitLod:
		case OpImageSparseSampleImplicitLod:
		case OpImageSparseSampleDrefImplicitLod:
		case OpImageSparseSampleProjImplicitLod:
		case OpImageSparseSampleProjDrefImplicitLod:
		case OpImageQueryLod:
		case OpImageDrefGather:
		case OpImageGather:
		case OpImageSparseDrefGather:
		case OpImageSparseGather:

		// Anything subgroups
		case OpGroupNonUniformElect:
		case OpGroupNonUniformAll:
		case OpGroupNonUniformAny:
		case OpGroupNonUniformAllEqual:
		case OpGroupNonUniformBroadcast:
		case OpGroupNonUniformBroadcastFirst:
		case OpGroupNonUniformBallot:
		case OpGroupNonUniformInverseBallot:
		case OpGroupNonUniformBallotBitExtract:
		case OpGroupNonUniformBallotBitCount:
		case OpGroupNonUniformBallotFindLSB:
		case OpGroupNonUniformBallotFindMSB:
		case OpGroupNonUniformShuffle:
		case OpGroupNonUniformShuffleXor:
		case OpGroupNonUniformShuffleUp:
		case OpGroupNonUniformShuffleDown:
		case OpGroupNonUniformIAdd:
		case OpGroupNonUniformFAdd:
		case OpGroupNonUniformIMul:
		case OpGroupNonUniformFMul:
		case OpGroupNonUniformSMin:
		case OpGroupNonUniformUMin:
		case OpGroupNonUniformFMin:
		case OpGroupNonUniformSMax:
		case OpGroupNonUniformUMax:
		case OpGroupNonUniformFMax:
		case OpGroupNonUniformBitwiseAnd:
		case OpGroupNonUniformBitwiseOr:
		case OpGroupNonUniformBitwiseXor:
		case OpGroupNonUniformLogicalAnd:
		case OpGroupNonUniformLogicalOr:
		case OpGroupNonUniformLogicalXor:
		case OpGroupNonUniformQuadBroadcast:
		case OpGroupNonUniformQuadSwap:

		// Control barriers
		case OpControlBarrier:
			return true;

		default:
			break;
		}
	}

	return false;
}
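// A block is "control dependent" when its result depends on which other invocations execute
// alongside it: derivatives and implicit-LOD sampling read neighbouring fragments, subgroup
// operations read other lanes, and control barriers require every invocation to reach them.
// The intent (suggested by the opcode list) is that callers can be conservative about moving
// calls to such code across divergent control flow; see function_is_control_dependent() below.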
bool Compiler::block_is_pure(const SPIRBlock &block)
{
	// This is a global side effect of the function.
	if (block.terminator == SPIRBlock::Kill ||
	    block.terminator == SPIRBlock::TerminateRay ||
	    block.terminator == SPIRBlock::IgnoreIntersection ||
	    block.terminator == SPIRBlock::EmitMeshTasks)
		return false;

	for (auto &i : block.ops)
	{
		auto ops = stream(i);
		auto op = static_cast<Op>(i.op);

		switch (op)
		{
		case OpFunctionCall:
		{
			uint32_t func = ops[2];
			if (!function_is_pure(get<SPIRFunction>(func)))
				return false;
			break;
		}

		case OpCopyMemory:
		case OpStore:
		{
			auto &type = expression_type(ops[0]);
			if (type.storage != StorageClassFunction)
				return false;
			break;
		}

		case OpImageWrite:
			return false;

		// Atomics are impure.
		case OpAtomicLoad:
		case OpAtomicStore:
		case OpAtomicExchange:
		case OpAtomicCompareExchange:
		case OpAtomicCompareExchangeWeak:
		case OpAtomicIIncrement:
		case OpAtomicIDecrement:
		case OpAtomicIAdd:
		case OpAtomicISub:
		case OpAtomicSMin:
		case OpAtomicUMin:
		case OpAtomicSMax:
		case OpAtomicUMax:
		case OpAtomicAnd:
		case OpAtomicOr:
		case OpAtomicXor:
			return false;

		// Geometry shader builtins modify global state.
		case OpEndPrimitive:
		case OpEmitStreamVertex:
		case OpEndStreamPrimitive:
		case OpEmitVertex:
			return false;

		// Mesh shader functions modify global state.
		// (EmitMeshTasks is a terminator).
		case OpSetMeshOutputsEXT:
			return false;

		// Barriers disallow any reordering, so we should treat blocks with barrier as writing.
		case OpControlBarrier:
		case OpMemoryBarrier:
			return false;

		// Ray tracing builtins are impure.
		case OpReportIntersectionKHR:
		case OpIgnoreIntersectionNV:
		case OpTerminateRayNV:
		case OpTraceNV:
		case OpTraceRayKHR:
		case OpExecuteCallableNV:
		case OpExecuteCallableKHR:
		case OpRayQueryInitializeKHR:
		case OpRayQueryTerminateKHR:
		case OpRayQueryGenerateIntersectionKHR:
		case OpRayQueryConfirmIntersectionKHR:
		case OpRayQueryProceedKHR:
			// There are various getters in ray query, but they are considered pure.
			return false;

		// OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure.

		case OpDemoteToHelperInvocationEXT:
			// This is a global side effect of the function.
			return false;

		case OpExtInst:
		{
			uint32_t extension_set = ops[2];
			if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
			{
				auto op_450 = static_cast<GLSLstd450>(ops[3]);
				switch (op_450)
				{
				case GLSLstd450Modf:
				case GLSLstd450Frexp:
				{
					auto &type = expression_type(ops[5]);
					if (type.storage != StorageClassFunction)
						return false;
					break;
				}

				default:
					break;
				}
			}
			break;
		}

		default:
			break;
		}
	}

	return true;
}
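// A block is pure only if nothing in it can write state visible outside the function:
// no stores outside Function storage, no image writes, atomics, barriers, geometry/mesh
// emission or ray-tracing side effects. function_is_pure() below simply requires every
// block of a function to be pure.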
string Compiler::to_name(uint32_t id, bool allow_alias) const
{
	if (allow_alias && ir.ids[id].get_type() == TypeType)
	{
		// If this type is a simple alias, emit the
		// name of the original type instead.
		// We don't want to override the meta alias
		// as that can be overridden by the reflection APIs after parse.
		auto &type = get<SPIRType>(id);
		if (type.type_alias)
		{
			// If the alias master has been specially packed, we will have emitted a clean variant as well,
			// so skip the name aliasing here.
			if (!has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
				return to_name(type.type_alias);
		}
	}

	auto &alias = ir.get_name(id);
	if (alias.empty())
		return join("_", id);
	else
		return alias;
}
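// Fallback naming example: an ID with no OpName simply becomes "_<id>", so ID 42 is emitted
// as "_42". Named IDs return the name stored in the IR.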
bool Compiler::function_is_pure(const SPIRFunction &func)
{
	for (auto block : func.blocks)
		if (!block_is_pure(get<SPIRBlock>(block)))
			return false;

	return true;
}

bool Compiler::function_is_control_dependent(const SPIRFunction &func)
{
	for (auto block : func.blocks)
		if (block_is_control_dependent(get<SPIRBlock>(block)))
			return true;

	return false;
}

void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_t id)
{
	for (auto &i : block.ops)
	{
		auto ops = stream(i);
		auto op = static_cast<Op>(i.op);

		switch (op)
		{
		case OpFunctionCall:
		{
			uint32_t func = ops[2];
			register_global_read_dependencies(get<SPIRFunction>(func), id);
			break;
		}

		case OpLoad:
		case OpImageRead:
		{
			// If we're in a storage class which does not get invalidated, adding dependencies here is no big deal.
			auto *var = maybe_get_backing_variable(ops[2]);
			if (var && var->storage != StorageClassFunction)
			{
				auto &type = get<SPIRType>(var->basetype);

				// InputTargets are immutable.
				if (type.basetype != SPIRType::Image && type.image.dim != DimSubpassData)
					var->dependees.push_back(id);
			}
			break;
		}

		default:
			break;
		}
	}
}

void Compiler::register_global_read_dependencies(const SPIRFunction &func, uint32_t id)
{
	for (auto block : func.blocks)
		register_global_read_dependencies(get<SPIRBlock>(block), id);
}

SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain)
{
	auto *var = maybe_get<SPIRVariable>(chain);
	if (!var)
	{
		auto *cexpr = maybe_get<SPIRExpression>(chain);
		if (cexpr)
			var = maybe_get<SPIRVariable>(cexpr->loaded_from);

		auto *access_chain = maybe_get<SPIRAccessChain>(chain);
		if (access_chain)
			var = maybe_get<SPIRVariable>(access_chain->loaded_from);
	}

	return var;
}

void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded)
{
	auto &e = get<SPIRExpression>(expr);
	auto *var = maybe_get_backing_variable(chain);

	if (var)
	{
		e.loaded_from = var->self;

		// If the backing variable is immutable, we do not need to depend on the variable.
		if (forwarded && !is_immutable(var->self))
			var->dependees.push_back(e.self);

		// If we load from a parameter, make sure we create "inout" if we also write to the parameter.
		// The default is "in" however, so we never invalidate our compilation by reading.
		if (var && var->parameter)
			var->parameter->read_count++;
	}
}
void Compiler::register_write(uint32_t chain)
{
	auto *var = maybe_get<SPIRVariable>(chain);
	if (!var)
	{
		// If we're storing through an access chain, invalidate the backing variable instead.
		auto *expr = maybe_get<SPIRExpression>(chain);
		if (expr && expr->loaded_from)
			var = maybe_get<SPIRVariable>(expr->loaded_from);

		auto *access_chain = maybe_get<SPIRAccessChain>(chain);
		if (access_chain && access_chain->loaded_from)
			var = maybe_get<SPIRVariable>(access_chain->loaded_from);
	}

	auto &chain_type = expression_type(chain);

	if (var)
	{
		bool check_argument_storage_qualifier = true;
		auto &type = expression_type(chain);

		// If our variable is in a storage class which can alias with other buffers,
		// invalidate all variables which depend on aliased variables. And if this is a
		// variable pointer, then invalidate all variables regardless.
		if (get_variable_data_type(*var).pointer)
		{
			flush_all_active_variables();

			if (type.pointer_depth == 1)
			{
				// We have a backing variable which is a pointer-to-pointer type.
				// We are storing some data through a pointer acquired through that variable,
				// but we are not writing to the value of the variable itself,
				// i.e., we are not modifying the pointer directly.
				// If we are storing a non-pointer type (pointer_depth == 1),
				// we know that we are storing some unrelated data.
				// A case here would be
				// void foo(Foo * const *arg) {
				//     Foo *bar = *arg;
				//     bar->unrelated = 42;
				// }
				// arg, the argument, is constant.
				check_argument_storage_qualifier = false;
			}
		}

		if (type.storage == StorageClassPhysicalStorageBufferEXT || variable_storage_is_aliased(*var))
			flush_all_aliased_variables();
		else if (var)
			flush_dependees(*var);

		// We tried to write to a parameter which is not marked with out qualifier, force a recompile.
		if (check_argument_storage_qualifier && var->parameter && var->parameter->write_count == 0)
		{
			var->parameter->write_count++;
			force_recompile();
		}
	}
	else if (chain_type.pointer)
	{
		// If we stored through a variable pointer, then we don't know which
		// variable we stored to. So *all* expressions after this point need to
		// be invalidated.
		// FIXME: If we can prove that the variable pointer will point to
		// only certain variables, we can invalidate only those.
		flush_all_active_variables();
	}

	// If chain_type.pointer is false, we're not writing to memory backed variables, but temporaries instead.
	// This can happen in copy_logical_type where we unroll complex reads and writes to temporaries.
}
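// Invalidation summary for register_write(): writing through an aliased storage class flushes
// every aliased variable, writing through a variable pointer flushes everything, and any other
// write flushes only the dependees recorded for the one backing variable. Writing to a call
// parameter that was never written before bumps write_count and forces a recompile so the
// argument can be emitted with an out/inout qualifier.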
void Compiler::flush_dependees(SPIRVariable &var)
{
	for (auto expr : var.dependees)
		invalid_expressions.insert(expr);
	var.dependees.clear();
}

void Compiler::flush_all_aliased_variables()
{
	for (auto aliased : aliased_variables)
		flush_dependees(get<SPIRVariable>(aliased));
}

void Compiler::flush_all_atomic_capable_variables()
{
	for (auto global : global_variables)
		flush_dependees(get<SPIRVariable>(global));
	flush_all_aliased_variables();
}

void Compiler::flush_control_dependent_expressions(uint32_t block_id)
{
	auto &block = get<SPIRBlock>(block_id);
	for (auto &expr : block.invalidate_expressions)
		invalid_expressions.insert(expr);
	block.invalidate_expressions.clear();
}

void Compiler::flush_all_active_variables()
{
	// Invalidate all temporaries we read from variables in this block since they were forwarded.
	// Invalidate all temporaries we read from globals.
	for (auto &v : current_function->local_variables)
		flush_dependees(get<SPIRVariable>(v));
	for (auto &arg : current_function->arguments)
		flush_dependees(get<SPIRVariable>(arg.id));
	for (auto global : global_variables)
		flush_dependees(get<SPIRVariable>(global));

	flush_all_aliased_variables();
}

uint32_t Compiler::expression_type_id(uint32_t id) const
{
	switch (ir.ids[id].get_type())
	{
	case TypeVariable:
		return get<SPIRVariable>(id).basetype;

	case TypeExpression:
		return get<SPIRExpression>(id).expression_type;

	case TypeConstant:
		return get<SPIRConstant>(id).constant_type;

	case TypeConstantOp:
		return get<SPIRConstantOp>(id).basetype;

	case TypeUndef:
		return get<SPIRUndef>(id).basetype;

	case TypeCombinedImageSampler:
		return get<SPIRCombinedImageSampler>(id).combined_type;

	case TypeAccessChain:
		return get<SPIRAccessChain>(id).basetype;

	default:
		SPIRV_CROSS_THROW("Cannot resolve expression type.");
	}
}

const SPIRType &Compiler::expression_type(uint32_t id) const
{
	return get<SPIRType>(expression_type_id(id));
}

bool Compiler::expression_is_lvalue(uint32_t id) const
{
	auto &type = expression_type(id);
	switch (type.basetype)
	{
	case SPIRType::SampledImage:
	case SPIRType::Image:
	case SPIRType::Sampler:
		return false;

	default:
		return true;
	}
}

bool Compiler::is_immutable(uint32_t id) const
{
	if (ir.ids[id].get_type() == TypeVariable)
	{
		auto &var = get<SPIRVariable>(id);

		// Anything we load from the UniformConstant address space is guaranteed to be immutable.
		bool pointer_to_const = var.storage == StorageClassUniformConstant;
		return pointer_to_const || var.phi_variable || !expression_is_lvalue(id);
	}
	else if (ir.ids[id].get_type() == TypeAccessChain)
		return get<SPIRAccessChain>(id).immutable;
	else if (ir.ids[id].get_type() == TypeExpression)
		return get<SPIRExpression>(id).immutable;
	else if (ir.ids[id].get_type() == TypeConstant || ir.ids[id].get_type() == TypeConstantOp ||
	         ir.ids[id].get_type() == TypeUndef)
		return true;
	else
		return false;
}

static inline bool storage_class_is_interface(spv::StorageClass storage)
{
	switch (storage)
	{
	case StorageClassInput:
	case StorageClassOutput:
	case StorageClassUniform:
	case StorageClassUniformConstant:
	case StorageClassAtomicCounter:
	case StorageClassPushConstant:
	case StorageClassStorageBuffer:
		return true;

	default:
		return false;
	}
}

bool Compiler::is_hidden_variable(const SPIRVariable &var, bool include_builtins) const
{
	if ((is_builtin_variable(var) && !include_builtins) || var.remapped_variable)
		return true;

	// Combined image samplers are always considered active as they are "magic" variables.
	if (find_if(begin(combined_image_samplers), end(combined_image_samplers), [&var](const CombinedImageSampler &samp) {
		    return samp.combined_id == var.self;
	    }) != end(combined_image_samplers))
	{
		return false;
	}

	// In SPIR-V 1.4 and up we must also use the active variable interface to disable global variables
	// which are not part of the entry point.
	if (ir.get_spirv_version() >= 0x10400 && var.storage != spv::StorageClassGeneric &&
	    var.storage != spv::StorageClassFunction && !interface_variable_exists_in_entry_point(var.self))
	{
		return true;
	}

	return check_active_interface_variables && storage_class_is_interface(var.storage) &&
	       active_interface_variables.find(var.self) == end(active_interface_variables);
}

bool Compiler::is_builtin_type(const SPIRType &type) const
{
	auto *type_meta = ir.find_meta(type.self);

	// We can have builtin structs as well. If one member of a struct is builtin, the struct must also be builtin.
	if (type_meta)
		for (auto &m : type_meta->members)
			if (m.builtin)
				return true;

	return false;
}

bool Compiler::is_builtin_variable(const SPIRVariable &var) const
{
	auto *m = ir.find_meta(var.self);

	if (var.compat_builtin || (m && m->decoration.builtin))
		return true;
	else
		return is_builtin_type(get<SPIRType>(var.basetype));
}

bool Compiler::is_member_builtin(const SPIRType &type, uint32_t index, BuiltIn *builtin) const
{
	auto *type_meta = ir.find_meta(type.self);

	if (type_meta)
	{
		auto &memb = type_meta->members;
		if (index < memb.size() && memb[index].builtin)
		{
			if (builtin)
				*builtin = memb[index].builtin_type;
			return true;
		}
	}

	return false;
}

bool Compiler::is_scalar(const SPIRType &type) const
{
	return type.basetype != SPIRType::Struct && type.vecsize == 1 && type.columns == 1;
}

bool Compiler::is_vector(const SPIRType &type) const
{
	return type.vecsize > 1 && type.columns == 1;
}

bool Compiler::is_matrix(const SPIRType &type) const
{
	return type.vecsize > 1 && type.columns > 1;
}

bool Compiler::is_array(const SPIRType &type) const
{
	return type.op == OpTypeArray || type.op == OpTypeRuntimeArray;
}

bool Compiler::is_pointer(const SPIRType &type) const
{
	return type.op == OpTypePointer && type.basetype != SPIRType::Unknown; // Ignore function pointers.
}

bool Compiler::is_physical_pointer(const SPIRType &type) const
{
	return type.op == OpTypePointer && type.storage == StorageClassPhysicalStorageBuffer;
}

bool Compiler::is_physical_pointer_to_buffer_block(const SPIRType &type) const
{
	return is_physical_pointer(type) && get_pointee_type(type).self == type.parent_type &&
	       (has_decoration(type.self, DecorationBlock) ||
	        has_decoration(type.self, DecorationBufferBlock));
}

bool Compiler::is_runtime_size_array(const SPIRType &type)
{
	return type.op == OpTypeRuntimeArray;
}
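// Quick mapping for the type predicates above (vecsize x columns):
//   float   -> vecsize 1, columns 1 -> is_scalar
//   vec3    -> vecsize 3, columns 1 -> is_vector
//   mat4    -> vecsize 4, columns 4 -> is_matrix
//   float[] -> OpTypeRuntimeArray   -> is_array and is_runtime_size_array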
ShaderResources Compiler::get_shader_resources() const
{
	return get_shader_resources(nullptr);
}

ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> &active_variables) const
{
	return get_shader_resources(&active_variables);
}
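// Both public overloads funnel into the pointer-taking overload further down;
// a null filter means "reflect everything", a non-null set restricts reflection
// to the variables contained in it.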
bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	uint32_t variable = 0;
	switch (opcode)
	{
	// Need this first, otherwise, GCC complains about unhandled switch statements.
	default:
		break;

	case OpFunctionCall:
	{
		// Invalid SPIR-V.
		if (length < 3)
			return false;

		uint32_t count = length - 3;
		args += 3;
		for (uint32_t i = 0; i < count; i++)
		{
			auto *var = compiler.maybe_get<SPIRVariable>(args[i]);
			if (var && storage_class_is_interface(var->storage))
				variables.insert(args[i]);
		}
		break;
	}

	case OpSelect:
	{
		// Invalid SPIR-V.
		if (length < 5)
			return false;

		uint32_t count = length - 3;
		args += 3;
		for (uint32_t i = 0; i < count; i++)
		{
			auto *var = compiler.maybe_get<SPIRVariable>(args[i]);
			if (var && storage_class_is_interface(var->storage))
				variables.insert(args[i]);
		}
		break;
	}

	case OpPhi:
	{
		// Invalid SPIR-V.
		if (length < 2)
			return false;

		uint32_t count = length - 2;
		args += 2;
		for (uint32_t i = 0; i < count; i += 2)
		{
			auto *var = compiler.maybe_get<SPIRVariable>(args[i]);
			if (var && storage_class_is_interface(var->storage))
				variables.insert(args[i]);
		}
		break;
	}

	case OpAtomicStore:
	case OpStore:
		// Invalid SPIR-V.
		if (length < 1)
			return false;
		variable = args[0];
		break;

	case OpCopyMemory:
	{
		if (length < 2)
			return false;

		auto *var = compiler.maybe_get<SPIRVariable>(args[0]);
		if (var && storage_class_is_interface(var->storage))
			variables.insert(args[0]);

		var = compiler.maybe_get<SPIRVariable>(args[1]);
		if (var && storage_class_is_interface(var->storage))
			variables.insert(args[1]);
		break;
	}

	case OpExtInst:
	{
		if (length < 3)
			return false;
		auto &extension_set = compiler.get<SPIRExtension>(args[2]);
		switch (extension_set.ext)
		{
		case SPIRExtension::GLSL:
		{
			auto op = static_cast<GLSLstd450>(args[3]);

			switch (op)
			{
			case GLSLstd450InterpolateAtCentroid:
			case GLSLstd450InterpolateAtSample:
			case GLSLstd450InterpolateAtOffset:
			{
				auto *var = compiler.maybe_get<SPIRVariable>(args[4]);
				if (var && storage_class_is_interface(var->storage))
					variables.insert(args[4]);
				break;
			}

			case GLSLstd450Modf:
			case GLSLstd450Fract:
			{
				auto *var = compiler.maybe_get<SPIRVariable>(args[5]);
				if (var && storage_class_is_interface(var->storage))
					variables.insert(args[5]);
				break;
			}

			default:
				break;
			}
			break;
		}
		case SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter:
		{
			enum AMDShaderExplicitVertexParameter
			{
				InterpolateAtVertexAMD = 1
			};

			auto op = static_cast<AMDShaderExplicitVertexParameter>(args[3]);

			switch (op)
			{
			case InterpolateAtVertexAMD:
			{
				auto *var = compiler.maybe_get<SPIRVariable>(args[4]);
				if (var && storage_class_is_interface(var->storage))
					variables.insert(args[4]);
				break;
			}

			default:
				break;
			}
			break;
		}
		default:
			break;
		}
		break;
	}

	case OpAccessChain:
	case OpInBoundsAccessChain:
	case OpPtrAccessChain:
	case OpLoad:
	case OpCopyObject:
	case OpImageTexelPointer:
	case OpAtomicLoad:
	case OpAtomicExchange:
	case OpAtomicCompareExchange:
	case OpAtomicCompareExchangeWeak:
	case OpAtomicIIncrement:
	case OpAtomicIDecrement:
	case OpAtomicIAdd:
	case OpAtomicISub:
	case OpAtomicSMin:
	case OpAtomicUMin:
	case OpAtomicSMax:
	case OpAtomicUMax:
	case OpAtomicAnd:
	case OpAtomicOr:
	case OpAtomicXor:
	case OpArrayLength:
		// Invalid SPIR-V.
		if (length < 3)
			return false;
		variable = args[2];
		break;
	}

	if (variable)
	{
		auto *var = compiler.maybe_get<SPIRVariable>(variable);
		if (var && storage_class_is_interface(var->storage))
			variables.insert(variable);
	}
	return true;
}

unordered_set<VariableID> Compiler::get_active_interface_variables() const
{
	// Traverse the call graph and find all interface variables which are in use.
	unordered_set<VariableID> variables;
	InterfaceVariableAccessHandler handler(*this, variables);
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		if (var.storage != StorageClassOutput)
			return;
		if (!interface_variable_exists_in_entry_point(var.self))
			return;

		// An output variable which is just declared (but uninitialized) might be read by subsequent stages
		// so we should force-enable these outputs,
		// since compilation will fail if a subsequent stage attempts to read from the variable in question.
		// Also, make sure we preserve output variables which are only initialized, but never accessed by any code.
		if (var.initializer != ID(0) || get_execution_model() != ExecutionModelFragment)
			variables.insert(var.self);
	});

	// If we needed to create one, we'll need it.
	if (dummy_sampler_id)
		variables.insert(dummy_sampler_id);

	return variables;
}

void Compiler::set_enabled_interface_variables(std::unordered_set<VariableID> active_variables)
{
	active_interface_variables = std::move(active_variables);
	check_active_interface_variables = true;
}
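// Typical reflection flow (a sketch of how these entry points are meant to be combined):
//
//   auto active = compiler.get_active_interface_variables();
//   spirv_cross::ShaderResources resources = compiler.get_shader_resources(active);
//   compiler.set_enabled_interface_variables(std::move(active));
//
// After set_enabled_interface_variables(), interface variables outside the active set are
// treated as hidden (see is_hidden_variable() above) when code is emitted.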
ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> *active_variables) const
{
	ShaderResources res;

	bool ssbo_instance_name = reflection_ssbo_instance_name_is_significant();

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);

		// It is possible for uniform storage classes to be passed as function parameters, so detect
		// that. To detect function parameters, check if the StorageClass of the variable is function scope.
		if (var.storage == StorageClassFunction || !type.pointer)
			return;

		if (active_variables && active_variables->find(var.self) == end(*active_variables))
			return;

		// In SPIR-V 1.4 and up, every global must be present in the entry point interface list,
		// not just IO variables.
		bool active_in_entry_point = true;
		if (ir.get_spirv_version() < 0x10400)
		{
			if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
				active_in_entry_point = interface_variable_exists_in_entry_point(var.self);
		}
		else
			active_in_entry_point = interface_variable_exists_in_entry_point(var.self);

		if (!active_in_entry_point)
			return;

		bool is_builtin = is_builtin_variable(var);

		if (is_builtin)
		{
			if (var.storage != StorageClassInput && var.storage != StorageClassOutput)
				return;

			auto &list = var.storage == StorageClassInput ? res.builtin_inputs : res.builtin_outputs;
			BuiltInResource resource;

			if (has_decoration(type.self, DecorationBlock))
			{
				resource.resource = { var.self, var.basetype, type.self,
				                      get_remapped_declared_block_name(var.self, false) };

				for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
				{
					resource.value_type_id = type.member_types[i];
					resource.builtin = BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn));
					list.push_back(resource);
				}
			}
			else
			{
				bool strip_array =
				    !has_decoration(var.self, DecorationPatch) && (
				        get_execution_model() == ExecutionModelTessellationControl ||
				        (get_execution_model() == ExecutionModelTessellationEvaluation &&
				         var.storage == StorageClassInput));

				resource.resource = { var.self, var.basetype, type.self, get_name(var.self) };

				if (strip_array && !type.array.empty())
					resource.value_type_id = get_variable_data_type(var).parent_type;
				else
					resource.value_type_id = get_variable_data_type_id(var);

				assert(resource.value_type_id);

				resource.builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
				list.push_back(std::move(resource));
			}
			return;
		}

		// Input
		if (var.storage == StorageClassInput)
		{
			if (has_decoration(type.self, DecorationBlock))
			{
				res.stage_inputs.push_back(
				    { var.self, var.basetype, type.self,
				      get_remapped_declared_block_name(var.self, false) });
			}
			else
				res.stage_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		}
		// Subpass inputs
		else if (var.storage == StorageClassUniformConstant && type.image.dim == DimSubpassData)
		{
			res.subpass_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		}
		// Outputs
		else if (var.storage == StorageClassOutput)
		{
			if (has_decoration(type.self, DecorationBlock))
			{
				res.stage_outputs.push_back(
				    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) });
			}
			else
				res.stage_outputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		}
		// UBOs
		else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock))
		{
			res.uniform_buffers.push_back(
			    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) });
		}
		// Old way to declare SSBOs.
		else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock))
		{
			res.storage_buffers.push_back(
			    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) });
		}
		// Modern way to declare SSBOs.
		else if (type.storage == StorageClassStorageBuffer)
		{
			res.storage_buffers.push_back(
			    { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) });
		}
		// Push constant blocks
		else if (type.storage == StorageClassPushConstant)
		{
			// There can only be one push constant block, but keep the vector in case this restriction is lifted
			// in the future.
			res.push_constant_buffers.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		}
		else if (type.storage == StorageClassShaderRecordBufferKHR)
		{
			res.shader_record_buffers.push_back({ var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) });
		}
		// Atomic counters
		else if (type.storage == StorageClassAtomicCounter)
		{
			res.atomic_counters.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
		}
		else if (type.storage == StorageClassUniformConstant)
		{
			if (type.basetype == SPIRType::Image)
			{
				// Images
				if (type.image.sampled == 2)
				{
					res.storage_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
				}
				// Separate images
				else if (type.image.sampled == 1)
				{
					res.separate_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
				}
			}
			// Separate samplers
			else if (type.basetype == SPIRType::Sampler)
			{
				res.separate_samplers.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
			}
			// Textures
			else if (type.basetype == SPIRType::SampledImage)
			{
				res.sampled_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
			}
			// Acceleration structures
			else if (type.basetype == SPIRType::AccelerationStructure)
			{
				res.acceleration_structures.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
			}
			else
			{
				res.gl_plain_uniforms.push_back({ var.self, var.basetype, type.self, get_name(var.self) });
			}
		}
	});

	return res;
}
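// Rough GLSL-to-ShaderResources mapping implemented above:
//   uniform sampler2D s;                       -> sampled_images
//   uniform texture2D t; / uniform sampler s;  -> separate_images / separate_samplers
//   layout(rgba8) uniform image2D img;         -> storage_images
//   uniform Block { ... };                     -> uniform_buffers
//   buffer Block { ... }; (or old BufferBlock) -> storage_buffers
//   layout(push_constant) uniform Push { ... };-> push_constant_buffers
//   subpassInput                               -> subpass_inputs
//   in/out variables                           -> stage_inputs / stage_outputs (builtins are split out separately)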
bool Compiler::type_is_top_level_block(const SPIRType &type) const
{
	if (type.basetype != SPIRType::Struct)
		return false;
	return has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
}

bool Compiler::type_is_block_like(const SPIRType &type) const
{
	if (type_is_top_level_block(type))
		return true;

	if (type.basetype == SPIRType::Struct)
	{
		// Block-like types may have Offset decorations.
		for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
			if (has_member_decoration(type.self, i, DecorationOffset))
				return true;
	}

	return false;
}

void Compiler::parse_fixup()
{
	// Figure out specialization constants for work group sizes.
	for (auto id_ : ir.ids_for_constant_or_variable)
	{
		auto &id = ir.ids[id_];

		if (id.get_type() == TypeConstant)
		{
			auto &c = id.get<SPIRConstant>();
			if (has_decoration(c.self, DecorationBuiltIn) &&
			    BuiltIn(get_decoration(c.self, DecorationBuiltIn)) == BuiltInWorkgroupSize)
			{
				// In current SPIR-V, there can be just one constant like this.
				// All entry points will receive the constant value.
				// WorkgroupSize takes precedence over LocalSizeId.
				for (auto &entry : ir.entry_points)
				{
					entry.second.workgroup_size.constant = c.self;
					entry.second.workgroup_size.x = c.scalar(0, 0);
					entry.second.workgroup_size.y = c.scalar(0, 1);
					entry.second.workgroup_size.z = c.scalar(0, 2);
				}
			}
		}
		else if (id.get_type() == TypeVariable)
		{
			auto &var = id.get<SPIRVariable>();
			if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup ||
			    var.storage == StorageClassTaskPayloadWorkgroupEXT ||
			    var.storage == StorageClassOutput)
			{
				global_variables.push_back(var.self);

				if (variable_storage_is_aliased(var))
					aliased_variables.push_back(var.self);
			}
		}
	}
}

void Compiler::update_name_cache(unordered_set<string> &cache_primary, const unordered_set<string> &cache_secondary,
                                 string &name)
{
	if (name.empty())
		return;

	const auto find_name = [&](const string &n) -> bool {
		if (cache_primary.find(n) != end(cache_primary))
			return true;

		if (&cache_primary != &cache_secondary)
			if (cache_secondary.find(n) != end(cache_secondary))
				return true;

		return false;
	};

	const auto insert_name = [&](const string &n) { cache_primary.insert(n); };

	if (!find_name(name))
	{
		insert_name(name);
		return;
	}

	uint32_t counter = 0;
	auto tmpname = name;

	bool use_linked_underscore = true;

	if (tmpname == "_")
	{
		// We cannot just append numbers, as we will end up creating internally reserved names.
		// Make it like _0_<counter> instead.
		tmpname += "0";
	}
	else if (tmpname.back() == '_')
	{
		// The last character is an underscore, so we don't need to link in an underscore.
		// This would violate double underscore rules.
		use_linked_underscore = false;
	}

	// If there is a collision (very rare),
	// keep tacking on extra identifier until it's unique.
	do
	{
		counter++;
		name = tmpname + (use_linked_underscore ? "_" : "") + convert_to_string(counter);
	} while (find_name(name));
	insert_name(name);
}
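// Collision examples for update_name_cache(): a second "foo" becomes "foo_1" (then "foo_2", ...),
// a trailing-underscore name like "bar_" becomes "bar_1" (no extra underscore is linked in),
// and the reserved-looking name "_" becomes "_0_1".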
void Compiler::update_name_cache(unordered_set<string> &cache, string &name)
{
	update_name_cache(cache, cache, name);
}

void Compiler::set_name(ID id, const std::string &name)
{
	ir.set_name(id, name);
}

const SPIRType &Compiler::get_type(TypeID id) const
{
	return get<SPIRType>(id);
}

const SPIRType &Compiler::get_type_from_variable(VariableID id) const
{
	return get<SPIRType>(get<SPIRVariable>(id).basetype);
}

uint32_t Compiler::get_pointee_type_id(uint32_t type_id) const
{
	auto *p_type = &get<SPIRType>(type_id);
	if (p_type->pointer)
	{
		assert(p_type->parent_type);
		type_id = p_type->parent_type;
	}
	return type_id;
}

const SPIRType &Compiler::get_pointee_type(const SPIRType &type) const
{
	auto *p_type = &type;
	if (p_type->pointer)
	{
		assert(p_type->parent_type);
		p_type = &get<SPIRType>(p_type->parent_type);
	}
	return *p_type;
}

const SPIRType &Compiler::get_pointee_type(uint32_t type_id) const
{
	return get_pointee_type(get<SPIRType>(type_id));
}

uint32_t Compiler::get_variable_data_type_id(const SPIRVariable &var) const
{
	if (var.phi_variable || var.storage == spv::StorageClass::StorageClassAtomicCounter)
		return var.basetype;
	return get_pointee_type_id(var.basetype);
}

SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var)
{
	return get<SPIRType>(get_variable_data_type_id(var));
}

const SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var) const
{
	return get<SPIRType>(get_variable_data_type_id(var));
}

SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var)
{
	SPIRType *type = &get_variable_data_type(var);
	if (is_array(*type))
		type = &get<SPIRType>(type->parent_type);
	return *type;
}

const SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var) const
{
	const SPIRType *type = &get_variable_data_type(var);
	if (is_array(*type))
		type = &get<SPIRType>(type->parent_type);
	return *type;
}

bool Compiler::is_sampled_image_type(const SPIRType &type)
{
	return (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage) && type.image.sampled == 1 &&
	       type.image.dim != DimBuffer;
}

void Compiler::set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration,
                                            const std::string &argument)
{
	ir.set_member_decoration_string(id, index, decoration, argument);
}

void Compiler::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument)
{
	ir.set_member_decoration(id, index, decoration, argument);
}

void Compiler::set_member_name(TypeID id, uint32_t index, const std::string &name)
{
	ir.set_member_name(id, index, name);
}

const std::string &Compiler::get_member_name(TypeID id, uint32_t index) const
{
	return ir.get_member_name(id, index);
}

void Compiler::set_qualified_name(uint32_t id, const string &name)
{
	ir.meta[id].decoration.qualified_alias = name;
}

void Compiler::set_member_qualified_name(uint32_t type_id, uint32_t index, const std::string &name)
{
	ir.meta[type_id].members.resize(max(ir.meta[type_id].members.size(), size_t(index) + 1));
	ir.meta[type_id].members[index].qualified_alias = name;
}

const string &Compiler::get_member_qualified_name(TypeID type_id, uint32_t index) const
{
	auto *m = ir.find_meta(type_id);
	if (m && index < m->members.size())
		return m->members[index].qualified_alias;
	else
		return ir.get_empty_string();
}

uint32_t Compiler::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
{
	return ir.get_member_decoration(id, index, decoration);
}

const Bitset &Compiler::get_member_decoration_bitset(TypeID id, uint32_t index) const
{
	return ir.get_member_decoration_bitset(id, index);
}

bool Compiler::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
{
	return ir.has_member_decoration(id, index, decoration);
}

void Compiler::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration)
{
	ir.unset_member_decoration(id, index, decoration);
}

void Compiler::set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument)
{
	ir.set_decoration_string(id, decoration, argument);
}

void Compiler::set_decoration(ID id, Decoration decoration, uint32_t argument)
{
	ir.set_decoration(id, decoration, argument);
}

void Compiler::set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value)
{
	auto &dec = ir.meta[id].decoration;
	dec.extended.flags.set(decoration);
	dec.extended.values[decoration] = value;
}

void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration,
                                              uint32_t value)
{
	ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1));
	auto &dec = ir.meta[type].members[index];
	dec.extended.flags.set(decoration);
	dec.extended.values[decoration] = value;
}

static uint32_t get_default_extended_decoration(ExtendedDecorations decoration)
{
	switch (decoration)
	{
	case SPIRVCrossDecorationResourceIndexPrimary:
	case SPIRVCrossDecorationResourceIndexSecondary:
	case SPIRVCrossDecorationResourceIndexTertiary:
	case SPIRVCrossDecorationResourceIndexQuaternary:
	case SPIRVCrossDecorationInterfaceMemberIndex:
		return ~(0u);

	default:
		return 0;
	}
}
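// ExtendedDecorations are SPIRV-Cross-internal annotations layered on top of regular SPIR-V
// decorations. The resource-index and interface-member-index decorations default to ~0u,
// which callers can treat as "not assigned"; everything else defaults to 0.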
uint32_t Compiler::get_extended_decoration(uint32_t id, ExtendedDecorations decoration) const
{
	auto *m = ir.find_meta(id);
	if (!m)
		return 0;

	auto &dec = m->decoration;

	if (!dec.extended.flags.get(decoration))
		return get_default_extended_decoration(decoration);

	return dec.extended.values[decoration];
}

uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const
{
	auto *m = ir.find_meta(type);
	if (!m)
		return 0;

	if (index >= m->members.size())
		return 0;

	auto &dec = m->members[index];
	if (!dec.extended.flags.get(decoration))
		return get_default_extended_decoration(decoration);
	return dec.extended.values[decoration];
}

bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decoration) const
{
	auto *m = ir.find_meta(id);
	if (!m)
		return false;

	auto &dec = m->decoration;
	return dec.extended.flags.get(decoration);
}

bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const
{
	auto *m = ir.find_meta(type);
	if (!m)
		return false;

	if (index >= m->members.size())
		return false;

	auto &dec = m->members[index];
	return dec.extended.flags.get(decoration);
}

void Compiler::unset_extended_decoration(uint32_t id, ExtendedDecorations decoration)
{
	auto &dec = ir.meta[id].decoration;
	dec.extended.flags.clear(decoration);
	dec.extended.values[decoration] = 0;
}

void Compiler::unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration)
{
	ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1));
	auto &dec = ir.meta[type].members[index];
	dec.extended.flags.clear(decoration);
	dec.extended.values[decoration] = 0;
}

StorageClass Compiler::get_storage_class(VariableID id) const
{
	return get<SPIRVariable>(id).storage;
}

const std::string &Compiler::get_name(ID id) const
{
	return ir.get_name(id);
}

const std::string Compiler::get_fallback_name(ID id) const
{
	return join("_", id);
}

const std::string Compiler::get_block_fallback_name(VariableID id) const
{
	auto &var = get<SPIRVariable>(id);
	if (get_name(id).empty())
		return join("_", get<SPIRType>(var.basetype).self, "_", id);
	else
		return get_name(id);
}

const Bitset &Compiler::get_decoration_bitset(ID id) const
{
	return ir.get_decoration_bitset(id);
}

bool Compiler::has_decoration(ID id, Decoration decoration) const
{
	return ir.has_decoration(id, decoration);
}

const string &Compiler::get_decoration_string(ID id, Decoration decoration) const
{
	return ir.get_decoration_string(id, decoration);
}

const string &Compiler::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const
{
	return ir.get_member_decoration_string(id, index, decoration);
}

uint32_t Compiler::get_decoration(ID id, Decoration decoration) const
{
	return ir.get_decoration(id, decoration);
}

void Compiler::unset_decoration(ID id, Decoration decoration)
{
	ir.unset_decoration(id, decoration);
}

bool Compiler::get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const
{
	auto *m = ir.find_meta(id);
	if (!m)
		return false;

	auto &word_offsets = m->decoration_word_offset;
	auto itr = word_offsets.find(decoration);
	if (itr == end(word_offsets))
		return false;

	word_offset = itr->second;
	return true;
}
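// Hypothetical caller sketch for get_binary_offset_for_decoration(): assuming the recorded word
// offset addresses the decoration's literal operand in the original SPIR-V word stream, a caller
// that still holds that buffer could patch, e.g., a binding number in place:
//
//   uint32_t offset;
//   if (compiler.get_binary_offset_for_decoration(var_id, spv::DecorationBinding, offset))
//       spirv_words[offset] = new_binding; // spirv_words is the caller's own copy of the module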
bool Compiler::block_is_noop(const SPIRBlock &block) const
{
	if (block.terminator != SPIRBlock::Direct)
		return false;

	auto &child = get<SPIRBlock>(block.next_block);

	// If this block participates in PHI, the block isn't really noop.
	for (auto &phi : block.phi_variables)
		if (phi.parent == block.self || phi.parent == child.self)
			return false;

	for (auto &phi : child.phi_variables)
		if (phi.parent == block.self)
			return false;

	// Verify all instructions have no semantic impact.
	for (auto &i : block.ops)
	{
		auto op = static_cast<Op>(i.op);

		switch (op)
		{
		// Non-Semantic instructions.
		case OpLine:
		case OpNoLine:
			break;

		case OpExtInst:
		{
			auto *ops = stream(i);
			auto ext = get<SPIRExtension>(ops[2]).ext;

			bool ext_is_nonsemantic_only =
			    ext == SPIRExtension::NonSemanticShaderDebugInfo ||
			    ext == SPIRExtension::SPV_debug_info ||
			    ext == SPIRExtension::NonSemanticGeneric;

			if (!ext_is_nonsemantic_only)
				return false;

			break;
		}

		default:
			return false;
		}
	}

	return true;
}

bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const
{
	// Tried and failed.
	if (block.disable_block_optimization || block.complex_continue)
		return false;

	if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
	{
		// Try to detect common for loop pattern
		// which the code backend can use to create cleaner code.
		// for(;;) { if (cond) { some_body; } else { break; } }
		// is the pattern we're looking for.
		const auto *false_block = maybe_get<SPIRBlock>(block.false_block);
		const auto *true_block = maybe_get<SPIRBlock>(block.true_block);
		const auto *merge_block = maybe_get<SPIRBlock>(block.merge_block);

		bool false_block_is_merge = block.false_block == block.merge_block ||
		                            (false_block && merge_block && execution_is_noop(*false_block, *merge_block));

		bool true_block_is_merge = block.true_block == block.merge_block ||
		                           (true_block && merge_block && execution_is_noop(*true_block, *merge_block));

		bool positive_candidate =
		    block.true_block != block.merge_block && block.true_block != block.self && false_block_is_merge;

		bool negative_candidate =
		    block.false_block != block.merge_block && block.false_block != block.self && true_block_is_merge;

		bool ret = block.terminator == SPIRBlock::Select && block.merge == SPIRBlock::MergeLoop &&
		           (positive_candidate || negative_candidate);

		if (ret && positive_candidate && method == SPIRBlock::MergeToSelectContinueForLoop)
			ret = block.true_block == block.continue_block;
		else if (ret && negative_candidate && method == SPIRBlock::MergeToSelectContinueForLoop)
			ret = block.false_block == block.continue_block;

		// If we have OpPhi which depends on branches which came from our own block,
		// we need to flush phi variables in else block instead of a trivial break,
		// so we cannot assume this is a for loop candidate.
		if (ret)
		{
			for (auto &phi : block.phi_variables)
				if (phi.parent == block.self)
					return false;

			auto *merge = maybe_get<SPIRBlock>(block.merge_block);
			if (merge)
				for (auto &phi : merge->phi_variables)
					if (phi.parent == block.self)
						return false;
		}
		return ret;
	}
	else if (method == SPIRBlock::MergeToDirectForLoop)
	{
		// Empty loop header that just sets up merge target
		// and branches to loop body.
		bool ret = block.terminator == SPIRBlock::Direct && block.merge == SPIRBlock::MergeLoop && block_is_noop(block);

		if (!ret)
			return false;

		auto &child = get<SPIRBlock>(block.next_block);

		const auto *false_block = maybe_get<SPIRBlock>(child.false_block);
		const auto *true_block = maybe_get<SPIRBlock>(child.true_block);
		const auto *merge_block = maybe_get<SPIRBlock>(block.merge_block);

		bool false_block_is_merge = child.false_block == block.merge_block ||
		                            (false_block && merge_block && execution_is_noop(*false_block, *merge_block));

		bool true_block_is_merge = child.true_block == block.merge_block ||
		                           (true_block && merge_block && execution_is_noop(*true_block, *merge_block));

		bool positive_candidate =
		    child.true_block != block.merge_block && child.true_block != block.self && false_block_is_merge;

		bool negative_candidate =
		    child.false_block != block.merge_block && child.false_block != block.self && true_block_is_merge;

		ret = child.terminator == SPIRBlock::Select && child.merge == SPIRBlock::MergeNone &&
		      (positive_candidate || negative_candidate);

		if (ret)
		{
			auto *merge = maybe_get<SPIRBlock>(block.merge_block);
			if (merge)
				for (auto &phi : merge->phi_variables)
					if (phi.parent == block.self || phi.parent == child.false_block)
						return false;
		}

		return ret;
	}
	else
		return false;
}

bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const
{
	if (!execution_is_branchless(from, to))
		return false;

	auto *start = &from;
	for (;;)
	{
		if (start->self == to.self)
			return true;

		if (!block_is_noop(*start))
			return false;

		auto &next = get<SPIRBlock>(start->next_block);
		start = &next;
	}
}

bool Compiler::execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const
{
	auto *start = &from;
	for (;;)
	{
		if (start->self == to.self)
			return true;

		if (start->terminator == SPIRBlock::Direct && start->merge == SPIRBlock::MergeNone)
			start = &get<SPIRBlock>(start->next_block);
		else
			return false;
	}
}

bool Compiler::execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const
{
	return from.terminator == SPIRBlock::Direct && from.merge == SPIRBlock::MergeNone && from.next_block == to.self;
}

SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &block) const
{
	// The block was deemed too complex during code emit, pick conservative fallback paths.
	if (block.complex_continue)
		return SPIRBlock::ComplexLoop;

	// In older glslang output continue block can be equal to the loop header.
	// In this case, execution is clearly branchless, so just assume a while loop header here.
	if (block.merge == SPIRBlock::MergeLoop)
		return SPIRBlock::WhileLoop;

	if (block.loop_dominator == BlockID(SPIRBlock::NoDominator))
	{
		// Continue block is never reached from CFG.
		return SPIRBlock::ComplexLoop;
	}

	auto &dominator = get<SPIRBlock>(block.loop_dominator);

	if (execution_is_noop(block, dominator))
		return SPIRBlock::WhileLoop;
	else if (execution_is_branchless(block, dominator))
		return SPIRBlock::ForLoop;
	else
	{
		const auto *false_block = maybe_get<SPIRBlock>(block.false_block);
		const auto *true_block = maybe_get<SPIRBlock>(block.true_block);
		const auto *merge_block = maybe_get<SPIRBlock>(dominator.merge_block);

		// If we need to flush Phi in this block, we cannot have a DoWhile loop.
		bool flush_phi_to_false = false_block && flush_phi_required(block.self, block.false_block);
		bool flush_phi_to_true = true_block && flush_phi_required(block.self, block.true_block);
		if (flush_phi_to_false || flush_phi_to_true)
			return SPIRBlock::ComplexLoop;

		bool positive_do_while = block.true_block == dominator.self &&
		                         (block.false_block == dominator.merge_block ||
		                          (false_block && merge_block && execution_is_noop(*false_block, *merge_block)));

		bool negative_do_while = block.false_block == dominator.self &&
		                         (block.true_block == dominator.merge_block ||
		                          (true_block && merge_block && execution_is_noop(*true_block, *merge_block)));

		if (block.merge == SPIRBlock::MergeNone && block.terminator == SPIRBlock::Select &&
		    (positive_do_while || negative_do_while))
		{
			return SPIRBlock::DoWhileLoop;
		}
		else
			return SPIRBlock::ComplexLoop;
	}
}
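// Loose intuition for the classification above (a sketch, not a guarantee about every backend):
// WhileLoop means the continue block does nothing of interest, ForLoop means it is a branchless
// chain of work (the classic increment), DoWhileLoop means the continue block itself carries the
// loop-back condition, and ComplexLoop is the conservative fallback.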
const SmallVector<SPIRBlock::Case> &Compiler::get_case_list(const SPIRBlock &block) const
{
	uint32_t width = 0;

	// First we check if we can get the type directly from the block.condition
	// since it can be a SPIRConstant or a SPIRVariable.
	if (const auto *constant = maybe_get<SPIRConstant>(block.condition))
	{
		const auto &type = get<SPIRType>(constant->constant_type);
		width = type.width;
	}
	else if (const auto *op = maybe_get<SPIRConstantOp>(block.condition))
	{
		const auto &type = get<SPIRType>(op->basetype);
		width = type.width;
	}
	else if (const auto *var = maybe_get<SPIRVariable>(block.condition))
	{
		const auto &type = get<SPIRType>(var->basetype);
		width = type.width;
	}
	else if (const auto *undef = maybe_get<SPIRUndef>(block.condition))
	{
		const auto &type = get<SPIRType>(undef->basetype);
		width = type.width;
	}
	else
	{
		auto search = ir.load_type_width.find(block.condition);
		if (search == ir.load_type_width.end())
		{
			SPIRV_CROSS_THROW("Use of undeclared variable on a switch statement.");
		}

		width = search->second;
	}

	if (width > 32)
		return block.cases_64bit;

	return block.cases_32bit;
}

bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const
{
	handler.set_current_block(block);
	handler.rearm_current_block(block);

	// Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks,
	// but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing
	// inside dead blocks ...
	for (auto &i : block.ops)
	{
		auto ops = stream(i);
		auto op = static_cast<Op>(i.op);

		if (!handler.handle(op, ops, i.length))
			return false;

		if (op == OpFunctionCall)
		{
			auto &func = get<SPIRFunction>(ops[2]);
			if (handler.follow_function_call(func))
			{
				if (!handler.begin_function_scope(ops, i.length))
					return false;
				if (!traverse_all_reachable_opcodes(get<SPIRFunction>(ops[2]), handler))
					return false;
				if (!handler.end_function_scope(ops, i.length))
					return false;

				handler.rearm_current_block(block);
			}
		}
	}

	if (!handler.handle_terminator(block))
		return false;

	return true;
}

bool Compiler::traverse_all_reachable_opcodes(const SPIRFunction &func, OpcodeHandler &handler) const
{
	for (auto block : func.blocks)
		if (!traverse_all_reachable_opcodes(get<SPIRBlock>(block), handler))
			return false;

	return true;
}
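// traverse_all_reachable_opcodes() is the generic visitor used by the analyses in this file:
// an OpcodeHandler sees every instruction of every block, is taken into callees at
// OpFunctionCall (when follow_function_call() agrees), and can abort the walk by returning
// false. InterfaceVariableAccessHandler above is one such handler.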
uint32_t Compiler::type_struct_member_offset(const SPIRType &type, uint32_t index) const
{
	auto *type_meta = ir.find_meta(type.self);
	if (type_meta)
	{
		// Decoration must be set in valid SPIR-V, otherwise throw.
		auto &dec = type_meta->members[index];
		if (dec.decoration_flags.get(DecorationOffset))
			return dec.offset;
		else
			SPIRV_CROSS_THROW("Struct member does not have Offset set.");
	}
	else
		SPIRV_CROSS_THROW("Struct member does not have Offset set.");
}

uint32_t Compiler::type_struct_member_array_stride(const SPIRType &type, uint32_t index) const
{
	auto *type_meta = ir.find_meta(type.member_types[index]);
	if (type_meta)
	{
		// Decoration must be set in valid SPIR-V, otherwise throw.
		// ArrayStride is part of the array type not OpMemberDecorate.
		auto &dec = type_meta->decoration;
		if (dec.decoration_flags.get(DecorationArrayStride))
			return dec.array_stride;
		else
			SPIRV_CROSS_THROW("Struct member does not have ArrayStride set.");
	}
	else
		SPIRV_CROSS_THROW("Struct member does not have ArrayStride set.");
}

uint32_t Compiler::type_struct_member_matrix_stride(const SPIRType &type, uint32_t index) const
{
	auto *type_meta = ir.find_meta(type.self);
	if (type_meta)
	{
		// Decoration must be set in valid SPIR-V, otherwise throw.
		// MatrixStride is part of OpMemberDecorate.
		auto &dec = type_meta->members[index];
		if (dec.decoration_flags.get(DecorationMatrixStride))
			return dec.matrix_stride;
		else
			SPIRV_CROSS_THROW("Struct member does not have MatrixStride set.");
	}
	else
		SPIRV_CROSS_THROW("Struct member does not have MatrixStride set.");
}

size_t Compiler::get_declared_struct_size(const SPIRType &type) const
{
	if (type.member_types.empty())
		SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");

	// Offsets can be declared out of order, so we need to deduce the actual size
	// based on last member instead.
	uint32_t member_index = 0;
	size_t highest_offset = 0;
	for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
	{
		size_t offset = type_struct_member_offset(type, i);
		if (offset > highest_offset)
		{
			highest_offset = offset;
			member_index = i;
		}
	}

	size_t size = get_declared_struct_member_size(type, member_index);
	return highest_offset + size;
}
2006 size_t Compiler::get_declared_struct_size_runtime_array(const SPIRType &type, size_t array_size) const
2008 if (type.member_types.empty())
2009 SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");
2011 size_t size = get_declared_struct_size(type);
2012 auto &last_type = get<SPIRType>(type.member_types.back());
2013 if (!last_type.array.empty() && last_type.array_size_literal[0] && last_type.array[0] == 0) // Runtime array
2014 size += array_size * type_struct_member_array_stride(type, uint32_t(type.member_types.size() - 1));
2016 return size;
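// Illustrative use (a sketch; 'compiler', 'type' and 'element_count' are assumed to come from the caller):
//   size_t bytes = compiler.get_declared_struct_size_runtime_array(type, element_count);
// This yields the declared size (with the trailing runtime array counted as zero elements) plus
// element_count * ArrayStride of that trailing member, which is handy for sizing an SSBO allocation.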
2019 uint32_t Compiler::evaluate_spec_constant_u32(const SPIRConstantOp &spec) const
2021 auto &result_type = get<SPIRType>(spec.basetype);
2022 if (result_type.basetype != SPIRType::UInt && result_type.basetype != SPIRType::Int &&
2023 result_type.basetype != SPIRType::Boolean)
2025 SPIRV_CROSS_THROW(
2026 "Only 32-bit integers and booleans are currently supported when evaluating specialization constants.\n");
2029 if (!is_scalar(result_type))
2030 SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n");
2032 uint32_t value = 0;
2034 const auto eval_u32 = [&](uint32_t id) -> uint32_t {
2035 auto &type = expression_type(id);
2036 if (type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int && type.basetype != SPIRType::Boolean)
2038 SPIRV_CROSS_THROW("Only 32-bit integers and booleans are currently supported when evaluating "
2039 "specialization constants.\n");
2042 if (!is_scalar(type))
2043 SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n");
2044 if (const auto *c = this->maybe_get<SPIRConstant>(id))
2045 return c->scalar();
2046 else
2047 return evaluate_spec_constant_u32(this->get<SPIRConstantOp>(id));
2050 #define binary_spec_op(op, binary_op) \
2051 case Op##op: \
2052 value = eval_u32(spec.arguments[0]) binary_op eval_u32(spec.arguments[1]); \
2053 break
2054 #define binary_spec_op_cast(op, binary_op, type) \
2055 case Op##op: \
2056 value = uint32_t(type(eval_u32(spec.arguments[0])) binary_op type(eval_u32(spec.arguments[1]))); \
2057 break
2059 // Support the basic opcodes which are typically used when computing array sizes.
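// Example of what this evaluates (hypothetical IDs): an array sized by
//   %size = OpSpecConstantOp %uint IMul %n %four
// folds to eval_u32(%n) * eval_u32(%four), recursing through nested OpSpecConstantOp operands
// until plain SPIRConstants are reached.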
2060 switch (spec.opcode)
2062 binary_spec_op(IAdd, +);
2063 binary_spec_op(ISub, -);
2064 binary_spec_op(IMul, *);
2065 binary_spec_op(BitwiseAnd, &);
2066 binary_spec_op(BitwiseOr, |);
2067 binary_spec_op(BitwiseXor, ^);
2068 binary_spec_op(LogicalAnd, &);
2069 binary_spec_op(LogicalOr, |);
2070 binary_spec_op(ShiftLeftLogical, <<);
2071 binary_spec_op(ShiftRightLogical, >>);
2072 binary_spec_op_cast(ShiftRightArithmetic, >>, int32_t);
2073 binary_spec_op(LogicalEqual, ==);
2074 binary_spec_op(LogicalNotEqual, !=);
2075 binary_spec_op(IEqual, ==);
2076 binary_spec_op(INotEqual, !=);
2077 binary_spec_op(ULessThan, <);
2078 binary_spec_op(ULessThanEqual, <=);
2079 binary_spec_op(UGreaterThan, >);
2080 binary_spec_op(UGreaterThanEqual, >=);
2081 binary_spec_op_cast(SLessThan, <, int32_t);
2082 binary_spec_op_cast(SLessThanEqual, <=, int32_t);
2083 binary_spec_op_cast(SGreaterThan, >, int32_t);
2084 binary_spec_op_cast(SGreaterThanEqual, >=, int32_t);
2085 #undef binary_spec_op
2086 #undef binary_spec_op_cast
2088 case OpLogicalNot:
2089 value = uint32_t(!eval_u32(spec.arguments[0]));
2090 break;
2092 case OpNot:
2093 value = ~eval_u32(spec.arguments[0]);
2094 break;
2096 case OpSNegate:
2097 value = uint32_t(-int32_t(eval_u32(spec.arguments[0])));
2098 break;
2100 case OpSelect:
2101 value = eval_u32(spec.arguments[0]) ? eval_u32(spec.arguments[1]) : eval_u32(spec.arguments[2]);
2102 break;
2104 case OpUMod:
2106 uint32_t a = eval_u32(spec.arguments[0]);
2107 uint32_t b = eval_u32(spec.arguments[1]);
2108 if (b == 0)
2109 SPIRV_CROSS_THROW("Undefined behavior in UMod, b == 0.\n");
2110 value = a % b;
2111 break;
2114 case OpSRem:
2116 auto a = int32_t(eval_u32(spec.arguments[0]));
2117 auto b = int32_t(eval_u32(spec.arguments[1]));
2118 if (b == 0)
2119 SPIRV_CROSS_THROW("Undefined behavior in SRem, b == 0.\n");
2120 value = a % b;
2121 break;
2124 case OpSMod:
2126 auto a = int32_t(eval_u32(spec.arguments[0]));
2127 auto b = int32_t(eval_u32(spec.arguments[1]));
2128 if (b == 0)
2129 SPIRV_CROSS_THROW("Undefined behavior in SMod, b == 0.\n");
2130 auto v = a % b;
2132 // Makes sure we match the sign of b, not a.
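// Worked example: for a = -7, b = 3, C++ '%' (and SRem) yields -1, which follows the sign of a;
// the adjustment below turns it into 2 so the result follows the sign of b, as SMod requires.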
2133 if ((b < 0 && v > 0) || (b > 0 && v < 0))
2134 v += b;
2135 value = v;
2136 break;
2139 case OpUDiv:
2141 uint32_t a = eval_u32(spec.arguments[0]);
2142 uint32_t b = eval_u32(spec.arguments[1]);
2143 if (b == 0)
2144 SPIRV_CROSS_THROW("Undefined behavior in UDiv, b == 0.\n");
2145 value = a / b;
2146 break;
2149 case OpSDiv:
2151 auto a = int32_t(eval_u32(spec.arguments[0]));
2152 auto b = int32_t(eval_u32(spec.arguments[1]));
2153 if (b == 0)
2154 SPIRV_CROSS_THROW("Undefined behavior in SDiv, b == 0.\n");
2155 value = a / b;
2156 break;
2159 default:
2160 SPIRV_CROSS_THROW("Unsupported spec constant opcode for evaluation.\n");
2163 return value;
2166 uint32_t Compiler::evaluate_constant_u32(uint32_t id) const
2168 if (const auto *c = maybe_get<SPIRConstant>(id))
2169 return c->scalar();
2170 else
2171 return evaluate_spec_constant_u32(get<SPIRConstantOp>(id));
2174 size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const
2176 if (struct_type.member_types.empty())
2177 SPIRV_CROSS_THROW("Declared struct in block cannot be empty.");
2179 auto &flags = get_member_decoration_bitset(struct_type.self, index);
2180 auto &type = get<SPIRType>(struct_type.member_types[index]);
2182 switch (type.basetype)
2184 case SPIRType::Unknown:
2185 case SPIRType::Void:
2186 case SPIRType::Boolean: // Bools are purely logical, and cannot be used for externally visible types.
2187 case SPIRType::AtomicCounter:
2188 case SPIRType::Image:
2189 case SPIRType::SampledImage:
2190 case SPIRType::Sampler:
2191 SPIRV_CROSS_THROW("Querying size for object with opaque size.");
2193 default:
2194 break;
2197 if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer)
2199 // Check if this is a top-level pointer type, and not an array of pointers.
2200 if (type.pointer_depth > get<SPIRType>(type.parent_type).pointer_depth)
2201 return 8;
2204 if (!type.array.empty())
2206 // For arrays, we can use ArrayStride to get an easy check.
2207 bool array_size_literal = type.array_size_literal.back();
2208 uint32_t array_size = array_size_literal ? type.array.back() : evaluate_constant_u32(type.array.back());
2209 return type_struct_member_array_stride(struct_type, index) * array_size;
2211 else if (type.basetype == SPIRType::Struct)
2213 return get_declared_struct_size(type);
2215 else
2217 unsigned vecsize = type.vecsize;
2218 unsigned columns = type.columns;
2220 // Vectors.
2221 if (columns == 1)
2223 size_t component_size = type.width / 8;
2224 return vecsize * component_size;
2226 else
2228 uint32_t matrix_stride = type_struct_member_matrix_stride(struct_type, index);
2230 // Per SPIR-V spec, matrices must be tightly packed and aligned up for vec3 accesses.
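// Worked example (illustrative strides): a column-major mat2x3 (2 columns of vec3, MatrixStride 16)
// occupies 2 * 16 = 32 bytes; the same matrix decorated RowMajor stores vecsize = 3 rows and, with the
// same declared stride, occupies 3 * 16 = 48 bytes.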
2231 if (flags.get(DecorationRowMajor))
2232 return matrix_stride * vecsize;
2233 else if (flags.get(DecorationColMajor))
2234 return matrix_stride * columns;
2235 else
2236 SPIRV_CROSS_THROW("Either row-major or column-major must be declared for matrices.");
2241 bool Compiler::BufferAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
2243 if (opcode != OpAccessChain && opcode != OpInBoundsAccessChain && opcode != OpPtrAccessChain)
2244 return true;
2246 bool ptr_chain = (opcode == OpPtrAccessChain);
2248 // Invalid SPIR-V.
2249 if (length < (ptr_chain ? 5u : 4u))
2250 return false;
2252 if (args[2] != id)
2253 return true;
2255 // Don't bother traversing the entire access chain tree yet.
2256 // If we access a struct member, assume we access the entire member.
2257 uint32_t index = compiler.get<SPIRConstant>(args[ptr_chain ? 4 : 3]).scalar();
2259 // Seen this index already.
2260 if (seen.find(index) != end(seen))
2261 return true;
2262 seen.insert(index);
2264 auto &type = compiler.expression_type(id);
2265 uint32_t offset = compiler.type_struct_member_offset(type, index);
2267 size_t range;
2268 // If we have another member in the struct, deduce the range by looking at the next member.
2269 // This is okay since structs in SPIR-V can have padding, but Offset decoration must be
2270 // monotonically increasing.
2271 // Of course, this doesn't take into account if the SPIR-V for some reason decided to add
2272 // very large amounts of padding, but that's not really a big deal.
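// Worked example (illustrative offsets): members at Offsets {0, 16, 32} give member 0 a range of
// 16 - 0 = 16 bytes and member 1 a range of 32 - 16 = 16 bytes; the last member falls through to
// the declared-size path below.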
2273 if (index + 1 < type.member_types.size())
2275 range = compiler.type_struct_member_offset(type, index + 1) - offset;
2277 else
2279 // No next member, so just deduce the range from the declared size of the member directly.
2280 range = compiler.get_declared_struct_member_size(type, index);
2283 ranges.push_back({ index, offset, range });
2284 return true;
2287 SmallVector<BufferRange> Compiler::get_active_buffer_ranges(VariableID id) const
2289 SmallVector<BufferRange> ranges;
2290 BufferAccessHandler handler(*this, ranges, id);
2291 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
2292 return ranges;
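// Illustrative use (a sketch; 'compiler' and 'resource' are assumed to come from get_shader_resources() on the caller's side):
//   for (auto &range : compiler.get_active_buffer_ranges(resource.id))
//       printf("member #%u: offset %zu, %zu bytes\n", range.index, range.offset, range.range);
// Only members touched by statically reachable access chains are reported.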
2295 bool Compiler::types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const
2297 if (a.basetype != b.basetype)
2298 return false;
2299 if (a.width != b.width)
2300 return false;
2301 if (a.vecsize != b.vecsize)
2302 return false;
2303 if (a.columns != b.columns)
2304 return false;
2305 if (a.array.size() != b.array.size())
2306 return false;
2308 size_t array_count = a.array.size();
2309 if (array_count && memcmp(a.array.data(), b.array.data(), array_count * sizeof(uint32_t)) != 0)
2310 return false;
2312 if (a.basetype == SPIRType::Image || a.basetype == SPIRType::SampledImage)
2314 if (memcmp(&a.image, &b.image, sizeof(SPIRType::Image)) != 0)
2315 return false;
2318 if (a.member_types.size() != b.member_types.size())
2319 return false;
2321 size_t member_types = a.member_types.size();
2322 for (size_t i = 0; i < member_types; i++)
2324 if (!types_are_logically_equivalent(get<SPIRType>(a.member_types[i]), get<SPIRType>(b.member_types[i])))
2325 return false;
2328 return true;
2331 const Bitset &Compiler::get_execution_mode_bitset() const
2333 return get_entry_point().flags;
2336 void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t arg1, uint32_t arg2)
2338 auto &execution = get_entry_point();
2340 execution.flags.set(mode);
2341 switch (mode)
2343 case ExecutionModeLocalSize:
2344 execution.workgroup_size.x = arg0;
2345 execution.workgroup_size.y = arg1;
2346 execution.workgroup_size.z = arg2;
2347 break;
2349 case ExecutionModeLocalSizeId:
2350 execution.workgroup_size.id_x = arg0;
2351 execution.workgroup_size.id_y = arg1;
2352 execution.workgroup_size.id_z = arg2;
2353 break;
2355 case ExecutionModeInvocations:
2356 execution.invocations = arg0;
2357 break;
2359 case ExecutionModeOutputVertices:
2360 execution.output_vertices = arg0;
2361 break;
2363 case ExecutionModeOutputPrimitivesEXT:
2364 execution.output_primitives = arg0;
2365 break;
2367 default:
2368 break;
2372 void Compiler::unset_execution_mode(ExecutionMode mode)
2374 auto &execution = get_entry_point();
2375 execution.flags.clear(mode);
2378 uint32_t Compiler::get_work_group_size_specialization_constants(SpecializationConstant &x, SpecializationConstant &y,
2379 SpecializationConstant &z) const
2381 auto &execution = get_entry_point();
2382 x = { 0, 0 };
2383 y = { 0, 0 };
2384 z = { 0, 0 };
2386 // WorkgroupSize builtin takes precedence over LocalSize / LocalSizeId.
2387 if (execution.workgroup_size.constant != 0)
2389 auto &c = get<SPIRConstant>(execution.workgroup_size.constant);
2391 if (c.m.c[0].id[0] != ID(0))
2393 x.id = c.m.c[0].id[0];
2394 x.constant_id = get_decoration(c.m.c[0].id[0], DecorationSpecId);
2397 if (c.m.c[0].id[1] != ID(0))
2399 y.id = c.m.c[0].id[1];
2400 y.constant_id = get_decoration(c.m.c[0].id[1], DecorationSpecId);
2403 if (c.m.c[0].id[2] != ID(0))
2405 z.id = c.m.c[0].id[2];
2406 z.constant_id = get_decoration(c.m.c[0].id[2], DecorationSpecId);
2409 else if (execution.flags.get(ExecutionModeLocalSizeId))
2411 auto &cx = get<SPIRConstant>(execution.workgroup_size.id_x);
2412 if (cx.specialization)
2414 x.id = execution.workgroup_size.id_x;
2415 x.constant_id = get_decoration(execution.workgroup_size.id_x, DecorationSpecId);
2418 auto &cy = get<SPIRConstant>(execution.workgroup_size.id_y);
2419 if (cy.specialization)
2421 y.id = execution.workgroup_size.id_y;
2422 y.constant_id = get_decoration(execution.workgroup_size.id_y, DecorationSpecId);
2425 auto &cz = get<SPIRConstant>(execution.workgroup_size.id_z);
2426 if (cz.specialization)
2428 z.id = execution.workgroup_size.id_z;
2429 z.constant_id = get_decoration(execution.workgroup_size.id_z, DecorationSpecId);
2433 return execution.workgroup_size.constant;
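// Illustrative use (a sketch; variable names are the caller's):
//   SpecializationConstant spec_x, spec_y, spec_z;
//   compiler.get_work_group_size_specialization_constants(spec_x, spec_y, spec_z);
//   if (spec_x.id) { /* dimension X is driven by a spec constant; spec_x.constant_id is its SpecId */ }
// A zero id in any dimension means that dimension comes from a literal LocalSize instead.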
2436 uint32_t Compiler::get_execution_mode_argument(spv::ExecutionMode mode, uint32_t index) const
2438 auto &execution = get_entry_point();
2439 switch (mode)
2441 case ExecutionModeLocalSizeId:
2442 if (execution.flags.get(ExecutionModeLocalSizeId))
2444 switch (index)
2446 case 0:
2447 return execution.workgroup_size.id_x;
2448 case 1:
2449 return execution.workgroup_size.id_y;
2450 case 2:
2451 return execution.workgroup_size.id_z;
2452 default:
2453 return 0;
2456 else
2457 return 0;
2459 case ExecutionModeLocalSize:
2460 switch (index)
2462 case 0:
2463 if (execution.flags.get(ExecutionModeLocalSizeId) && execution.workgroup_size.id_x != 0)
2464 return get<SPIRConstant>(execution.workgroup_size.id_x).scalar();
2465 else
2466 return execution.workgroup_size.x;
2467 case 1:
2468 if (execution.flags.get(ExecutionModeLocalSizeId) && execution.workgroup_size.id_y != 0)
2469 return get<SPIRConstant>(execution.workgroup_size.id_y).scalar();
2470 else
2471 return execution.workgroup_size.y;
2472 case 2:
2473 if (execution.flags.get(ExecutionModeLocalSizeId) && execution.workgroup_size.id_z != 0)
2474 return get<SPIRConstant>(execution.workgroup_size.id_z).scalar();
2475 else
2476 return execution.workgroup_size.z;
2477 default:
2478 return 0;
2481 case ExecutionModeInvocations:
2482 return execution.invocations;
2484 case ExecutionModeOutputVertices:
2485 return execution.output_vertices;
2487 case ExecutionModeOutputPrimitivesEXT:
2488 return execution.output_primitives;
2490 default:
2491 return 0;
2495 ExecutionModel Compiler::get_execution_model() const
2497 auto &execution = get_entry_point();
2498 return execution.model;
2501 bool Compiler::is_tessellation_shader(ExecutionModel model)
2503 return model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation;
2506 bool Compiler::is_vertex_like_shader() const
2508 auto model = get_execution_model();
2509 return model == ExecutionModelVertex || model == ExecutionModelGeometry ||
2510 model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation;
2513 bool Compiler::is_tessellation_shader() const
2515 return is_tessellation_shader(get_execution_model());
2518 bool Compiler::is_tessellating_triangles() const
2520 return get_execution_mode_bitset().get(ExecutionModeTriangles);
2523 void Compiler::set_remapped_variable_state(VariableID id, bool remap_enable)
2525 get<SPIRVariable>(id).remapped_variable = remap_enable;
2528 bool Compiler::get_remapped_variable_state(VariableID id) const
2530 return get<SPIRVariable>(id).remapped_variable;
2533 void Compiler::set_subpass_input_remapped_components(VariableID id, uint32_t components)
2535 get<SPIRVariable>(id).remapped_components = components;
2538 uint32_t Compiler::get_subpass_input_remapped_components(VariableID id) const
2540 return get<SPIRVariable>(id).remapped_components;
2543 void Compiler::add_implied_read_expression(SPIRExpression &e, uint32_t source)
2545 auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source));
2546 if (itr == end(e.implied_read_expressions))
2547 e.implied_read_expressions.push_back(source);
2550 void Compiler::add_implied_read_expression(SPIRAccessChain &e, uint32_t source)
2552 auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source));
2553 if (itr == end(e.implied_read_expressions))
2554 e.implied_read_expressions.push_back(source);
2557 void Compiler::add_active_interface_variable(uint32_t var_id)
2559 active_interface_variables.insert(var_id);
2561 // In SPIR-V 1.4 and up we must also track the interface variable in the entry point.
2562 if (ir.get_spirv_version() >= 0x10400)
2564 auto &vars = get_entry_point().interface_variables;
2565 if (find(begin(vars), end(vars), VariableID(var_id)) == end(vars))
2566 vars.push_back(var_id);
2570 void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_expression)
2572 auto *ptr_e = maybe_get<SPIRExpression>(dst);
2574 if (is_position_invariant() && ptr_e && maybe_get<SPIRExpression>(source_expression))
2576 auto &deps = ptr_e->invariance_dependencies;
2577 if (std::find(deps.begin(), deps.end(), source_expression) == deps.end())
2578 deps.push_back(source_expression);
2581 // Don't inherit any expression dependencies if the expression in dst
2582 // is not a forwarded temporary.
2583 if (forwarded_temporaries.find(dst) == end(forwarded_temporaries) ||
2584 forced_temporaries.find(dst) != end(forced_temporaries))
2586 return;
2589 auto &e = *ptr_e;
2590 auto *phi = maybe_get<SPIRVariable>(source_expression);
2591 if (phi && phi->phi_variable)
2593 // We have used a phi variable, which can change at the end of the block,
2594 // so make sure we take a dependency on this phi variable.
2595 phi->dependees.push_back(dst);
2598 auto *s = maybe_get<SPIRExpression>(source_expression);
2599 if (!s)
2600 return;
2602 auto &e_deps = e.expression_dependencies;
2603 auto &s_deps = s->expression_dependencies;
2605 // If we depend on an expression, we also depend on all sub-dependencies from the source.
2606 e_deps.push_back(source_expression);
2607 e_deps.insert(end(e_deps), begin(s_deps), end(s_deps));
2609 // Eliminate duplicated dependencies.
2610 sort(begin(e_deps), end(e_deps));
2611 e_deps.erase(unique(begin(e_deps), end(e_deps)), end(e_deps));
2614 SmallVector<EntryPoint> Compiler::get_entry_points_and_stages() const
2616 SmallVector<EntryPoint> entries;
2617 for (auto &entry : ir.entry_points)
2618 entries.push_back({ entry.second.orig_name, entry.second.model });
2619 return entries;
2622 void Compiler::rename_entry_point(const std::string &old_name, const std::string &new_name, spv::ExecutionModel model)
2624 auto &entry = get_entry_point(old_name, model);
2625 entry.orig_name = new_name;
2626 entry.name = new_name;
2629 void Compiler::set_entry_point(const std::string &name, spv::ExecutionModel model)
2631 auto &entry = get_entry_point(name, model);
2632 ir.default_entry_point = entry.self;
2635 SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name)
2637 auto itr = find_if(
2638 begin(ir.entry_points), end(ir.entry_points),
2639 [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.orig_name == name; });
2641 if (itr == end(ir.entry_points))
2642 SPIRV_CROSS_THROW("Entry point does not exist.");
2644 return itr->second;
2647 const SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name) const
2649 auto itr = find_if(
2650 begin(ir.entry_points), end(ir.entry_points),
2651 [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.orig_name == name; });
2653 if (itr == end(ir.entry_points))
2654 SPIRV_CROSS_THROW("Entry point does not exist.");
2656 return itr->second;
2659 SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model)
2661 auto itr = find_if(begin(ir.entry_points), end(ir.entry_points),
2662 [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
2663 return entry.second.orig_name == name && entry.second.model == model;
2666 if (itr == end(ir.entry_points))
2667 SPIRV_CROSS_THROW("Entry point does not exist.");
2669 return itr->second;
2672 const SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model) const
2674 auto itr = find_if(begin(ir.entry_points), end(ir.entry_points),
2675 [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
2676 return entry.second.orig_name == name && entry.second.model == model;
2679 if (itr == end(ir.entry_points))
2680 SPIRV_CROSS_THROW("Entry point does not exist.");
2682 return itr->second;
2685 const string &Compiler::get_cleansed_entry_point_name(const std::string &name, ExecutionModel model) const
2687 return get_entry_point(name, model).name;
2690 const SPIREntryPoint &Compiler::get_entry_point() const
2692 return ir.entry_points.find(ir.default_entry_point)->second;
2695 SPIREntryPoint &Compiler::get_entry_point()
2697 return ir.entry_points.find(ir.default_entry_point)->second;
2700 bool Compiler::interface_variable_exists_in_entry_point(uint32_t id) const
2702 auto &var = get<SPIRVariable>(id);
2704 if (ir.get_spirv_version() < 0x10400)
2706 if (var.storage != StorageClassInput && var.storage != StorageClassOutput &&
2707 var.storage != StorageClassUniformConstant)
2708 SPIRV_CROSS_THROW("Only Input, Output variables and Uniform constants are part of a shader linking interface.");
2710 // This is to avoid potential problems with very old glslang versions which did
2711 // not emit input/output interfaces properly.
2712 // We can assume they only had a single entry point, and single entry point
2713 // shaders could easily be assumed to use every interface variable anyway.
2714 if (ir.entry_points.size() <= 1)
2715 return true;
2718 // In SPIR-V 1.4 and later, all global resource variables must be present.
2720 auto &execution = get_entry_point();
2721 return find(begin(execution.interface_variables), end(execution.interface_variables), VariableID(id)) !=
2722 end(execution.interface_variables);
2725 void Compiler::CombinedImageSamplerHandler::push_remap_parameters(const SPIRFunction &func, const uint32_t *args,
2726 uint32_t length)
2728 // If possible, pipe through a remapping table so that parameters know
2729 // which variables they actually bind to in this scope.
2730 unordered_map<uint32_t, uint32_t> remapping;
2731 for (uint32_t i = 0; i < length; i++)
2732 remapping[func.arguments[i].id] = remap_parameter(args[i]);
2733 parameter_remapping.push(std::move(remapping));
2736 void Compiler::CombinedImageSamplerHandler::pop_remap_parameters()
2738 parameter_remapping.pop();
2741 uint32_t Compiler::CombinedImageSamplerHandler::remap_parameter(uint32_t id)
2743 auto *var = compiler.maybe_get_backing_variable(id);
2744 if (var)
2745 id = var->self;
2747 if (parameter_remapping.empty())
2748 return id;
2750 auto &remapping = parameter_remapping.top();
2751 auto itr = remapping.find(id);
2752 if (itr != end(remapping))
2753 return itr->second;
2754 else
2755 return id;
2758 bool Compiler::CombinedImageSamplerHandler::begin_function_scope(const uint32_t *args, uint32_t length)
2760 if (length < 3)
2761 return false;
2763 auto &callee = compiler.get<SPIRFunction>(args[2]);
2764 args += 3;
2765 length -= 3;
2766 push_remap_parameters(callee, args, length);
2767 functions.push(&callee);
2768 return true;
2771 bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *args, uint32_t length)
2773 if (length < 3)
2774 return false;
2776 auto &callee = compiler.get<SPIRFunction>(args[2]);
2777 args += 3;
2779 // There are two types of cases we have to handle:
2780 // a callee might call sampler2D(texture2D, sampler) directly, where
2781 // one or more of its arguments originate from our function parameters.
2782 // Alternatively, we need to provide combined image samplers to our callees,
2783 // and in this case we need to add those as well.
2785 pop_remap_parameters();
2787 // Our callee has now been processed at least once.
2788 // No point in doing it again.
2789 callee.do_combined_parameters = false;
2791 auto &params = functions.top()->combined_parameters;
2792 functions.pop();
2793 if (functions.empty())
2794 return true;
2796 auto &caller = *functions.top();
2797 if (caller.do_combined_parameters)
2799 for (auto &param : params)
2801 VariableID image_id = param.global_image ? param.image_id : VariableID(args[param.image_id]);
2802 VariableID sampler_id = param.global_sampler ? param.sampler_id : VariableID(args[param.sampler_id]);
2804 auto *i = compiler.maybe_get_backing_variable(image_id);
2805 auto *s = compiler.maybe_get_backing_variable(sampler_id);
2806 if (i)
2807 image_id = i->self;
2808 if (s)
2809 sampler_id = s->self;
2811 register_combined_image_sampler(caller, 0, image_id, sampler_id, param.depth);
2815 return true;
2818 void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller,
2819 VariableID combined_module_id,
2820 VariableID image_id, VariableID sampler_id,
2821 bool depth)
2823 // We now have a texture ID and a sampler ID which will either be found as a global
2824 // or a parameter in our own function. If both are global, they will not need a parameter,
2825 // otherwise, add it to our list.
2826 SPIRFunction::CombinedImageSamplerParameter param = {
2827 0u, image_id, sampler_id, true, true, depth,
2830 auto texture_itr = find_if(begin(caller.arguments), end(caller.arguments),
2831 [image_id](const SPIRFunction::Parameter &p) { return p.id == image_id; });
2832 auto sampler_itr = find_if(begin(caller.arguments), end(caller.arguments),
2833 [sampler_id](const SPIRFunction::Parameter &p) { return p.id == sampler_id; });
2835 if (texture_itr != end(caller.arguments))
2837 param.global_image = false;
2838 param.image_id = uint32_t(texture_itr - begin(caller.arguments));
2841 if (sampler_itr != end(caller.arguments))
2843 param.global_sampler = false;
2844 param.sampler_id = uint32_t(sampler_itr - begin(caller.arguments));
2847 if (param.global_image && param.global_sampler)
2848 return;
2850 auto itr = find_if(begin(caller.combined_parameters), end(caller.combined_parameters),
2851 [&param](const SPIRFunction::CombinedImageSamplerParameter &p) {
2852 return param.image_id == p.image_id && param.sampler_id == p.sampler_id &&
2853 param.global_image == p.global_image && param.global_sampler == p.global_sampler;
2856 if (itr == end(caller.combined_parameters))
2858 uint32_t id = compiler.ir.increase_bound_by(3);
2859 auto type_id = id + 0;
2860 auto ptr_type_id = id + 1;
2861 auto combined_id = id + 2;
2862 auto &base = compiler.expression_type(image_id);
2863 auto &type = compiler.set<SPIRType>(type_id, OpTypeSampledImage);
2864 auto &ptr_type = compiler.set<SPIRType>(ptr_type_id, OpTypePointer);
2866 type = base;
2867 type.self = type_id;
2868 type.basetype = SPIRType::SampledImage;
2869 type.pointer = false;
2870 type.storage = StorageClassGeneric;
2871 type.image.depth = depth;
2873 ptr_type = type;
2874 ptr_type.pointer = true;
2875 ptr_type.storage = StorageClassUniformConstant;
2876 ptr_type.parent_type = type_id;
2878 // Build new variable.
2879 compiler.set<SPIRVariable>(combined_id, ptr_type_id, StorageClassFunction, 0);
2881 // Inherit RelaxedPrecision.
2882 // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration.
2883 bool relaxed_precision =
2884 compiler.has_decoration(sampler_id, DecorationRelaxedPrecision) ||
2885 compiler.has_decoration(image_id, DecorationRelaxedPrecision) ||
2886 (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision));
2888 if (relaxed_precision)
2889 compiler.set_decoration(combined_id, DecorationRelaxedPrecision);
2891 param.id = combined_id;
2893 compiler.set_name(combined_id,
2894 join("SPIRV_Cross_Combined", compiler.to_name(image_id), compiler.to_name(sampler_id)));
2896 caller.combined_parameters.push_back(param);
2897 caller.shadow_arguments.push_back({ ptr_type_id, combined_id, 0u, 0u, true });
2901 bool Compiler::DummySamplerForCombinedImageHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
2903 if (need_dummy_sampler)
2905 // No need to traverse further, we know the result.
2906 return false;
2909 switch (opcode)
2911 case OpLoad:
2913 if (length < 3)
2914 return false;
2916 uint32_t result_type = args[0];
2918 auto &type = compiler.get<SPIRType>(result_type);
2919 bool separate_image =
2920 type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer;
2922 // If not separate image, don't bother.
2923 if (!separate_image)
2924 return true;
2926 uint32_t id = args[1];
2927 uint32_t ptr = args[2];
2928 compiler.set<SPIRExpression>(id, "", result_type, true);
2929 compiler.register_read(id, ptr, true);
2930 break;
2933 case OpImageFetch:
2934 case OpImageQuerySizeLod:
2935 case OpImageQuerySize:
2936 case OpImageQueryLevels:
2937 case OpImageQuerySamples:
2939 // If we are fetching or querying LOD from a plain OpTypeImage, we must pre-combine with our dummy sampler.
2940 auto *var = compiler.maybe_get_backing_variable(args[2]);
2941 if (var)
2943 auto &type = compiler.get<SPIRType>(var->basetype);
2944 if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
2945 need_dummy_sampler = true;
2948 break;
2951 case OpInBoundsAccessChain:
2952 case OpAccessChain:
2953 case OpPtrAccessChain:
2955 if (length < 3)
2956 return false;
2958 uint32_t result_type = args[0];
2959 auto &type = compiler.get<SPIRType>(result_type);
2960 bool separate_image =
2961 type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer;
2962 if (!separate_image)
2963 return true;
2965 uint32_t id = args[1];
2966 uint32_t ptr = args[2];
2967 compiler.set<SPIRExpression>(id, "", result_type, true);
2968 compiler.register_read(id, ptr, true);
2970 // Other backends might use SPIRAccessChain for this later.
2971 compiler.ir.ids[id].set_allow_type_rewrite();
2972 break;
2975 default:
2976 break;
2979 return true;
2982 bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
2984 // We need to figure out where samplers and images are loaded from, so do only the bare bones compilation we need.
2985 bool is_fetch = false;
2987 switch (opcode)
2989 case OpLoad:
2991 if (length < 3)
2992 return false;
2994 uint32_t result_type = args[0];
2996 auto &type = compiler.get<SPIRType>(result_type);
2997 bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
2998 bool separate_sampler = type.basetype == SPIRType::Sampler;
3000 // If not separate image or sampler, don't bother.
3001 if (!separate_image && !separate_sampler)
3002 return true;
3004 uint32_t id = args[1];
3005 uint32_t ptr = args[2];
3006 compiler.set<SPIRExpression>(id, "", result_type, true);
3007 compiler.register_read(id, ptr, true);
3008 return true;
3011 case OpInBoundsAccessChain:
3012 case OpAccessChain:
3013 case OpPtrAccessChain:
3015 if (length < 3)
3016 return false;
3018 // Technically, it is possible to have arrays of textures and arrays of samplers and combine them, but this becomes essentially
3019 // impossible to implement, since we don't know which concrete sampler we are accessing.
3020 // One potential way is to create a combinatorial explosion where N textures and M samplers are combined into N * M sampler2Ds,
3021 // but this seems ridiculously complicated for a problem which is easy to work around.
3022 // Checking access chains like this assumes we don't have samplers or textures inside uniform structs, but this makes no sense.
3024 uint32_t result_type = args[0];
3026 auto &type = compiler.get<SPIRType>(result_type);
3027 bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
3028 bool separate_sampler = type.basetype == SPIRType::Sampler;
3029 if (separate_sampler)
3030 SPIRV_CROSS_THROW(
3031 "Attempting to use arrays or structs of separate samplers. This is not possible to statically "
3032 "remap to plain GLSL.");
3034 if (separate_image)
3036 uint32_t id = args[1];
3037 uint32_t ptr = args[2];
3038 compiler.set<SPIRExpression>(id, "", result_type, true);
3039 compiler.register_read(id, ptr, true);
3041 return true;
3044 case OpImageFetch:
3045 case OpImageQuerySizeLod:
3046 case OpImageQuerySize:
3047 case OpImageQueryLevels:
3048 case OpImageQuerySamples:
3050 // If we are fetching from a plain OpTypeImage or querying LOD, we must pre-combine with our dummy sampler.
3051 auto *var = compiler.maybe_get_backing_variable(args[2]);
3052 if (!var)
3053 return true;
3055 auto &type = compiler.get<SPIRType>(var->basetype);
3056 if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
3058 if (compiler.dummy_sampler_id == 0)
3059 SPIRV_CROSS_THROW("texelFetch without sampler was found, but no dummy sampler has been created with "
3060 "build_dummy_sampler_for_combined_images().");
3062 // Do it outside.
3063 is_fetch = true;
3064 break;
3067 return true;
3070 case OpSampledImage:
3071 // Do it outside.
3072 break;
3074 default:
3075 return true;
3078 // Register sampler2D calls that are used here, in case their operands are function parameters, so
3079 // that their callees know which combined image samplers to propagate down the call stack.
3080 if (!functions.empty())
3082 auto &callee = *functions.top();
3083 if (callee.do_combined_parameters)
3085 uint32_t image_id = args[2];
3087 auto *image = compiler.maybe_get_backing_variable(image_id);
3088 if (image)
3089 image_id = image->self;
3091 uint32_t sampler_id = is_fetch ? compiler.dummy_sampler_id : args[3];
3092 auto *sampler = compiler.maybe_get_backing_variable(sampler_id);
3093 if (sampler)
3094 sampler_id = sampler->self;
3096 uint32_t combined_id = args[1];
3098 auto &combined_type = compiler.get<SPIRType>(args[0]);
3099 register_combined_image_sampler(callee, combined_id, image_id, sampler_id, combined_type.image.depth);
3103 // For function calls, we need to remap IDs which are function parameters into global variables.
3104 // This information is statically known from the current place in the call stack.
3105 // Function parameters are not necessarily pointers, so if we don't have a backing variable, remapping will know
3106 // which backing variable the image/sampler came from.
3107 VariableID image_id = remap_parameter(args[2]);
3108 VariableID sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(args[3]);
3110 auto itr = find_if(begin(compiler.combined_image_samplers), end(compiler.combined_image_samplers),
3111 [image_id, sampler_id](const CombinedImageSampler &combined) {
3112 return combined.image_id == image_id && combined.sampler_id == sampler_id;
3115 if (itr == end(compiler.combined_image_samplers))
3117 uint32_t sampled_type;
3118 uint32_t combined_module_id;
3119 if (is_fetch)
3121 // Have to invent the sampled image type.
3122 sampled_type = compiler.ir.increase_bound_by(1);
3123 auto &type = compiler.set<SPIRType>(sampled_type, OpTypeSampledImage);
3124 type = compiler.expression_type(args[2]);
3125 type.self = sampled_type;
3126 type.basetype = SPIRType::SampledImage;
3127 type.image.depth = false;
3128 combined_module_id = 0;
3130 else
3132 sampled_type = args[0];
3133 combined_module_id = args[1];
3136 auto id = compiler.ir.increase_bound_by(2);
3137 auto type_id = id + 0;
3138 auto combined_id = id + 1;
3140 // Make a new type, pointer to OpTypeSampledImage, so we can make a variable of this type.
3141 // We will probably have this type lying around, but it doesn't hurt to make duplicates for internal purposes.
3142 auto &type = compiler.set<SPIRType>(type_id, OpTypePointer);
3143 auto &base = compiler.get<SPIRType>(sampled_type);
3144 type = base;
3145 type.pointer = true;
3146 type.storage = StorageClassUniformConstant;
3147 type.parent_type = type_id;
3149 // Build new variable.
3150 compiler.set<SPIRVariable>(combined_id, type_id, StorageClassUniformConstant, 0);
3152 // Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant).
3153 // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration.
3154 bool relaxed_precision =
3155 (sampler_id && compiler.has_decoration(sampler_id, DecorationRelaxedPrecision)) ||
3156 (image_id && compiler.has_decoration(image_id, DecorationRelaxedPrecision)) ||
3157 (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision));
3159 if (relaxed_precision)
3160 compiler.set_decoration(combined_id, DecorationRelaxedPrecision);
3162 // Propagate the array type for the original image as well.
3163 auto *var = compiler.maybe_get_backing_variable(image_id);
3164 if (var)
3166 auto &parent_type = compiler.get<SPIRType>(var->basetype);
3167 type.array = parent_type.array;
3168 type.array_size_literal = parent_type.array_size_literal;
3171 compiler.combined_image_samplers.push_back({ combined_id, image_id, sampler_id });
3174 return true;
3177 VariableID Compiler::build_dummy_sampler_for_combined_images()
3179 DummySamplerForCombinedImageHandler handler(*this);
3180 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
3181 if (handler.need_dummy_sampler)
3183 uint32_t offset = ir.increase_bound_by(3);
3184 auto type_id = offset + 0;
3185 auto ptr_type_id = offset + 1;
3186 auto var_id = offset + 2;
3188 auto &sampler = set<SPIRType>(type_id, OpTypeSampler);
3189 sampler.basetype = SPIRType::Sampler;
3191 auto &ptr_sampler = set<SPIRType>(ptr_type_id, OpTypePointer);
3192 ptr_sampler = sampler;
3193 ptr_sampler.self = type_id;
3194 ptr_sampler.storage = StorageClassUniformConstant;
3195 ptr_sampler.pointer = true;
3196 ptr_sampler.parent_type = type_id;
3198 set<SPIRVariable>(var_id, ptr_type_id, StorageClassUniformConstant, 0);
3199 set_name(var_id, "SPIRV_Cross_DummySampler");
3200 dummy_sampler_id = var_id;
3201 return var_id;
3203 else
3204 return 0;
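// Illustrative use (a sketch): when targeting plain GLSL, call this before build_combined_image_samplers()
// so that texelFetch and image queries on separate images have a sampler to combine with; a return value of 0
// means no dummy sampler was needed. The returned variable ID can then be given bindings by the caller.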
3207 void Compiler::build_combined_image_samplers()
3209 ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
3210 func.combined_parameters.clear();
3211 func.shadow_arguments.clear();
3212 func.do_combined_parameters = true;
3215 combined_image_samplers.clear();
3216 CombinedImageSamplerHandler handler(*this);
3217 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
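// Illustrative use (a sketch, assuming the public get_combined_image_samplers() and get_name() accessors):
//   compiler.build_combined_image_samplers();
//   for (auto &remap : compiler.get_combined_image_samplers())
//       compiler.set_name(remap.combined_id, join("SPIRV_Cross_Combined", compiler.get_name(remap.image_id),
//                                                  compiler.get_name(remap.sampler_id)));
// This gives the synthesized sampler2D variables stable names before compiling to GLSL.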
3220 SmallVector<SpecializationConstant> Compiler::get_specialization_constants() const
3222 SmallVector<SpecializationConstant> spec_consts;
3223 ir.for_each_typed_id<SPIRConstant>([&](uint32_t, const SPIRConstant &c) {
3224 if (c.specialization && has_decoration(c.self, DecorationSpecId))
3225 spec_consts.push_back({ c.self, get_decoration(c.self, DecorationSpecId) });
3227 return spec_consts;
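// Illustrative use (a sketch): enumerate spec constants and read their current default values:
//   for (auto &sc : compiler.get_specialization_constants())
//       printf("constant_id %u defaults to %u\n", sc.constant_id, compiler.get_constant(sc.id).scalar());
// sc.id is the SPIR-V ID (usable with get_constant()), sc.constant_id is the SpecId seen by the client API.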
3230 SPIRConstant &Compiler::get_constant(ConstantID id)
3232 return get<SPIRConstant>(id);
3235 const SPIRConstant &Compiler::get_constant(ConstantID id) const
3237 return get<SPIRConstant>(id);
3240 static bool exists_unaccessed_path_to_return(const CFG &cfg, uint32_t block, const unordered_set<uint32_t> &blocks,
3241 unordered_set<uint32_t> &visit_cache)
3243 // This block accesses the variable.
3244 if (blocks.find(block) != end(blocks))
3245 return false;
3247 // We are at the end of the CFG.
3248 if (cfg.get_succeeding_edges(block).empty())
3249 return true;
3251 // If any of our successors have a path to the end, there exists a path from block.
3252 for (auto &succ : cfg.get_succeeding_edges(block))
3254 if (visit_cache.count(succ) == 0)
3256 if (exists_unaccessed_path_to_return(cfg, succ, blocks, visit_cache))
3257 return true;
3258 visit_cache.insert(succ);
3262 return false;
3265 void Compiler::analyze_parameter_preservation(
3266 SPIRFunction &entry, const CFG &cfg, const unordered_map<uint32_t, unordered_set<uint32_t>> &variable_to_blocks,
3267 const unordered_map<uint32_t, unordered_set<uint32_t>> &complete_write_blocks)
3269 for (auto &arg : entry.arguments)
3271 // Non-pointers are always inputs.
3272 auto &type = get<SPIRType>(arg.type);
3273 if (!type.pointer)
3274 continue;
3276 // Opaque argument types are always inputs.
3277 bool potential_preserve;
3278 switch (type.basetype)
3280 case SPIRType::Sampler:
3281 case SPIRType::Image:
3282 case SPIRType::SampledImage:
3283 case SPIRType::AtomicCounter:
3284 potential_preserve = false;
3285 break;
3287 default:
3288 potential_preserve = true;
3289 break;
3292 if (!potential_preserve)
3293 continue;
3295 auto itr = variable_to_blocks.find(arg.id);
3296 if (itr == end(variable_to_blocks))
3298 // Variable is never accessed.
3299 continue;
3302 // We have accessed a variable, but there was no complete writes to that variable.
3303 // We deduce that we must preserve the argument.
3304 itr = complete_write_blocks.find(arg.id);
3305 if (itr == end(complete_write_blocks))
3307 arg.read_count++;
3308 continue;
3311 // If there is a path through the CFG where no block completely writes to the variable, the variable will be in an undefined state
3312 // when the function returns. We therefore need to implicitly preserve the variable in case there are writers in the function.
3313 // Major case here is if a function is
3314 // void foo(int &var) { if (cond) var = 10; }
3315 // Using read/write counts, we will think it's just an out variable, but it really needs to be inout,
3316 // because if we don't write anything, whatever the caller passed in must be returned back unchanged.
3317 unordered_set<uint32_t> visit_cache;
3318 if (exists_unaccessed_path_to_return(cfg, entry.entry_block, itr->second, visit_cache))
3319 arg.read_count++;
3323 Compiler::AnalyzeVariableScopeAccessHandler::AnalyzeVariableScopeAccessHandler(Compiler &compiler_,
3324 SPIRFunction &entry_)
3325 : compiler(compiler_)
3326 , entry(entry_)
3330 bool Compiler::AnalyzeVariableScopeAccessHandler::follow_function_call(const SPIRFunction &)
3332 // Only analyze within this function.
3333 return false;
3336 void Compiler::AnalyzeVariableScopeAccessHandler::set_current_block(const SPIRBlock &block)
3338 current_block = &block;
3340 // If we're branching to a block which uses OpPhi, in GLSL
3341 // this will be a variable write when we branch,
3342 // so we need to track access to these variables as well to
3343 // have a complete picture.
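// Example: if this block branches to a successor containing "%p = OpPhi %T %val %this_block ...",
// the phi's backing function_variable is recorded as accessed both here and in the successor, and
// the incoming value %val is noted as accessed in this block (see test_phi below).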
3344 const auto test_phi = [this, &block](uint32_t to) {
3345 auto &next = compiler.get<SPIRBlock>(to);
3346 for (auto &phi : next.phi_variables)
3348 if (phi.parent == block.self)
3350 accessed_variables_to_block[phi.function_variable].insert(block.self);
3351 // Phi variables are also accessed in our target branch block.
3352 accessed_variables_to_block[phi.function_variable].insert(next.self);
3354 notify_variable_access(phi.local_variable, block.self);
3359 switch (block.terminator)
3361 case SPIRBlock::Direct:
3362 notify_variable_access(block.condition, block.self);
3363 test_phi(block.next_block);
3364 break;
3366 case SPIRBlock::Select:
3367 notify_variable_access(block.condition, block.self);
3368 test_phi(block.true_block);
3369 test_phi(block.false_block);
3370 break;
3372 case SPIRBlock::MultiSelect:
3374 notify_variable_access(block.condition, block.self);
3375 auto &cases = compiler.get_case_list(block);
3376 for (auto &target : cases)
3377 test_phi(target.block);
3378 if (block.default_block)
3379 test_phi(block.default_block);
3380 break;
3383 default:
3384 break;
3388 void Compiler::AnalyzeVariableScopeAccessHandler::notify_variable_access(uint32_t id, uint32_t block)
3390 if (id == 0)
3391 return;
3393 // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers.
3394 auto itr = rvalue_forward_children.find(id);
3395 if (itr != end(rvalue_forward_children))
3396 for (auto child_id : itr->second)
3397 notify_variable_access(child_id, block);
3399 if (id_is_phi_variable(id))
3400 accessed_variables_to_block[id].insert(block);
3401 else if (id_is_potential_temporary(id))
3402 accessed_temporaries_to_block[id].insert(block);
3405 bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_phi_variable(uint32_t id) const
3407 if (id >= compiler.get_current_id_bound())
3408 return false;
3409 auto *var = compiler.maybe_get<SPIRVariable>(id);
3410 return var && var->phi_variable;
3413 bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_potential_temporary(uint32_t id) const
3415 if (id >= compiler.get_current_id_bound())
3416 return false;
3418 // Temporaries are not created before we start emitting code.
3419 return compiler.ir.ids[id].empty() || (compiler.ir.ids[id].get_type() == TypeExpression);
3422 bool Compiler::AnalyzeVariableScopeAccessHandler::handle_terminator(const SPIRBlock &block)
3424 switch (block.terminator)
3426 case SPIRBlock::Return:
3427 if (block.return_value)
3428 notify_variable_access(block.return_value, block.self);
3429 break;
3431 case SPIRBlock::Select:
3432 case SPIRBlock::MultiSelect:
3433 notify_variable_access(block.condition, block.self);
3434 break;
3436 default:
3437 break;
3440 return true;
3443 bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length)
3445 // Keep track of the types of temporaries, so we can hoist them out as necessary.
3446 uint32_t result_type = 0, result_id = 0;
3447 if (compiler.instruction_to_result_type(result_type, result_id, op, args, length))
3449 // For some opcodes, we will need to override the result id.
3450 // If we need to hoist the temporary, the temporary type is the input, not the result.
3451 if (op == OpConvertUToAccelerationStructureKHR)
3453 auto itr = result_id_to_type.find(args[2]);
3454 if (itr != result_id_to_type.end())
3455 result_type = itr->second;
3458 result_id_to_type[result_id] = result_type;
3461 switch (op)
3463 case OpStore:
3465 if (length < 2)
3466 return false;
3468 ID ptr = args[0];
3469 auto *var = compiler.maybe_get_backing_variable(ptr);
3471 // If we store through an access chain, we have a partial write.
3472 if (var)
3474 accessed_variables_to_block[var->self].insert(current_block->self);
3475 if (var->self == ptr)
3476 complete_write_variables_to_block[var->self].insert(current_block->self);
3477 else
3478 partial_write_variables_to_block[var->self].insert(current_block->self);
3481 // args[0] might be an access chain we have to track use of.
3482 notify_variable_access(args[0], current_block->self);
3483 // Might try to store a Phi variable here.
3484 notify_variable_access(args[1], current_block->self);
3485 break;
3488 case OpAccessChain:
3489 case OpInBoundsAccessChain:
3490 case OpPtrAccessChain:
3492 if (length < 3)
3493 return false;
3495 // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers.
3496 uint32_t ptr = args[2];
3497 auto *var = compiler.maybe_get<SPIRVariable>(ptr);
3498 if (var)
3500 accessed_variables_to_block[var->self].insert(current_block->self);
3501 rvalue_forward_children[args[1]].insert(var->self);
3504 // args[2] might be another access chain we have to track use of.
3505 for (uint32_t i = 2; i < length; i++)
3507 notify_variable_access(args[i], current_block->self);
3508 rvalue_forward_children[args[1]].insert(args[i]);
3511 // Also keep track of the access chain pointer itself.
3512 // In exceptionally rare cases, we can end up with a case where
3513 // the access chain is generated in the loop body, but is consumed in continue block.
3514 // This means we need complex loop workarounds, and we must detect this via CFG analysis.
3515 notify_variable_access(args[1], current_block->self);
3517 // The result of an access chain is a fixed expression and is not really considered a temporary.
3518 auto &e = compiler.set<SPIRExpression>(args[1], "", args[0], true);
3519 auto *backing_variable = compiler.maybe_get_backing_variable(ptr);
3520 e.loaded_from = backing_variable ? VariableID(backing_variable->self) : VariableID(0);
3522 // Other backends might use SPIRAccessChain for this later.
3523 compiler.ir.ids[args[1]].set_allow_type_rewrite();
3524 access_chain_expressions.insert(args[1]);
3525 break;
3528 case OpCopyMemory:
3530 if (length < 2)
3531 return false;
3533 ID lhs = args[0];
3534 ID rhs = args[1];
3535 auto *var = compiler.maybe_get_backing_variable(lhs);
3537 // If we store through an access chain, we have a partial write.
3538 if (var)
3540 accessed_variables_to_block[var->self].insert(current_block->self);
3541 if (var->self == lhs)
3542 complete_write_variables_to_block[var->self].insert(current_block->self);
3543 else
3544 partial_write_variables_to_block[var->self].insert(current_block->self);
3547 // args[0:1] might be access chains we have to track use of.
3548 for (uint32_t i = 0; i < 2; i++)
3549 notify_variable_access(args[i], current_block->self);
3551 var = compiler.maybe_get_backing_variable(rhs);
3552 if (var)
3553 accessed_variables_to_block[var->self].insert(current_block->self);
3554 break;
3557 case OpCopyObject:
3559 // OpCopyObject copies the underlying non-pointer type,
3560 // so any temp variable should be declared using the underlying type.
3561 // If the type is a pointer, get its base type and overwrite the result type mapping.
3562 auto &type = compiler.get<SPIRType>(result_type);
3563 if (type.pointer)
3564 result_id_to_type[result_id] = type.parent_type;
3566 if (length < 3)
3567 return false;
3569 auto *var = compiler.maybe_get_backing_variable(args[2]);
3570 if (var)
3571 accessed_variables_to_block[var->self].insert(current_block->self);
3573 // Might be an access chain which we have to keep track of.
3574 notify_variable_access(args[1], current_block->self);
3575 if (access_chain_expressions.count(args[2]))
3576 access_chain_expressions.insert(args[1]);
3578 // Might try to copy a Phi variable here.
3579 notify_variable_access(args[2], current_block->self);
3580 break;
3583 case OpLoad:
3585 if (length < 3)
3586 return false;
3587 uint32_t ptr = args[2];
3588 auto *var = compiler.maybe_get_backing_variable(ptr);
3589 if (var)
3590 accessed_variables_to_block[var->self].insert(current_block->self);
3592 // Loaded value is a temporary.
3593 notify_variable_access(args[1], current_block->self);
3595 // Might be an access chain we have to track use of.
3596 notify_variable_access(args[2], current_block->self);
3598 // If we're loading an opaque type we cannot lower it to a temporary,
3599 // we must defer access of args[2] until it's used.
3600 auto &type = compiler.get<SPIRType>(args[0]);
3601 if (compiler.type_is_opaque_value(type))
3602 rvalue_forward_children[args[1]].insert(args[2]);
3603 break;
3606 case OpFunctionCall:
3608 if (length < 3)
3609 return false;
3611 // Return value may be a temporary.
3612 if (compiler.get_type(args[0]).basetype != SPIRType::Void)
3613 notify_variable_access(args[1], current_block->self);
3615 length -= 3;
3616 args += 3;
3618 for (uint32_t i = 0; i < length; i++)
3620 auto *var = compiler.maybe_get_backing_variable(args[i]);
3621 if (var)
3623 accessed_variables_to_block[var->self].insert(current_block->self);
3624 // Assume we can get partial writes to this variable.
3625 partial_write_variables_to_block[var->self].insert(current_block->self);
3628 // Cannot easily prove if argument we pass to a function is completely written.
3629 // Usually, functions write to a dummy variable,
3630 // which is then copied in full to the real argument.
3632 // Might try to copy a Phi variable here.
3633 notify_variable_access(args[i], current_block->self);
3635 break;
3638 case OpSelect:
3640 // In case of variable pointers, we might access a variable here.
3641 // We cannot prove anything about these accesses however.
3642 for (uint32_t i = 1; i < length; i++)
3644 if (i >= 3)
3646 auto *var = compiler.maybe_get_backing_variable(args[i]);
3647 if (var)
3649 accessed_variables_to_block[var->self].insert(current_block->self);
3650 // Assume we can get partial writes to this variable.
3651 partial_write_variables_to_block[var->self].insert(current_block->self);
3655 // Might try to copy a Phi variable here.
3656 notify_variable_access(args[i], current_block->self);
3658 break;
3661 case OpExtInst:
3663 for (uint32_t i = 4; i < length; i++)
3664 notify_variable_access(args[i], current_block->self);
3665 notify_variable_access(args[1], current_block->self);
3667 uint32_t extension_set = args[2];
3668 if (compiler.get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
3670 auto op_450 = static_cast<GLSLstd450>(args[3]);
3671 switch (op_450)
3673 case GLSLstd450Modf:
3674 case GLSLstd450Frexp:
3676 uint32_t ptr = args[5];
3677 auto *var = compiler.maybe_get_backing_variable(ptr);
3678 if (var)
3680 accessed_variables_to_block[var->self].insert(current_block->self);
3681 if (var->self == ptr)
3682 complete_write_variables_to_block[var->self].insert(current_block->self);
3683 else
3684 partial_write_variables_to_block[var->self].insert(current_block->self);
3686 break;
3689 default:
3690 break;
3693 break;
3696 case OpArrayLength:
3697 // Only result is a temporary.
3698 notify_variable_access(args[1], current_block->self);
3699 break;
3701 case OpLine:
3702 case OpNoLine:
3703 // Uses literals, but cannot be a phi variable or temporary, so ignore.
3704 break;
3706 // Atomics shouldn't be able to access function-local variables.
3707 // Some GLSL builtins access a pointer.
3709 case OpCompositeInsert:
3710 case OpVectorShuffle:
3711 // Specialize for opcode which contains literals.
3712 for (uint32_t i = 1; i < 4; i++)
3713 notify_variable_access(args[i], current_block->self);
3714 break;
3716 case OpCompositeExtract:
3717 // Specialize for opcode which contains literals.
3718 for (uint32_t i = 1; i < 3; i++)
3719 notify_variable_access(args[i], current_block->self);
3720 break;
3722 case OpImageWrite:
3723 for (uint32_t i = 0; i < length; i++)
3725 // Argument 3 is a literal.
3726 if (i != 3)
3727 notify_variable_access(args[i], current_block->self);
3729 break;
3731 case OpImageSampleImplicitLod:
3732 case OpImageSampleExplicitLod:
3733 case OpImageSparseSampleImplicitLod:
3734 case OpImageSparseSampleExplicitLod:
3735 case OpImageSampleProjImplicitLod:
3736 case OpImageSampleProjExplicitLod:
3737 case OpImageSparseSampleProjImplicitLod:
3738 case OpImageSparseSampleProjExplicitLod:
3739 case OpImageFetch:
3740 case OpImageSparseFetch:
3741 case OpImageRead:
3742 case OpImageSparseRead:
3743 for (uint32_t i = 1; i < length; i++)
3745 // Argument 4 is a literal.
3746 if (i != 4)
3747 notify_variable_access(args[i], current_block->self);
3749 break;
3751 case OpImageSampleDrefImplicitLod:
3752 case OpImageSampleDrefExplicitLod:
3753 case OpImageSparseSampleDrefImplicitLod:
3754 case OpImageSparseSampleDrefExplicitLod:
3755 case OpImageSampleProjDrefImplicitLod:
3756 case OpImageSampleProjDrefExplicitLod:
3757 case OpImageSparseSampleProjDrefImplicitLod:
3758 case OpImageSparseSampleProjDrefExplicitLod:
3759 case OpImageGather:
3760 case OpImageSparseGather:
3761 case OpImageDrefGather:
3762 case OpImageSparseDrefGather:
3763 for (uint32_t i = 1; i < length; i++)
3765 // Argument 5 is a literal.
3766 if (i != 5)
3767 notify_variable_access(args[i], current_block->self);
3769 break;
3771 default:
3773 // Rather dirty way of figuring out where Phi variables are used.
3774 // As long as only IDs are used, we can scan through instructions and try to find any evidence that
3775 // the ID of a variable has been used.
3776 // There are potential false positives here where a literal is used in-place of an ID,
3777 // but worst case, it does not affect the correctness of the compile.
3778 // Exhaustive analysis would be better here, but it's not worth it for now.
3779 for (uint32_t i = 0; i < length; i++)
3780 notify_variable_access(args[i], current_block->self);
3781 break;
3784 return true;
3787 Compiler::StaticExpressionAccessHandler::StaticExpressionAccessHandler(Compiler &compiler_, uint32_t variable_id_)
3788 : compiler(compiler_)
3789 , variable_id(variable_id_)
3793 bool Compiler::StaticExpressionAccessHandler::follow_function_call(const SPIRFunction &)
3795 return false;
3798 bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length)
3800 switch (op)
3802 case OpStore:
3803 if (length < 2)
3804 return false;
3805 if (args[0] == variable_id)
3807 static_expression = args[1];
3808 write_count++;
3810 break;
3812 case OpLoad:
3813 if (length < 3)
3814 return false;
3815 if (args[2] == variable_id && static_expression == 0) // Tried to read from variable before it was initialized.
3816 return false;
3817 break;
3819 case OpAccessChain:
3820 case OpInBoundsAccessChain:
3821 case OpPtrAccessChain:
3822 if (length < 3)
3823 return false;
3824 if (args[2] == variable_id) // If we try to access chain our candidate variable before we store to it, bail.
3825 return false;
3826 break;
3828 default:
3829 break;
3832 return true;
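// Illustrative sketch (not part of the upstream file): the handler above looks for a candidate
// variable that receives exactly one complete OpStore of a constant before anything reads it, e.g.
//   %lut = OpVariable %_ptr_Function_arr Function
//          OpStore %lut %const_composite                  ; the single write recorded as static_expression
//   %e   = OpAccessChain %_ptr_Function_float %lut %idx   ; later reads do not invalidate the recorded write
// Loading or access-chaining the variable before that store makes handle() return false, and
// find_function_local_luts() then rejects the candidate via write_count/static_expression.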
3835 void Compiler::find_function_local_luts(SPIRFunction &entry, const AnalyzeVariableScopeAccessHandler &handler,
3836 bool single_function)
3838 auto &cfg = *function_cfgs.find(entry.self)->second;
3840 // For each variable which is statically accessed.
3841 for (auto &accessed_var : handler.accessed_variables_to_block)
3843 auto &blocks = accessed_var.second;
3844 auto &var = get<SPIRVariable>(accessed_var.first);
3845 auto &type = expression_type(accessed_var.first);
3847 // First check if there are writes to the variable. Later, if there are none, we'll
3848 // reconsider it as a globally accessed LUT.
3849 if (!var.is_written_to)
3851 var.is_written_to = handler.complete_write_variables_to_block.count(var.self) != 0 ||
3852 handler.partial_write_variables_to_block.count(var.self) != 0;
3855 // Only consider function local variables here.
3856 // If we only have a single function in our CFG, private storage is also fine,
3857 // since it behaves like a function local variable.
3858 bool allow_lut = var.storage == StorageClassFunction || (single_function && var.storage == StorageClassPrivate);
3859 if (!allow_lut)
3860 continue;
3862 // We cannot be a phi variable.
3863 if (var.phi_variable)
3864 continue;
3866 // Only consider arrays here.
3867 if (type.array.empty())
3868 continue;
3870 // If the variable has an initializer, make sure it is a constant expression.
3871 uint32_t static_constant_expression = 0;
3872 if (var.initializer)
3874 if (ir.ids[var.initializer].get_type() != TypeConstant)
3875 continue;
3876 static_constant_expression = var.initializer;
3878 // There must be no stores to this variable; if there are none, we have now proved we have a LUT.
3879 if (var.is_written_to)
3880 continue;
3882 else
3884 // We can have one, and only one write to the variable, and that write needs to be a constant.
3886 // No partial writes allowed.
3887 if (handler.partial_write_variables_to_block.count(var.self) != 0)
3888 continue;
3890 auto itr = handler.complete_write_variables_to_block.find(var.self);
3892 // No writes?
3893 if (itr == end(handler.complete_write_variables_to_block))
3894 continue;
3896 // We write to the variable in more than one block.
3897 auto &write_blocks = itr->second;
3898 if (write_blocks.size() != 1)
3899 continue;
3901 // The write needs to happen in the dominating block.
3902 DominatorBuilder builder(cfg);
3903 for (auto &block : blocks)
3904 builder.add_block(block);
3905 uint32_t dominator = builder.get_dominator();
3907 // The complete write happened in a branch or similar, cannot deduce static expression.
3908 if (write_blocks.count(dominator) == 0)
3909 continue;
3911 // Find the static expression for this variable.
3912 StaticExpressionAccessHandler static_expression_handler(*this, var.self);
3913 traverse_all_reachable_opcodes(get<SPIRBlock>(dominator), static_expression_handler);
3915 // We want one, and exactly one, write.
3916 if (static_expression_handler.write_count != 1 || static_expression_handler.static_expression == 0)
3917 continue;
3919 // Is it a constant expression?
3920 if (ir.ids[static_expression_handler.static_expression].get_type() != TypeConstant)
3921 continue;
3923 // We found a LUT!
3924 static_constant_expression = static_expression_handler.static_expression;
3927 get<SPIRConstant>(static_constant_expression).is_used_as_lut = true;
3928 var.static_expression = static_constant_expression;
3929 var.statically_assigned = true;
3930 var.remapped_variable = true;
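// Rough GLSL-level example of what the pass above promotes (hedged sketch, not upstream code):
//   float lut[4] = float[](1.0, 2.0, 3.0, 4.0);   // one constant write in the dominating block
//   color = lut[index];                           // only reads afterwards
// Such an array is emitted as a constant lookup table (is_used_as_lut) instead of a mutable
// local, with the variable marked statically_assigned and remapped_variable.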
3934 void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeAccessHandler &handler)
3936 // First, we map out all variable access within a function.
3937 // Essentially a map of block -> { variables accessed in the basic block }
3938 traverse_all_reachable_opcodes(entry, handler);
3940 auto &cfg = *function_cfgs.find(entry.self)->second;
3942 // Analyze if there are parameters which need to be implicitly preserved with an "in" qualifier.
3943 analyze_parameter_preservation(entry, cfg, handler.accessed_variables_to_block,
3944 handler.complete_write_variables_to_block);
3946 unordered_map<uint32_t, uint32_t> potential_loop_variables;
3948 // Find the loop dominator block for each block.
3949 for (auto &block_id : entry.blocks)
3951 auto &block = get<SPIRBlock>(block_id);
3953 auto itr = ir.continue_block_to_loop_header.find(block_id);
3954 if (itr != end(ir.continue_block_to_loop_header) && itr->second != block_id)
3956 // The continue block might be unreachable in the CFG, but we would still like to know the loop dominator.
3957 // The edge case is when the continue block is also the loop header; don't set the dominator in that case.
3958 block.loop_dominator = itr->second;
3960 else
3962 uint32_t loop_dominator = cfg.find_loop_dominator(block_id);
3963 if (loop_dominator != block_id)
3964 block.loop_dominator = loop_dominator;
3965 else
3966 block.loop_dominator = SPIRBlock::NoDominator;
3970 // For each variable which is statically accessed.
3971 for (auto &var : handler.accessed_variables_to_block)
3973 // Only deal with variables which are considered local variables in this function.
3974 if (find(begin(entry.local_variables), end(entry.local_variables), VariableID(var.first)) ==
3975 end(entry.local_variables))
3976 continue;
3978 DominatorBuilder builder(cfg);
3979 auto &blocks = var.second;
3980 auto &type = expression_type(var.first);
3981 BlockID potential_continue_block = 0;
3983 // Figure out which block is dominating all accesses of those variables.
3984 for (auto &block : blocks)
3986 // If we're accessing a variable inside a continue block, this variable might be a loop variable.
3987 // We can only use loop variables with scalars, as we cannot track static expressions for vectors.
3988 if (is_continue(block))
3990 // Potentially awkward case to check for.
3991 // We might have a variable inside a loop, which is touched by the continue block,
3992 // but is not actually a loop variable.
3993 // The continue block is dominated by the inner part of the loop, which does not make sense in high-level
3994 // language output because it will be declared before the body,
3995 // so we will have to lift the dominator up to the relevant loop header instead.
3996 builder.add_block(ir.continue_block_to_loop_header[block]);
3998 // Arrays or structs cannot be loop variables.
3999 if (type.vecsize == 1 && type.columns == 1 && type.basetype != SPIRType::Struct && type.array.empty())
4001 // If the variable is used in multiple continue blocks, it is not a loop
4002 // candidate; signal that by setting the block to -1u.
4003 if (potential_continue_block == 0)
4004 potential_continue_block = block;
4005 else
4006 potential_continue_block = ~(0u);
4010 builder.add_block(block);
4013 builder.lift_continue_block_dominator();
4015 // Add it to a per-block list of variables.
4016 BlockID dominating_block = builder.get_dominator();
4018 if (dominating_block && potential_continue_block != 0 && potential_continue_block != ~0u)
4020 auto &inner_block = get<SPIRBlock>(dominating_block);
4022 BlockID merge_candidate = 0;
4024 // Analyze the dominator. If it lives in a different loop scope than the candidate continue
4025 // block, reject the loop variable candidate.
4026 if (inner_block.merge == SPIRBlock::MergeLoop)
4027 merge_candidate = inner_block.merge_block;
4028 else if (inner_block.loop_dominator != SPIRBlock::NoDominator)
4029 merge_candidate = get<SPIRBlock>(inner_block.loop_dominator).merge_block;
4031 if (merge_candidate != 0 && cfg.is_reachable(merge_candidate))
4033 // If the merge block has a higher post-visit order, we know that the continue candidate
4034 // cannot reach the merge block, so we have two separate scopes.
4035 if (!cfg.is_reachable(potential_continue_block) ||
4036 cfg.get_visit_order(merge_candidate) > cfg.get_visit_order(potential_continue_block))
4038 potential_continue_block = 0;
4043 if (potential_continue_block != 0 && potential_continue_block != ~0u)
4044 potential_loop_variables[var.first] = potential_continue_block;
4046 // For variables whose dominating block is inside a loop, there is a risk that these variables
4047 // actually need to be preserved across loop iterations. We can express this by adding
4048 // a "read" access to the loop header.
4049 // In the dominating block, we must see an OpStore or equivalent as the first access of an OpVariable.
4050 // Should that fail, we look for the outermost loop header and tack on an access there.
4051 // Phi nodes cannot have this problem.
4052 if (dominating_block)
4054 auto &variable = get<SPIRVariable>(var.first);
4055 if (!variable.phi_variable)
4057 auto *block = &get<SPIRBlock>(dominating_block);
4058 bool preserve = may_read_undefined_variable_in_block(*block, var.first);
4059 if (preserve)
4061 // Find the outermost loop scope.
4062 while (block->loop_dominator != BlockID(SPIRBlock::NoDominator))
4063 block = &get<SPIRBlock>(block->loop_dominator);
4065 if (block->self != dominating_block)
4067 builder.add_block(block->self);
4068 dominating_block = builder.get_dominator();
4074 // If all blocks here are dead code, this will be 0, so the variable in question
4075 // will be completely eliminated.
4076 if (dominating_block)
4078 auto &block = get<SPIRBlock>(dominating_block);
4079 block.dominated_variables.push_back(var.first);
4080 get<SPIRVariable>(var.first).dominator = dominating_block;
4084 for (auto &var : handler.accessed_temporaries_to_block)
4086 auto itr = handler.result_id_to_type.find(var.first);
4088 if (itr == end(handler.result_id_to_type))
4090 // We found a false positive ID being used, ignore.
4091 // This should probably be an assert.
4092 continue;
4095 // There is no point in doing domination analysis for opaque types.
4096 auto &type = get<SPIRType>(itr->second);
4097 if (type_is_opaque_value(type))
4098 continue;
4100 DominatorBuilder builder(cfg);
4101 bool force_temporary = false;
4102 bool used_in_header_hoisted_continue_block = false;
4104 // Figure out which block is dominating all accesses of those temporaries.
4105 auto &blocks = var.second;
4106 for (auto &block : blocks)
4108 builder.add_block(block);
4110 if (blocks.size() != 1 && is_continue(block))
4112 // The risk here is that inner loop can dominate the continue block.
4113 // Any temporary we access in the continue block must be declared before the loop.
4114 // This is moot for complex loops however.
4115 auto &loop_header_block = get<SPIRBlock>(ir.continue_block_to_loop_header[block]);
4116 assert(loop_header_block.merge == SPIRBlock::MergeLoop);
4117 builder.add_block(loop_header_block.self);
4118 used_in_header_hoisted_continue_block = true;
4122 uint32_t dominating_block = builder.get_dominator();
4124 if (blocks.size() != 1 && is_single_block_loop(dominating_block))
4126 // Awkward case, because the loop header is also the continue block,
4127 // so hoisting to loop header does not help.
4128 force_temporary = true;
4131 if (dominating_block)
4133 // If we touch a variable in the dominating block, this is the expected setup.
4134 // SPIR-V normally mandates this, but we have extra cases for temporary use inside loops.
4135 bool first_use_is_dominator = blocks.count(dominating_block) != 0;
4137 if (!first_use_is_dominator || force_temporary)
4139 if (handler.access_chain_expressions.count(var.first))
4141 // Exceptionally rare case.
4142 // We cannot declare temporaries of access chains (except on MSL perhaps with pointers).
4143 // Rather than do that, we force the indexing expressions to be declared in the right scope by
4144 // tracking their usage to that end. There is no temporary to hoist.
4145 // However, we still need to observe declaration order of the access chain.
4147 if (used_in_header_hoisted_continue_block)
4149 // For this scenario, we used an access chain inside a continue block where we also registered an access to the header block.
4150 // This is a problem as we need to declare an access chain properly first with full definition.
4151 // We cannot use temporaries for these expressions,
4152 // so we must make sure the access chain is declared ahead of time.
4153 // Force a complex for loop to deal with this.
4154 // TODO: Out-of-order declaring for loops where continue blocks are emitted last might be another option.
4155 auto &loop_header_block = get<SPIRBlock>(dominating_block);
4156 assert(loop_header_block.merge == SPIRBlock::MergeLoop);
4157 loop_header_block.complex_continue = true;
4160 else
4162 // This should be very rare, but if we try to declare a temporary inside a loop,
4163 // and that temporary is used outside the loop as well (spirv-opt inliner likes this)
4164 // we should actually emit the temporary outside the loop.
4165 hoisted_temporaries.insert(var.first);
4166 forced_temporaries.insert(var.first);
4168 auto &block_temporaries = get<SPIRBlock>(dominating_block).declare_temporary;
4169 block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first);
4172 else if (blocks.size() > 1)
4174 // Keep track of the temporary as we might have to declare this temporary.
4175 // This can happen if the loop header dominates a temporary, but we have a complex fallback loop.
4176 // In this case, the header is actually inside the for (;;) {} block, and we have problems.
4177 // What we need to do is hoist the temporaries outside the for (;;) {} block in case the header block
4178 // declares the temporary.
4179 auto &block_temporaries = get<SPIRBlock>(dominating_block).potential_declare_temporary;
4180 block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first);
4185 unordered_set<uint32_t> seen_blocks;
4187 // Now, try to analyze whether or not these variables are actually loop variables.
4188 for (auto &loop_variable : potential_loop_variables)
4190 auto &var = get<SPIRVariable>(loop_variable.first);
4191 auto dominator = var.dominator;
4192 BlockID block = loop_variable.second;
4194 // The variable was accessed in multiple continue blocks, ignore.
4195 if (block == BlockID(~(0u)) || block == BlockID(0))
4196 continue;
4198 // Dead code.
4199 if (dominator == ID(0))
4200 continue;
4202 BlockID header = 0;
4204 // Find the loop header for this block if we are a continue block.
4206 auto itr = ir.continue_block_to_loop_header.find(block);
4207 if (itr != end(ir.continue_block_to_loop_header))
4209 header = itr->second;
4211 else if (get<SPIRBlock>(block).continue_block == block)
4213 // Also check for self-referential continue block.
4214 header = block;
4218 assert(header);
4219 auto &header_block = get<SPIRBlock>(header);
4220 auto &blocks = handler.accessed_variables_to_block[loop_variable.first];
4222 // If a loop variable is not used before the loop, it's probably not a loop variable.
4223 bool has_accessed_variable = blocks.count(header) != 0;
4225 // Now, there are two conditions we need to meet for the variable to be a loop variable.
4226 // 1. The dominating block must have a branch-free path to the loop header,
4227 // this way we statically know which expression should be part of the loop variable initializer.
4229 // Walk from the dominator, if there is one straight edge connecting
4230 // dominator and loop header, we statically know the loop initializer.
4231 bool static_loop_init = true;
4232 while (dominator != header)
4234 if (blocks.count(dominator) != 0)
4235 has_accessed_variable = true;
4237 auto &succ = cfg.get_succeeding_edges(dominator);
4238 if (succ.size() != 1)
4240 static_loop_init = false;
4241 break;
4244 auto &pred = cfg.get_preceding_edges(succ.front());
4245 if (pred.size() != 1 || pred.front() != dominator)
4247 static_loop_init = false;
4248 break;
4251 dominator = succ.front();
4254 if (!static_loop_init || !has_accessed_variable)
4255 continue;
4257 // The second condition we need to meet is that no access after the loop
4258 // merge can occur. Walk the CFG to see if we find anything.
4260 seen_blocks.clear();
4261 cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) -> bool {
4262 // We found a block which accesses the variable outside the loop.
4263 if (blocks.find(walk_block) != end(blocks))
4264 static_loop_init = false;
4265 return true;
4268 if (!static_loop_init)
4269 continue;
4271 // We have a loop variable.
4272 header_block.loop_variables.push_back(loop_variable.first);
4273 // Need to sort here as variables come from an unordered container, and pushing them in the wrong order
4274 // will break reproducibility in regression runs.
4275 sort(begin(header_block.loop_variables), end(header_block.loop_variables));
4276 get<SPIRVariable>(loop_variable.first).loop_variable = true;
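// Hedged GLSL-level sketch of the loop-variable promotion above:
//   int i = 0;              // single static initializer on the straight edge into the loop header
//   for (; i < n; i++) { }  // accessed in the continue block
//   // ... and no access after the loop merge
// When all those conditions hold, 'i' lands in header_block.loop_variables and the backend can
// declare and initialize it directly in the for-initializer.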
4280 bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var)
4282 for (auto &op : block.ops)
4284 auto *ops = stream(op);
4285 switch (op.op)
4287 case OpStore:
4288 case OpCopyMemory:
4289 if (ops[0] == var)
4290 return false;
4291 break;
4293 case OpAccessChain:
4294 case OpInBoundsAccessChain:
4295 case OpPtrAccessChain:
4296 // Access chains are generally used to partially read and write. It's too hard to analyze
4297 // if all constituents are written fully before continuing, so just assume it's preserved.
4298 // This is the same as the parameter preservation analysis.
4299 if (ops[2] == var)
4300 return true;
4301 break;
4303 case OpSelect:
4304 // Variable pointers.
4305 // We might read before writing.
4306 if (ops[3] == var || ops[4] == var)
4307 return true;
4308 break;
4310 case OpPhi:
4312 // Variable pointers.
4313 // We might read before writing.
4314 if (op.length < 2)
4315 break;
4317 uint32_t count = op.length - 2;
4318 for (uint32_t i = 0; i < count; i += 2)
4319 if (ops[i + 2] == var)
4320 return true;
4321 break;
4324 case OpCopyObject:
4325 case OpLoad:
4326 if (ops[2] == var)
4327 return true;
4328 break;
4330 case OpFunctionCall:
4332 if (op.length < 3)
4333 break;
4335 // May read before writing.
4336 uint32_t count = op.length - 3;
4337 for (uint32_t i = 0; i < count; i++)
4338 if (ops[i + 3] == var)
4339 return true;
4340 break;
4343 default:
4344 break;
4348 // Not accessed somehow, at least not in a usual fashion.
4349 // It's likely accessed in a branch, so assume we must preserve.
4350 return true;
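// Hedged usage sketch: analyze_variable_scope() calls this with the block dominating all accesses
// of an OpVariable. If the first access there might be a read, e.g. (GLSL-level):
//   for (;;) { total += delta; }   // 'total' is read before any write inside the loop
// the declaration is lifted to the outermost loop header so the value is preserved across
// iterations rather than being re-declared inside the loop body.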
4353 Bitset Compiler::get_buffer_block_flags(VariableID id) const
4355 return ir.get_buffer_block_flags(get<SPIRVariable>(id));
4358 bool Compiler::get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type)
4360 if (type.basetype == SPIRType::Struct)
4362 base_type = SPIRType::Unknown;
4363 for (auto &member_type : type.member_types)
4365 SPIRType::BaseType member_base;
4366 if (!get_common_basic_type(get<SPIRType>(member_type), member_base))
4367 return false;
4369 if (base_type == SPIRType::Unknown)
4370 base_type = member_base;
4371 else if (base_type != member_base)
4372 return false;
4374 return true;
4376 else
4378 base_type = type.basetype;
4379 return true;
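// Hedged example of the recursion above: for a struct such as
//   struct Foo { vec4 a; mat4 b; float c[4]; };
// every member resolves to SPIRType::Float, so base_type becomes Float and true is returned.
// Mixing e.g. float and int members makes the walk bail out with false.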
4383 void Compiler::ActiveBuiltinHandler::handle_builtin(const SPIRType &type, BuiltIn builtin,
4384 const Bitset &decoration_flags)
4386 // If used, we will need to explicitly declare a new array size for these builtins.
4388 if (builtin == BuiltInClipDistance)
4390 if (!type.array_size_literal[0])
4391 SPIRV_CROSS_THROW("Array size for ClipDistance must be a literal.");
4392 uint32_t array_size = type.array[0];
4393 if (array_size == 0)
4394 SPIRV_CROSS_THROW("Array size for ClipDistance must not be unsized.");
4395 compiler.clip_distance_count = array_size;
4397 else if (builtin == BuiltInCullDistance)
4399 if (!type.array_size_literal[0])
4400 SPIRV_CROSS_THROW("Array size for CullDistance must be a literal.");
4401 uint32_t array_size = type.array[0];
4402 if (array_size == 0)
4403 SPIRV_CROSS_THROW("Array size for CullDistance must not be unsized.");
4404 compiler.cull_distance_count = array_size;
4406 else if (builtin == BuiltInPosition)
4408 if (decoration_flags.get(DecorationInvariant))
4409 compiler.position_invariant = true;
4413 void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id, bool allow_blocks)
4415 // Only handle plain variables here.
4416 // Builtins which are part of a block are handled in AccessChain.
4417 // If allow_blocks is used however, this is to handle initializers of blocks,
4418 // which implies that all members are written to.
4420 auto *var = compiler.maybe_get<SPIRVariable>(id);
4421 auto *m = compiler.ir.find_meta(id);
4422 if (var && m)
4424 auto &type = compiler.get<SPIRType>(var->basetype);
4425 auto &decorations = m->decoration;
4426 auto &flags = type.storage == StorageClassInput ?
4427 compiler.active_input_builtins : compiler.active_output_builtins;
4428 if (decorations.builtin)
4430 flags.set(decorations.builtin_type);
4431 handle_builtin(type, decorations.builtin_type, decorations.decoration_flags);
4433 else if (allow_blocks && compiler.has_decoration(type.self, DecorationBlock))
4435 uint32_t member_count = uint32_t(type.member_types.size());
4436 for (uint32_t i = 0; i < member_count; i++)
4438 if (compiler.has_member_decoration(type.self, i, DecorationBuiltIn))
4440 auto &member_type = compiler.get<SPIRType>(type.member_types[i]);
4441 BuiltIn builtin = BuiltIn(compiler.get_member_decoration(type.self, i, DecorationBuiltIn));
4442 flags.set(builtin);
4443 handle_builtin(member_type, builtin, compiler.get_member_decoration_bitset(type.self, i));
4450 void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id)
4452 add_if_builtin(id, false);
4455 void Compiler::ActiveBuiltinHandler::add_if_builtin_or_block(uint32_t id)
4457 add_if_builtin(id, true);
4460 bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t length)
4462 switch (opcode)
4464 case OpStore:
4465 if (length < 1)
4466 return false;
4468 add_if_builtin(args[0]);
4469 break;
4471 case OpCopyMemory:
4472 if (length < 2)
4473 return false;
4475 add_if_builtin(args[0]);
4476 add_if_builtin(args[1]);
4477 break;
4479 case OpCopyObject:
4480 case OpLoad:
4481 if (length < 3)
4482 return false;
4484 add_if_builtin(args[2]);
4485 break;
4487 case OpSelect:
4488 if (length < 5)
4489 return false;
4491 add_if_builtin(args[3]);
4492 add_if_builtin(args[4]);
4493 break;
4495 case OpPhi:
4497 if (length < 2)
4498 return false;
4500 uint32_t count = length - 2;
4501 args += 2;
4502 for (uint32_t i = 0; i < count; i += 2)
4503 add_if_builtin(args[i]);
4504 break;
4507 case OpFunctionCall:
4509 if (length < 3)
4510 return false;
4512 uint32_t count = length - 3;
4513 args += 3;
4514 for (uint32_t i = 0; i < count; i++)
4515 add_if_builtin(args[i]);
4516 break;
4519 case OpAccessChain:
4520 case OpInBoundsAccessChain:
4521 case OpPtrAccessChain:
4523 if (length < 4)
4524 return false;
4526 // Only consider global variables, cannot consider variables in functions yet, or other
4527 // access chains as they have not been created yet.
4528 auto *var = compiler.maybe_get<SPIRVariable>(args[2]);
4529 if (!var)
4530 break;
4532 // Required if we access chain into builtins like gl_GlobalInvocationID.
4533 add_if_builtin(args[2]);
4535 // Start traversing type hierarchy at the proper non-pointer types.
4536 auto *type = &compiler.get_variable_data_type(*var);
4538 auto &flags =
4539 var->storage == StorageClassInput ? compiler.active_input_builtins : compiler.active_output_builtins;
4541 uint32_t count = length - 3;
4542 args += 3;
4543 for (uint32_t i = 0; i < count; i++)
4545 // Pointers
4546 // PtrAccessChain functions more like a pointer offset. Type remains the same.
4547 if (opcode == OpPtrAccessChain && i == 0)
4548 continue;
4550 // Arrays
4551 if (!type->array.empty())
4553 type = &compiler.get<SPIRType>(type->parent_type);
4555 // Structs
4556 else if (type->basetype == SPIRType::Struct)
4558 uint32_t index = compiler.get<SPIRConstant>(args[i]).scalar();
4560 if (index < uint32_t(compiler.ir.meta[type->self].members.size()))
4562 auto &decorations = compiler.ir.meta[type->self].members[index];
4563 if (decorations.builtin)
4565 flags.set(decorations.builtin_type);
4566 handle_builtin(compiler.get<SPIRType>(type->member_types[index]), decorations.builtin_type,
4567 decorations.decoration_flags);
4571 type = &compiler.get<SPIRType>(type->member_types[index]);
4573 else
4575 // No point in traversing further. We won't find any extra builtins.
4576 break;
4579 break;
4582 default:
4583 break;
4586 return true;
4589 void Compiler::update_active_builtins()
4591 active_input_builtins.reset();
4592 active_output_builtins.reset();
4593 cull_distance_count = 0;
4594 clip_distance_count = 0;
4595 ActiveBuiltinHandler handler(*this);
4596 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
4598 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
4599 if (var.storage != StorageClassOutput)
4600 return;
4601 if (!interface_variable_exists_in_entry_point(var.self))
4602 return;
4604 // Also, make sure we preserve output variables which are only initialized, but never accessed by any code.
4605 if (var.initializer != ID(0))
4606 handler.add_if_builtin_or_block(var.self);
4610 // Returns whether this shader uses a builtin of the storage class
4611 bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage) const
4613 const Bitset *flags;
4614 switch (storage)
4616 case StorageClassInput:
4617 flags = &active_input_builtins;
4618 break;
4619 case StorageClassOutput:
4620 flags = &active_output_builtins;
4621 break;
4623 default:
4624 return false;
4626 return flags->get(builtin);
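// Hedged usage sketch (assuming both members are reachable by the caller): the builtin bitsets
// must be populated before querying, e.g.
//   compiler.update_active_builtins();
//   bool uses_pos = compiler.has_active_builtin(spv::BuiltInPosition, spv::StorageClassOutput);
// Storage classes other than Input/Output always report false, as seen above.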
4629 void Compiler::analyze_image_and_sampler_usage()
4631 CombinedImageSamplerDrefHandler dref_handler(*this);
4632 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), dref_handler);
4634 CombinedImageSamplerUsageHandler handler(*this, dref_handler.dref_combined_samplers);
4635 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
4637 // Need to run this traversal twice. The first time, we propagate any comparison sampler usage from leaf functions
4638 // down to main().
4639 // In the second pass, we propagate forced depth state coming from main() up into leaf functions.
4640 handler.dependency_hierarchy.clear();
4641 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
4643 comparison_ids = std::move(handler.comparison_ids);
4644 need_subpass_input = handler.need_subpass_input;
4645 need_subpass_input_ms = handler.need_subpass_input_ms;
4647 // Forward information from separate images and samplers into combined image samplers.
4648 for (auto &combined : combined_image_samplers)
4649 if (comparison_ids.count(combined.sampler_id))
4650 comparison_ids.insert(combined.combined_id);
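// Hedged example of the propagation above: if a helper function samples with a Dref opcode, e.g.
//   float helper(sampler2DShadow s, vec3 uv) { return texture(s, uv); }
// the Dref prepass tags the sampled image, and the usage handler then walks the function-argument
// dependency hierarchy so the resources passed into the helper end up in comparison_ids as well
// (and thus report true from is_depth_image()).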
4653 bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t)
4655 // Mark all sampled images which are used with Dref.
4656 switch (opcode)
4658 case OpImageSampleDrefExplicitLod:
4659 case OpImageSampleDrefImplicitLod:
4660 case OpImageSampleProjDrefExplicitLod:
4661 case OpImageSampleProjDrefImplicitLod:
4662 case OpImageSparseSampleProjDrefImplicitLod:
4663 case OpImageSparseSampleDrefImplicitLod:
4664 case OpImageSparseSampleProjDrefExplicitLod:
4665 case OpImageSparseSampleDrefExplicitLod:
4666 case OpImageDrefGather:
4667 case OpImageSparseDrefGather:
4668 dref_combined_samplers.insert(args[2]);
4669 return true;
4671 default:
4672 break;
4675 return true;
4678 const CFG &Compiler::get_cfg_for_current_function() const
4680 assert(current_function);
4681 return get_cfg_for_function(current_function->self);
4684 const CFG &Compiler::get_cfg_for_function(uint32_t id) const
4686 auto cfg_itr = function_cfgs.find(id);
4687 assert(cfg_itr != end(function_cfgs));
4688 assert(cfg_itr->second);
4689 return *cfg_itr->second;
4692 void Compiler::build_function_control_flow_graphs_and_analyze()
4694 CFGBuilder handler(*this);
4695 handler.function_cfgs[ir.default_entry_point].reset(new CFG(*this, get<SPIRFunction>(ir.default_entry_point)));
4696 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
4697 function_cfgs = std::move(handler.function_cfgs);
4698 bool single_function = function_cfgs.size() <= 1;
4700 for (auto &f : function_cfgs)
4702 auto &func = get<SPIRFunction>(f.first);
4703 AnalyzeVariableScopeAccessHandler scope_handler(*this, func);
4704 analyze_variable_scope(func, scope_handler);
4705 find_function_local_luts(func, scope_handler, single_function);
4707 // Check if we can actually use the loop variables we found in analyze_variable_scope.
4708 // To use multiple initializers, we need the same type and qualifiers.
4709 for (auto block : func.blocks)
4711 auto &b = get<SPIRBlock>(block);
4712 if (b.loop_variables.size() < 2)
4713 continue;
4715 auto &flags = get_decoration_bitset(b.loop_variables.front());
4716 uint32_t type = get<SPIRVariable>(b.loop_variables.front()).basetype;
4717 bool invalid_initializers = false;
4718 for (auto loop_variable : b.loop_variables)
4720 if (flags != get_decoration_bitset(loop_variable) ||
4721 type != get<SPIRVariable>(b.loop_variables.front()).basetype)
4723 invalid_initializers = true;
4724 break;
4728 if (invalid_initializers)
4730 for (auto loop_variable : b.loop_variables)
4731 get<SPIRVariable>(loop_variable).loop_variable = false;
4732 b.loop_variables.clear();
4737 // Find LUTs which are not function local. Only consider this case if the CFG is multi-function,
4738 // otherwise we treat Private as Function trivially.
4739 // Needs to be analyzed from the outside since we have to block the LUT optimization if at least
4740 // one function writes to it.
4741 if (!single_function)
4743 for (auto &id : global_variables)
4745 auto &var = get<SPIRVariable>(id);
4746 auto &type = get_variable_data_type(var);
4748 if (is_array(type) && var.storage == StorageClassPrivate &&
4749 var.initializer && !var.is_written_to &&
4750 ir.ids[var.initializer].get_type() == TypeConstant)
4752 get<SPIRConstant>(var.initializer).is_used_as_lut = true;
4753 var.static_expression = var.initializer;
4754 var.statically_assigned = true;
4755 var.remapped_variable = true;
4761 Compiler::CFGBuilder::CFGBuilder(Compiler &compiler_)
4762 : compiler(compiler_)
4766 bool Compiler::CFGBuilder::handle(spv::Op, const uint32_t *, uint32_t)
4768 return true;
4771 bool Compiler::CFGBuilder::follow_function_call(const SPIRFunction &func)
4773 if (function_cfgs.find(func.self) == end(function_cfgs))
4775 function_cfgs[func.self].reset(new CFG(compiler, func));
4776 return true;
4778 else
4779 return false;
4782 void Compiler::CombinedImageSamplerUsageHandler::add_dependency(uint32_t dst, uint32_t src)
4784 dependency_hierarchy[dst].insert(src);
4785 // Propagate up any comparison state if we're loading from one such variable.
4786 if (comparison_ids.count(src))
4787 comparison_ids.insert(dst);
4790 bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint32_t *args, uint32_t length)
4792 if (length < 3)
4793 return false;
4795 auto &func = compiler.get<SPIRFunction>(args[2]);
4796 const auto *arg = &args[3];
4797 length -= 3;
4799 for (uint32_t i = 0; i < length; i++)
4801 auto &argument = func.arguments[i];
4802 add_dependency(argument.id, arg[i]);
4805 return true;
4808 void Compiler::CombinedImageSamplerUsageHandler::add_hierarchy_to_comparison_ids(uint32_t id)
4810 // Traverse the variable dependency hierarchy and tag everything in its path with comparison ids.
4811 comparison_ids.insert(id);
4813 for (auto &dep_id : dependency_hierarchy[id])
4814 add_hierarchy_to_comparison_ids(dep_id);
4817 bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
4819 switch (opcode)
4821 case OpAccessChain:
4822 case OpInBoundsAccessChain:
4823 case OpPtrAccessChain:
4824 case OpLoad:
4826 if (length < 3)
4827 return false;
4829 add_dependency(args[1], args[2]);
4831 // Ideally defer this to OpImageRead, but then we'd need to track loaded IDs.
4832 // If we load an image, we're going to use it and there is little harm in declaring an unused gl_FragCoord.
4833 auto &type = compiler.get<SPIRType>(args[0]);
4834 if (type.image.dim == DimSubpassData)
4836 need_subpass_input = true;
4837 if (type.image.ms)
4838 need_subpass_input_ms = true;
4841 // If we load a SampledImage and it will be used with Dref, propagate the state up.
4842 if (dref_combined_samplers.count(args[1]) != 0)
4843 add_hierarchy_to_comparison_ids(args[1]);
4844 break;
4847 case OpSampledImage:
4849 if (length < 4)
4850 return false;
4852 // If the underlying resource has been used for comparison then duplicate loads of that resource must be too.
4853 // This image must be a depth image.
4854 uint32_t result_id = args[1];
4855 uint32_t image = args[2];
4856 uint32_t sampler = args[3];
4858 if (dref_combined_samplers.count(result_id) != 0)
4860 add_hierarchy_to_comparison_ids(image);
4862 // This sampler must be a SamplerComparisonState, and not a regular SamplerState.
4863 add_hierarchy_to_comparison_ids(sampler);
4865 // Mark the OpSampledImage itself as being comparison state.
4866 comparison_ids.insert(result_id);
4868 return true;
4871 default:
4872 break;
4875 return true;
4878 bool Compiler::buffer_is_hlsl_counter_buffer(VariableID id) const
4880 auto *m = ir.find_meta(id);
4881 return m && m->hlsl_is_magic_counter_buffer;
4884 bool Compiler::buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const
4886 auto *m = ir.find_meta(id);
4888 // First, check for the proper decoration.
4889 if (m && m->hlsl_magic_counter_buffer != 0)
4891 counter_id = m->hlsl_magic_counter_buffer;
4892 return true;
4894 else
4895 return false;
4898 void Compiler::make_constant_null(uint32_t id, uint32_t type)
4900 auto &constant_type = get<SPIRType>(type);
4902 if (constant_type.pointer)
4904 auto &constant = set<SPIRConstant>(id, type);
4905 constant.make_null(constant_type);
4907 else if (!constant_type.array.empty())
4909 assert(constant_type.parent_type);
4910 uint32_t parent_id = ir.increase_bound_by(1);
4911 make_constant_null(parent_id, constant_type.parent_type);
4913 if (!constant_type.array_size_literal.back())
4914 SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal.");
4916 SmallVector<uint32_t> elements(constant_type.array.back());
4917 for (uint32_t i = 0; i < constant_type.array.back(); i++)
4918 elements[i] = parent_id;
4919 set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false);
4921 else if (!constant_type.member_types.empty())
4923 uint32_t member_ids = ir.increase_bound_by(uint32_t(constant_type.member_types.size()));
4924 SmallVector<uint32_t> elements(constant_type.member_types.size());
4925 for (uint32_t i = 0; i < constant_type.member_types.size(); i++)
4927 make_constant_null(member_ids + i, constant_type.member_types[i]);
4928 elements[i] = member_ids + i;
4930 set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false);
4932 else
4934 auto &constant = set<SPIRConstant>(id, type);
4935 constant.make_null(constant_type);
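// Hedged walkthrough of the recursion above, e.g. OpConstantNull of type float[3]:
//   make_constant_null(id, float[3])
//     -> make_constant_null(parent_id, float)       // one freshly allocated null scalar
//     -> set<SPIRConstant>(id, { parent_id x 3 })   // array elements all reference it
// Structs likewise allocate one null constant per member and reference them from the aggregate.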
4939 const SmallVector<spv::Capability> &Compiler::get_declared_capabilities() const
4941 return ir.declared_capabilities;
4944 const SmallVector<std::string> &Compiler::get_declared_extensions() const
4946 return ir.declared_extensions;
4949 std::string Compiler::get_remapped_declared_block_name(VariableID id) const
4951 return get_remapped_declared_block_name(id, false);
4954 std::string Compiler::get_remapped_declared_block_name(uint32_t id, bool fallback_prefer_instance_name) const
4956 auto itr = declared_block_names.find(id);
4957 if (itr != end(declared_block_names))
4959 return itr->second;
4961 else
4963 auto &var = get<SPIRVariable>(id);
4965 if (fallback_prefer_instance_name)
4967 return to_name(var.self);
4969 else
4971 auto &type = get<SPIRType>(var.basetype);
4972 auto *type_meta = ir.find_meta(type.self);
4973 auto *block_name = type_meta ? &type_meta->decoration.alias : nullptr;
4974 return (!block_name || block_name->empty()) ? get_block_fallback_name(id) : *block_name;
4979 bool Compiler::reflection_ssbo_instance_name_is_significant() const
4981 if (ir.source.known)
4983 // UAVs from HLSL source tend to be declared in a way where the type is reused
4984 // but the instance name is significant, and that's the name we should report.
4985 // For GLSL, SSBOs each have their own block type as that's how GLSL is written.
4986 return ir.source.hlsl;
4989 unordered_set<uint32_t> ssbo_type_ids;
4990 bool aliased_ssbo_types = false;
4992 // If we don't have any OpSource information, we need to perform some shaky heuristics.
4993 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
4994 auto &type = this->get<SPIRType>(var.basetype);
4995 if (!type.pointer || var.storage == StorageClassFunction)
4996 return;
4998 bool ssbo = var.storage == StorageClassStorageBuffer ||
4999 (var.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock));
5001 if (ssbo)
5003 if (ssbo_type_ids.count(type.self))
5004 aliased_ssbo_types = true;
5005 else
5006 ssbo_type_ids.insert(type.self);
5010 // If the block name is aliased, assume we have HLSL-style UAV declarations.
5011 return aliased_ssbo_types;
5014 bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op,
5015 const uint32_t *args, uint32_t length)
5017 if (length < 2)
5018 return false;
5020 bool has_result_id = false, has_result_type = false;
5021 HasResultAndType(op, &has_result_id, &has_result_type);
5022 if (has_result_id && has_result_type)
5024 result_type = args[0];
5025 result_id = args[1];
5026 return true;
5028 else
5029 return false;
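// Hedged usage sketch: handlers use this to treat arbitrary result-producing opcodes uniformly, e.g.
//   uint32_t result_type, result_id;
//   if (instruction_to_result_type(result_type, result_id, op, args, length))
//   {
//       // record result_id as a temporary of result_type for the current block
//   }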
5032 Bitset Compiler::combined_decoration_for_member(const SPIRType &type, uint32_t index) const
5034 Bitset flags;
5035 auto *type_meta = ir.find_meta(type.self);
5037 if (type_meta)
5039 auto &members = type_meta->members;
5040 if (index >= members.size())
5041 return flags;
5042 auto &dec = members[index];
5044 flags.merge_or(dec.decoration_flags);
5046 auto &member_type = get<SPIRType>(type.member_types[index]);
5048 // If our member type is a struct, traverse all the child members as well recursively.
5049 auto &member_childs = member_type.member_types;
5050 for (uint32_t i = 0; i < member_childs.size(); i++)
5052 auto &child_member_type = get<SPIRType>(member_childs[i]);
5053 if (!child_member_type.pointer)
5054 flags.merge_or(combined_decoration_for_member(member_type, i));
5058 return flags;
5061 bool Compiler::is_desktop_only_format(spv::ImageFormat format)
5063 switch (format)
5065 // Desktop-only formats
5066 case ImageFormatR11fG11fB10f:
5067 case ImageFormatR16f:
5068 case ImageFormatRgb10A2:
5069 case ImageFormatR8:
5070 case ImageFormatRg8:
5071 case ImageFormatR16:
5072 case ImageFormatRg16:
5073 case ImageFormatRgba16:
5074 case ImageFormatR16Snorm:
5075 case ImageFormatRg16Snorm:
5076 case ImageFormatRgba16Snorm:
5077 case ImageFormatR8Snorm:
5078 case ImageFormatRg8Snorm:
5079 case ImageFormatR8ui:
5080 case ImageFormatRg8ui:
5081 case ImageFormatR16ui:
5082 case ImageFormatRgb10a2ui:
5083 case ImageFormatR8i:
5084 case ImageFormatRg8i:
5085 case ImageFormatR16i:
5086 return true;
5087 default:
5088 break;
5091 return false;
5094 // An image is determined to be a depth image if it is marked as a depth image and is not also
5095 // explicitly marked with a color format, or if there are any sample/gather compare operations on it.
5096 bool Compiler::is_depth_image(const SPIRType &type, uint32_t id) const
5098 return (type.image.depth && type.image.format == ImageFormatUnknown) || comparison_ids.count(id);
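// Hedged example: an OpTypeImage with Depth = 1 and Unknown format, or any image id that ended up
// in comparison_ids because it was sampled with a Dref or compare-gather opcode, reports true here;
// backends such as MSL use this to pick comparison-capable texture types (e.g. depth2d).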
5101 bool Compiler::type_is_opaque_value(const SPIRType &type) const
5103 return !type.pointer && (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Image ||
5104 type.basetype == SPIRType::Sampler);
5107 // Make these member functions so we can easily break on any force_recompile events.
5108 void Compiler::force_recompile()
5110 is_force_recompile = true;
5113 void Compiler::force_recompile_guarantee_forward_progress()
5115 force_recompile();
5116 is_force_recompile_forward_progress = true;
5119 bool Compiler::is_forcing_recompilation() const
5121 return is_force_recompile;
5124 void Compiler::clear_force_recompile()
5126 is_force_recompile = false;
5127 is_force_recompile_forward_progress = false;
5130 Compiler::PhysicalStorageBufferPointerHandler::PhysicalStorageBufferPointerHandler(Compiler &compiler_)
5131 : compiler(compiler_)
5135 Compiler::PhysicalBlockMeta *Compiler::PhysicalStorageBufferPointerHandler::find_block_meta(uint32_t id) const
5137 auto chain_itr = access_chain_to_physical_block.find(id);
5138 if (chain_itr != access_chain_to_physical_block.end())
5139 return chain_itr->second;
5140 else
5141 return nullptr;
5144 void Compiler::PhysicalStorageBufferPointerHandler::mark_aligned_access(uint32_t id, const uint32_t *args, uint32_t length)
5146 uint32_t mask = *args;
5147 args++;
5148 length--;
5149 if (length && (mask & MemoryAccessVolatileMask) != 0)
5151 args++;
5152 length--;
5155 if (length && (mask & MemoryAccessAlignedMask) != 0)
5157 uint32_t alignment = *args;
5158 auto *meta = find_block_meta(id);
5160 // This makes the assumption that the application does not rely on insane edge cases like:
5161 // Bind buffer with ADDR = 8, use block offset of 8 bytes, load/store with 16 byte alignment.
5162 // If we emit the buffer with alignment = 16 here, the first element at offset = 0 should
5163 // actually have alignment of 8 bytes, but this is too theoretical and awkward to support.
5164 // We could potentially keep track of any offset in the access chain, but it's
5165 // practically impossible for high level compilers to emit code like that,
5166 // so deducing overall alignment requirement based on maximum observed Alignment value is probably fine.
5167 if (meta && alignment > meta->alignment)
5168 meta->alignment = alignment;
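// Hedged example of the memory-operand scan above: for an access such as
//   OpLoad %T %value %bda_ptr Aligned 16
// the Aligned literal (16) is extracted and, if it exceeds the alignment recorded so far for the
// pointer's physical block type, bumps meta->alignment so the block is later declared with the
// strictest alignment actually observed in the shader.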
5172 bool Compiler::PhysicalStorageBufferPointerHandler::type_is_bda_block_entry(uint32_t type_id) const
5174 auto &type = compiler.get<SPIRType>(type_id);
5175 return compiler.is_physical_pointer(type);
5178 uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_minimum_scalar_alignment(const SPIRType &type) const
5180 if (type.storage == spv::StorageClassPhysicalStorageBufferEXT)
5181 return 8;
5182 else if (type.basetype == SPIRType::Struct)
5184 uint32_t alignment = 0;
5185 for (auto &member_type : type.member_types)
5187 uint32_t member_align = get_minimum_scalar_alignment(compiler.get<SPIRType>(member_type));
5188 if (member_align > alignment)
5189 alignment = member_align;
5191 return alignment;
5193 else
5194 return type.width / 8;
5197 void Compiler::PhysicalStorageBufferPointerHandler::setup_meta_chain(uint32_t type_id, uint32_t var_id)
5199 if (type_is_bda_block_entry(type_id))
5201 auto &meta = physical_block_type_meta[type_id];
5202 access_chain_to_physical_block[var_id] = &meta;
5204 auto &type = compiler.get<SPIRType>(type_id);
5206 if (!compiler.is_physical_pointer_to_buffer_block(type))
5207 non_block_types.insert(type_id);
5209 if (meta.alignment == 0)
5210 meta.alignment = get_minimum_scalar_alignment(compiler.get_pointee_type(type));
5214 bool Compiler::PhysicalStorageBufferPointerHandler::handle(Op op, const uint32_t *args, uint32_t length)
5216 // When a BDA pointer comes to life, we need to keep a mapping of SSA ID -> type ID for the pointer type.
5217 // For every load and store, we'll need to be able to look up the type ID being accessed and mark any alignment
5218 // requirements.
5219 switch (op)
5221 case OpConvertUToPtr:
5222 case OpBitcast:
5223 case OpCompositeExtract:
5224 // Extract can begin a new chain if we had a struct or array of pointers as input.
5225 // We don't begin chains before we have a pure scalar pointer.
5226 setup_meta_chain(args[0], args[1]);
5227 break;
5229 case OpAccessChain:
5230 case OpInBoundsAccessChain:
5231 case OpPtrAccessChain:
5232 case OpCopyObject:
5234 auto itr = access_chain_to_physical_block.find(args[2]);
5235 if (itr != access_chain_to_physical_block.end())
5236 access_chain_to_physical_block[args[1]] = itr->second;
5237 break;
5240 case OpLoad:
5242 setup_meta_chain(args[0], args[1]);
5243 if (length >= 4)
5244 mark_aligned_access(args[2], args + 3, length - 3);
5245 break;
5248 case OpStore:
5250 if (length >= 3)
5251 mark_aligned_access(args[0], args + 2, length - 2);
5252 break;
5255 default:
5256 break;
5259 return true;
5262 uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_base_non_block_type_id(uint32_t type_id) const
5264 auto *type = &compiler.get<SPIRType>(type_id);
5265 while (compiler.is_physical_pointer(*type) && !type_is_bda_block_entry(type_id))
5267 type_id = type->parent_type;
5268 type = &compiler.get<SPIRType>(type_id);
5271 assert(type_is_bda_block_entry(type_id));
5272 return type_id;
5275 void Compiler::PhysicalStorageBufferPointerHandler::analyze_non_block_types_from_block(const SPIRType &type)
5277 for (auto &member : type.member_types)
5279 auto &subtype = compiler.get<SPIRType>(member);
5281 if (compiler.is_physical_pointer(subtype) && !compiler.is_physical_pointer_to_buffer_block(subtype))
5282 non_block_types.insert(get_base_non_block_type_id(member));
5283 else if (subtype.basetype == SPIRType::Struct && !compiler.is_pointer(subtype))
5284 analyze_non_block_types_from_block(subtype);
5288 void Compiler::analyze_non_block_pointer_types()
5290 PhysicalStorageBufferPointerHandler handler(*this);
5291 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
5293 // Analyze any block declaration we have to make. It might contain
5294 // physical pointers to POD types which we never used, and thus never added to the list.
5295 // We'll need to add those pointer types to the set of types we declare.
5296 ir.for_each_typed_id<SPIRType>([&](uint32_t id, SPIRType &type) {
5297 // Only analyze the raw block struct, not any pointer-to-struct, since that's just redundant.
5298 if (type.self == id &&
5299 (has_decoration(type.self, DecorationBlock) ||
5300 has_decoration(type.self, DecorationBufferBlock)))
5302 handler.analyze_non_block_types_from_block(type);
5306 physical_storage_non_block_pointer_types.reserve(handler.non_block_types.size());
5307 for (auto type : handler.non_block_types)
5308 physical_storage_non_block_pointer_types.push_back(type);
5309 sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types));
5310 physical_storage_type_to_alignment = std::move(handler.physical_block_type_meta);
5313 bool Compiler::InterlockedResourceAccessPrepassHandler::handle(Op op, const uint32_t *, uint32_t)
5315 if (op == OpBeginInvocationInterlockEXT || op == OpEndInvocationInterlockEXT)
5317 if (interlock_function_id != 0 && interlock_function_id != call_stack.back())
5319 // Most complex case, we have no sensible way of dealing with this
5320 // other than taking the 100% conservative approach, exit early.
5321 split_function_case = true;
5322 return false;
5324 else
5326 interlock_function_id = call_stack.back();
5327 // If this call is performed inside control flow we have a problem.
5328 auto &cfg = compiler.get_cfg_for_function(interlock_function_id);
5330 uint32_t from_block_id = compiler.get<SPIRFunction>(interlock_function_id).entry_block;
5331 bool outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from_block_id, current_block_id);
5332 if (!outside_control_flow)
5333 control_flow_interlock = true;
5336 return true;
5339 void Compiler::InterlockedResourceAccessPrepassHandler::rearm_current_block(const SPIRBlock &block)
5341 current_block_id = block.self;
5344 bool Compiler::InterlockedResourceAccessPrepassHandler::begin_function_scope(const uint32_t *args, uint32_t length)
5346 if (length < 3)
5347 return false;
5348 call_stack.push_back(args[2]);
5349 return true;
5352 bool Compiler::InterlockedResourceAccessPrepassHandler::end_function_scope(const uint32_t *, uint32_t)
5354 call_stack.pop_back();
5355 return true;
5358 bool Compiler::InterlockedResourceAccessHandler::begin_function_scope(const uint32_t *args, uint32_t length)
5360 if (length < 3)
5361 return false;
5363 if (args[2] == interlock_function_id)
5364 call_stack_is_interlocked = true;
5366 call_stack.push_back(args[2]);
5367 return true;
5370 bool Compiler::InterlockedResourceAccessHandler::end_function_scope(const uint32_t *, uint32_t)
5372 if (call_stack.back() == interlock_function_id)
5373 call_stack_is_interlocked = false;
5375 call_stack.pop_back();
5376 return true;
5379 void Compiler::InterlockedResourceAccessHandler::access_potential_resource(uint32_t id)
5381 if ((use_critical_section && in_crit_sec) || (control_flow_interlock && call_stack_is_interlocked) ||
5382 split_function_case)
5384 compiler.interlocked_resources.insert(id);
5388 bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
5390 // Only care about critical section analysis if we have simple case.
5391 if (use_critical_section)
5393 if (opcode == OpBeginInvocationInterlockEXT)
5395 in_crit_sec = true;
5396 return true;
5399 if (opcode == OpEndInvocationInterlockEXT)
5401 // End critical section--nothing more to do.
5402 return false;
5406 // We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need.
5407 switch (opcode)
5409 case OpLoad:
5411 if (length < 3)
5412 return false;
5414 uint32_t ptr = args[2];
5415 auto *var = compiler.maybe_get_backing_variable(ptr);
5417 // We're only concerned with buffer and image memory here.
5418 if (!var)
5419 break;
5421 switch (var->storage)
5423 default:
5424 break;
5426 case StorageClassUniformConstant:
5428 uint32_t result_type = args[0];
5429 uint32_t id = args[1];
5430 compiler.set<SPIRExpression>(id, "", result_type, true);
5431 compiler.register_read(id, ptr, true);
5432 break;
5435 case StorageClassUniform:
5436 // Must have BufferBlock; we only care about SSBOs.
5437 if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
5438 break;
5439 // fallthrough
5440 case StorageClassStorageBuffer:
5441 access_potential_resource(var->self);
5442 break;
5444 break;
5447 case OpInBoundsAccessChain:
5448 case OpAccessChain:
5449 case OpPtrAccessChain:
5451 if (length < 3)
5452 return false;
5454 uint32_t result_type = args[0];
5456 auto &type = compiler.get<SPIRType>(result_type);
5457 if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
5458 type.storage == StorageClassStorageBuffer)
5460 uint32_t id = args[1];
5461 uint32_t ptr = args[2];
5462 compiler.set<SPIRExpression>(id, "", result_type, true);
5463 compiler.register_read(id, ptr, true);
5464 compiler.ir.ids[id].set_allow_type_rewrite();
5466 break;
5469 case OpImageTexelPointer:
5471 if (length < 3)
5472 return false;
5474 uint32_t result_type = args[0];
5475 uint32_t id = args[1];
5476 uint32_t ptr = args[2];
5477 auto &e = compiler.set<SPIRExpression>(id, "", result_type, true);
5478 auto *var = compiler.maybe_get_backing_variable(ptr);
5479 if (var)
5480 e.loaded_from = var->self;
5481 break;
5484 case OpStore:
5485 case OpImageWrite:
5486 case OpAtomicStore:
5488 if (length < 1)
5489 return false;
5491 uint32_t ptr = args[0];
5492 auto *var = compiler.maybe_get_backing_variable(ptr);
5493 if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
5494 var->storage == StorageClassStorageBuffer))
5496 access_potential_resource(var->self);
5499 break;
5502 case OpCopyMemory:
5504 if (length < 2)
5505 return false;
5507 uint32_t dst = args[0];
5508 uint32_t src = args[1];
5509 auto *dst_var = compiler.maybe_get_backing_variable(dst);
5510 auto *src_var = compiler.maybe_get_backing_variable(src);
5512 if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer))
5513 access_potential_resource(dst_var->self);
5515 if (src_var)
5517 if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer)
5518 break;
5520 if (src_var->storage == StorageClassUniform &&
5521 !compiler.has_decoration(compiler.get<SPIRType>(src_var->basetype).self, DecorationBufferBlock))
5523 break;
5526 access_potential_resource(src_var->self);
5529 break;
5532 case OpImageRead:
5533 case OpAtomicLoad:
5535 if (length < 3)
5536 return false;
5538 uint32_t ptr = args[2];
5539 auto *var = compiler.maybe_get_backing_variable(ptr);
5541 // We're only concerned with buffer and image memory here.
5542 if (!var)
5543 break;
5545 switch (var->storage)
5547 default:
5548 break;
5550 case StorageClassUniform:
5551 // Must have BufferBlock; we only care about SSBOs.
5552 if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
5553 break;
5554 // fallthrough
5555 case StorageClassUniformConstant:
5556 case StorageClassStorageBuffer:
5557 access_potential_resource(var->self);
5558 break;
5560 break;
5563 case OpAtomicExchange:
5564 case OpAtomicCompareExchange:
5565 case OpAtomicIIncrement:
5566 case OpAtomicIDecrement:
5567 case OpAtomicIAdd:
5568 case OpAtomicISub:
5569 case OpAtomicSMin:
5570 case OpAtomicUMin:
5571 case OpAtomicSMax:
5572 case OpAtomicUMax:
5573 case OpAtomicAnd:
5574 case OpAtomicOr:
5575 case OpAtomicXor:
5577 if (length < 3)
5578 return false;
5580 uint32_t ptr = args[2];
5581 auto *var = compiler.maybe_get_backing_variable(ptr);
5582 if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
5583 var->storage == StorageClassStorageBuffer))
5585 access_potential_resource(var->self);
5588 break;
5591 default:
5592 break;
5595 return true;
5598 void Compiler::analyze_interlocked_resource_usage()
5600 if (get_execution_model() == ExecutionModelFragment &&
5601 (get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) ||
5602 get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
5603 get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
5604 get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT)))
5606 InterlockedResourceAccessPrepassHandler prepass_handler(*this, ir.default_entry_point);
5607 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), prepass_handler);
5609 InterlockedResourceAccessHandler handler(*this, ir.default_entry_point);
5610 handler.interlock_function_id = prepass_handler.interlock_function_id;
5611 handler.split_function_case = prepass_handler.split_function_case;
5612 handler.control_flow_interlock = prepass_handler.control_flow_interlock;
5613 handler.use_critical_section = !handler.split_function_case && !handler.control_flow_interlock;
5615 traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
5617 // For GLSL. If we hit any of these cases, we have to fall back to a conservative approach.
5618 interlocked_is_complex =
5619 !handler.use_critical_section || handler.interlock_function_id != ir.default_entry_point;
5623 // Helper function
5624 bool Compiler::check_internal_recursion(const SPIRType &type, std::unordered_set<uint32_t> &checked_ids)
5626 if (type.basetype != SPIRType::Struct)
5627 return false;
5629 if (checked_ids.count(type.self))
5630 return true;
5632 // Recurse into struct members
5633 bool is_recursive = false;
5634 checked_ids.insert(type.self);
5635 uint32_t mbr_cnt = uint32_t(type.member_types.size());
5636 for (uint32_t mbr_idx = 0; !is_recursive && mbr_idx < mbr_cnt; mbr_idx++)
5638 uint32_t mbr_type_id = type.member_types[mbr_idx];
5639 auto &mbr_type = get<SPIRType>(mbr_type_id);
5640 is_recursive |= check_internal_recursion(mbr_type, checked_ids);
5642 checked_ids.erase(type.self);
5643 return is_recursive;
5646 // Return whether the struct type contains a structural recursion nested somewhere within its content.
5647 bool Compiler::type_contains_recursion(const SPIRType &type)
5649 std::unordered_set<uint32_t> checked_ids;
5650 return check_internal_recursion(type, checked_ids);
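// Hedged example: a self-referential type, e.g. a GLSL buffer_reference block containing a pointer
// to its own type,
//   layout(buffer_reference) buffer Node { Node next; float value; };
// is re-entered by check_internal_recursion() while its type.self is still in checked_ids, so
// type_contains_recursion() returns true and backends can forward-declare the struct.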
5653 bool Compiler::type_is_array_of_pointers(const SPIRType &type) const
5655 if (!is_array(type))
5656 return false;
5658 // BDA types must have parent type hierarchy.
5659 if (!type.parent_type)
5660 return false;
5662 // Punch through all array layers.
5663 auto *parent = &get<SPIRType>(type.parent_type);
5664 while (is_array(*parent))
5665 parent = &get<SPIRType>(parent->parent_type);
5667 return is_pointer(*parent);
5670 bool Compiler::flush_phi_required(BlockID from, BlockID to) const
5672 auto &child = get<SPIRBlock>(to);
5673 for (auto &phi : child.phi_variables)
5674 if (phi.parent == from)
5675 return true;
5676 return false;
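// Hedged example: given
//   %phi = OpPhi %float %a %block_a %b %block_b      ; lives in block 'to'
// flush_phi_required(block_a, to) returns true, i.e. the branch out of block_a must first copy %a
// into the phi's backing temporary, since high-level output has no phi nodes.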
5679 void Compiler::add_loop_level()
5681 current_loop_level++;