1 /*---------------------------------------------------------------------------*\
5 * Copyright (C) 2000-2013 by the OpenSG Forum *
9 * contact: dirk@opensg.org, gerrit.voss@vossg.org, carsten_neumann@gmx.net *
11 \*---------------------------------------------------------------------------*/
12 /*---------------------------------------------------------------------------*\
15 * This library is free software; you can redistribute it and/or modify it *
16 * under the terms of the GNU Library General Public License as published *
17 * by the Free Software Foundation, version 2. *
19 * This library is distributed in the hope that it will be useful, but *
20 * WITHOUT ANY WARRANTY; without even the implied warranty of *
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
22 * Library General Public License for more details. *
24 * You should have received a copy of the GNU Library General Public *
25 * License along with this library; if not, write to the Free Software *
26 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *
28 \*---------------------------------------------------------------------------*/
29 /*---------------------------------------------------------------------------*\
37 \*---------------------------------------------------------------------------*/
39 //---------------------------------------------------------------------------
41 //---------------------------------------------------------------------------
46 #include "OSGConfig.h"
47 #include "OSGAction.h"
48 #include "OSGCamera.h"
49 #include "OSGMatrixCamera.h"
50 #include "OSGOrthographicCamera.h"
51 #include "OSGRenderAction.h"
54 #include "OSGClusterShadingStage.h"
55 #include "OSGClusterShadingStageData.h"
57 #include "OSGChunkMaterial.h"
58 #include "OSGDrawEnv.h"
59 #include "OSGFrameBufferObject.h"
60 #include "OSGTextureObjChunk.h"
61 #include "OSGStateOverride.h"
63 #include "OSGMatrixUtility.h"
66 #include "OSGTextureObjChunk.h"
67 #include "OSGTextureImageChunk.h"
68 #include "OSGMultiLightChunk.h"
69 #include "OSGUniformBufferObjStd140Chunk.h"
70 #include "OSGShaderStorageBufferObjStdLayoutChunk.h"
72 #include "OSGShaderProgram.h"
73 #include "OSGShaderProgramChunk.h"
74 #include "OSGComputeShaderChunk.h"
75 #include "OSGComputeShaderAlgorithm.h"
76 #include "OSGAlgorithmComputeElement.h"
80 // Documentation for this class is emitted in the
81 // OSGClusterShadingStageBase.cpp file.
82 // To modify it, please change the .fcd file (OSGClusterShadingStage.fcd) and
83 // regenerate the base file.
85 /***************************************************************************\
87 \***************************************************************************/
89 /***************************************************************************\
91 \***************************************************************************/
93 void ClusterShadingStage::initMethod(InitPhase ePhase
)
95 Inherited::initMethod(ePhase
);
97 if(ePhase
== TypeObject::SystemPost
)
99 RenderAction::registerEnterDefault(
100 ClusterShadingStage::getClassType(),
101 reinterpret_cast<Action::Callback
>(&ClusterShadingStage::renderEnter
));
103 RenderAction::registerLeaveDefault(
104 ClusterShadingStage::getClassType(),
105 reinterpret_cast<Action::Callback
>(&ClusterShadingStage::renderLeave
));
109 /***************************************************************************\
111 \***************************************************************************/
113 /*-------------------------------------------------------------------------*\
115 \*-------------------------------------------------------------------------*/
117 /*----------------------- constructors & destructors ----------------------*/
// Default constructor: the stage starts with the _bSetupStage flag cleared.
// NOTE(review): the base-class initializer and the (presumably empty) body
// are not visible in this extract.
119 ClusterShadingStage::ClusterShadingStage(void)
121 , _bSetupStage(false)
// Copy constructor: the _bSetupStage flag is NOT taken over from 'source',
// it is reset to false.
125 ClusterShadingStage::ClusterShadingStage(const ClusterShadingStage
&source
)
127 , _bSetupStage(false)
// Destructor (body not visible in this extract).
131 ClusterShadingStage::~ClusterShadingStage(void)
135 /*----------------------------- class specific ----------------------------*/
// Field change notification.
// The visible part combines a set of field masks (cluster layout, binding
// points, block/variable names, CPU/GPU switches and list size limits) and
// finally forwards the notification to Inherited::changed().
// NOTE(review): the statement that tests 'whichField' against this mask and
// its consequence are not visible in this extract; presumably it requests a
// new stage setup via _bSetupStage -- confirm against the full file.
// The commented-out masks below are intentionally not part of the set.
137 void ClusterShadingStage::changed(ConstFieldMaskArg whichField
,
142 // The BeaconMatrixFieldMask is deliberately omitted
147 NumClusterZFieldMask
|
148 NearPlaneOffsetFieldMask
|
150 DispatchDataBindingPntFieldMask
|
151 ClusterDataBindingPntFieldMask
|
152 LightBindingPntFieldMask
|
153 AffectedLightIndexListBindingPntFieldMask
|
154 FrustumBindingPntFieldMask
|
155 LightIndexListBindingPntFieldMask
|
156 LightIndexCounterBindingPntFieldMask
|
157 LightGridBindingPntFieldMask
|
159 AffectedLightIndexListBlockNameFieldMask
|
160 AffectedLightIndexListVariableNameFieldMask
|
161 LightIndexListBlockNameFieldMask
|
162 LightIndexListVariableNameFieldMask
|
163 ClusteringDataBlockNameFieldMask
|
164 ClusteringDataVariableNameFieldMask
|
165 LightGridVariableNameFieldMask
|
167 CalcFrustumsOnCPUFieldMask
|
168 CullLighsOnCPUFieldMask
|
169 //DisabledFieldMask |
170 MaxLightIndexListSizeFieldMask
|
171 MaxClusterLightCountFieldMask
172 //FrustNodeFieldMask |
173 //FrustCompShaderAlgoFieldMask |
174 //CullCompShaderAlgoFieldMask |
175 //MultiLightChunkFieldMask |
176 //ShaderProgChunkFieldMask
182 Inherited::changed(whichField
, origin
, details
);
185 void ClusterShadingStage::dump( UInt32
,
186 const BitVector
) const
188 SLOG
<< "Dump ClusterShadingStage NI" << std::endl
;
191 /*------------------------------- Draw ------------------------------------*/
// Render traversal entry callback.
// Visible steps: fetch the active partition's viewport from its DrawEnv,
// bring the per-action stage data up to date (updateData), run the compute
// sub tree (getFrustNode()), register state overrides for the light chunk,
// the shader program chunk, the light grid image, the two light index lists
// and the clustering data UBO, and finally traverse the children of this
// core with those overrides in place.
// Override slots are addressed as chunk class id + configured binding point.
// NOTE(review): 'a' is the result of a dynamic_cast and is dereferenced
// below; no NULL check is visible in this extract -- confirm one exists.
// NOTE(review): several lines (conditions, return statement) are not visible
// here, so the exact control flow around the two recursions is not shown.
193 Action::ResultE
ClusterShadingStage::renderEnter(Action
* action
)
195 RenderAction
* a
= dynamic_cast<RenderAction
*>(action
);
199 // ??? this->pushPartition(a, RenderPartition::CopyAll);
201 RenderPartition
* pPart
= a
->getActivePartition();
203 DrawEnv
& oDrawEnv
= pPart
->getDrawEnv();
205 Int32 iVPLeft
= oDrawEnv
.getPixelLeft();
206 Int32 iVPBottom
= oDrawEnv
.getPixelBottom();
207 Int32 iVPWidth
= oDrawEnv
.getPixelWidth();
208 Int32 iVPHeight
= oDrawEnv
.getPixelHeight();
210 this->updateData(a
, iVPLeft
, iVPBottom
, iVPWidth
, iVPHeight
);
212 ClusterShadingStageDataUnrecPtr pData
= a
->getData
<ClusterShadingStageData
*>(_iDataSlotId
);
215 // Perform the computation
219 //std::cout << "compute shader..." << std::endl;
220 this->recurse(action
, getFrustNode());
221 a
->useNodeList(false);
226 a
->addOverride( getMultiLightChunk ()->getClassId() + getLightBindingPnt(), getMultiLightChunk());
227 a
->addOverride( getShaderProgChunk ()->getClassId(), getShaderProgChunk());
228 a
->addOverride(pData
->getLightGridTexImgChunkFS ()->getClassId() + getLightGridBindingPnt(), pData
->getLightGridTexImgChunkFS());
229 a
->addOverride(pData
->getAffectedLightIndexListSSBOChunk()->getClassId() + getAffectedLightIndexListBindingPnt(), pData
->getAffectedLightIndexListSSBOChunk());
230 a
->addOverride(pData
->getLightIndexListSSBOChunk ()->getClassId() + getLightIndexListBindingPnt(), pData
->getLightIndexListSSBOChunk());
231 a
->addOverride(pData
->getCullClusterDatUBOChunk ()->getClassId() + getClusterDataBindingPnt(), pData
->getCullClusterDatUBOChunk());
233 this->recurseFromThis(a
);
234 a
->useNodeList(false);
238 // ??? this->popPartition(a);
// Render traversal leave callback, registered in initMethod().
// NOTE(review): the body is not visible in this extract.
244 Action::ResultE
ClusterShadingStage::renderLeave(Action
*action
)
249 /*------------------------------ Data -------------------------------------*/
// Creates the per-action stage data object.
// Asserts that the action does not yet hold data in this stage's slot,
// creates a local ClusterShadingStageData, stores it in the slot and runs
// the initial setupStageData() with the action's camera and the viewport
// values handed in.
// NOTE(review): the viewport parameter declarations (iVPLeft, iVPBottom,
// iVPWidth, iVPHeight) are not visible in this extract.
251 void ClusterShadingStage::initData(
252 RenderAction
* pAction
,
258 Camera
* pCam
= pAction
->getCamera();
262 OSG_ASSERT(pAction
->getData
<ClusterShadingStageData
*>(_iDataSlotId
) == NULL
);
264 ClusterShadingStageDataUnrecPtr pData
= ClusterShadingStageData::createLocal();
265 this->setData(pData
, _iDataSlotId
, pAction
);
267 setupStageData(pData
, pCam
, iVPLeft
, iVPBottom
, iVPWidth
, iVPHeight
);
// Brings the per-action stage data up to date for the current frame.
// The stage data is looked up in this stage's data slot. One branch creates
// it through initData() and fetches it again; the 'else if' branch re-runs
// setupStageData() when _bSetupStage is set. Afterwards updateStageData() is
// called and the _bSetupStage flag is cleared.
// NOTE(review): the condition guarding the initData() branch is not visible
// in this extract; presumably it tests pData for NULL -- confirm.
270 void ClusterShadingStage::updateData(
271 RenderAction
* pAction
,
277 ClusterShadingStageDataUnrecPtr pData
= pAction
->getData
<ClusterShadingStageData
*>(_iDataSlotId
);
279 Camera
* pCam
= pAction
->getCamera();
285 initData(pAction
, iVPLeft
, iVPBottom
, iVPWidth
, iVPHeight
);
287 pData
= pAction
->getData
<ClusterShadingStageData
*>(_iDataSlotId
);
289 else if (_bSetupStage
)
291 setupStageData(pData
, pCam
, iVPLeft
, iVPBottom
, iVPWidth
, iVPHeight
);
294 updateStageData(pData
, pCam
, iVPLeft
, iVPBottom
, iVPWidth
, iVPHeight
);
296 _bSetupStage
= false;
299 /*---------------------------- StageData ----------------------------------*/
// (Re)creates all GPU side resources held by the stage data object.
// Visible steps:
//  - a 1x1x1 light grid image (CPU memory only if lights are culled on the
//    CPU), its texture object and two image chunks on that texture:
//    write-only for the compute shader, read-only for the fragment shader
//  - SSBOs for the tile frustums, the affected light index list, the light
//    index list (sized by getMaxLightIndexListSize()) and the index counter
//  - UBOs for the two dispatch configurations and the clustering data
//  - one chunk material for the frustum computation and one for the light
//    culling computation, each populated at the configured binding points
//  - all objects are stored in pData, the camera kind is recorded, and the
//    compute / fragment shader setup helpers are invoked
// NOTE(review): the remaining parameter declarations and the statements the
// "force resize in first update" comment refers to are not visible here.
301 void ClusterShadingStage::setupStageData(
302 ClusterShadingStageData
* pData
,
311 ImageUnrecPtr img_light_grid
= create_image(1,1,1, getCullLighsOnCPU());
312 TextureObjChunkUnrecPtr tex_obj_light_grid
= create_texture_state(img_light_grid
);
313 TextureImageChunkUnrecPtr tex_img_light_grid_cs
= create_texture_image_state(tex_obj_light_grid
, GL_WRITE_ONLY
);
314 TextureImageChunkUnrecPtr tex_img_light_grid_fs
= create_texture_image_state(tex_obj_light_grid
, GL_READ_ONLY
);
316 ShaderStorageBufferObjStdLayoutChunkUnrecPtr ssbo_frustums
= create_frustum_state(VecFrustumsT(1));
317 ShaderStorageBufferObjStdLayoutChunkUnrecPtr ssbo_affected_light_index_list
= create_index_state(1);
318 ShaderStorageBufferObjStdLayoutChunkUnrecPtr ssbo_light_index_list
= create_index_state(getMaxLightIndexListSize());
319 ShaderStorageBufferObjStdLayoutChunkUnrecPtr ssbo_light_index_counter
= create_light_index_counter_state();
321 UniformBufferObjStd140ChunkUnrecPtr ubo_frustum_dispatch_data
= create_dispatch_data(DispatchData());
322 UniformBufferObjStd140ChunkUnrecPtr ubo_light_culling_dispatch_data
= create_dispatch_data(DispatchData());
323 UniformBufferObjStd140ChunkUnrecPtr ubo_clustering_data
= create_clustering_data(ClusteringData());
325 ChunkMaterialUnrecPtr frust_chunk_material
= ChunkMaterial::createLocal();
326 ChunkMaterialUnrecPtr cull_chunk_material
= ChunkMaterial::createLocal();
328 frust_chunk_material
->addChunk(ubo_frustum_dispatch_data
, getDispatchDataBindingPnt());
329 frust_chunk_material
->addChunk(ssbo_frustums
, getFrustumBindingPnt());
331 cull_chunk_material
->addChunk(tex_img_light_grid_cs
, getLightGridBindingPnt());
332 cull_chunk_material
->addChunk(getMultiLightChunk(), getLightBindingPnt());
333 cull_chunk_material
->addChunk(ubo_light_culling_dispatch_data
, getDispatchDataBindingPnt());
334 cull_chunk_material
->addChunk(ubo_clustering_data
, getClusterDataBindingPnt());
335 cull_chunk_material
->addChunk(ssbo_frustums
, getFrustumBindingPnt());
336 cull_chunk_material
->addChunk(ssbo_affected_light_index_list
, getAffectedLightIndexListBindingPnt());
337 cull_chunk_material
->addChunk(ssbo_light_index_list
, getLightIndexListBindingPnt());
338 cull_chunk_material
->addChunk(ssbo_light_index_counter
, getLightIndexCounterBindingPnt());
341 // force resize in first update
348 pData
->setLightGridImage (img_light_grid
);
349 pData
->setLightGridTexObjChunk (tex_obj_light_grid
);
350 pData
->setLightGridTexImgChunkCS (tex_img_light_grid_cs
);
351 pData
->setLightGridTexImgChunkFS (tex_img_light_grid_fs
);
352 pData
->setFrustumsSSBOChunk (ssbo_frustums
);
353 pData
->setAffectedLightIndexListSSBOChunk(ssbo_affected_light_index_list
);
354 pData
->setLightIndexListSSBOChunk (ssbo_light_index_list
);
355 pData
->setCullLightIndexCounterSSBOChunk (ssbo_light_index_counter
);
356 pData
->setFrustDispDatUBOChunk (ubo_frustum_dispatch_data
);
357 pData
->setCullDispDatUBOChunk (ubo_light_culling_dispatch_data
);
358 pData
->setCullClusterDatUBOChunk (ubo_clustering_data
);
359 pData
->setFrustChunkMat (frust_chunk_material
);
360 pData
->setCullChunkMat (cull_chunk_material
);
362 bool isOrtho
= isOrthographicCamera(pCam
, iPixelWidth
, iPixelHeight
);
363 pData
->setIsOrthographicCamera(isOrtho
);
365 create_computation (pData
);
366 create_fragment_shader(pData
);
368 Thread::setCurrentLocalFlags();
// Per-frame update of the stage data.
// Visible steps:
//  1. test pData / pCam against NULL, then refresh the compute setup
//  2. if the viewport differs from the stored one (or check_frustum_state()
//     reports a mismatch): store the viewport, invert the projection, resize
//     the light grid image and recalculate the tile frustums, either on the
//     CPU (and store / upload them) or by configuring the GPU dispatch
//  3. determine the lights affecting the view frustum and upload their
//     index list
//  4. GPU culling: update the cull dispatch config and clear the global
//     light index counter; CPU culling: cull here and upload the light grid
//     and the light index list
//  5. fill and upload the ClusteringData block used by the shaders
// NOTE(review): braces, 'else' lines, the remaining parameter declarations
// and the name of the CPU culling call are not visible in this extract, so
// the nesting of the steps above is inferred from the visible statements.
// NOTE(review): num_cluster_z is unsigned; with getNumClusterZ() == 1 the
// factor 'a' becomes 0 and 'b' = 1/a is not finite, with 0 the subtraction
// wraps around -- confirm that the field is constrained to values >= 2.
372 void ClusterShadingStage::updateStageData(
373 ClusterShadingStageData
* pData
,
380 if (pData
== NULL
|| pCam
== NULL
)
383 update_computation(pData
);
387 if ( iPixelLeft
!= pData
->getLeft()
388 || iPixelBottom
!= pData
->getBottom()
389 || iPixelWidth
!= pData
->getWidth()
390 || iPixelHeight
!= pData
->getHeight())
393 pData
->setLeft (iPixelLeft
);
394 pData
->setBottom(iPixelBottom
);
395 pData
->setWidth (iPixelWidth
);
396 pData
->setHeight(iPixelHeight
);
398 Vec4u
viewport(iPixelLeft
, iPixelBottom
, iPixelWidth
, iPixelHeight
);
400 if (!check_frustum_state(pData
, viewport
))
405 //std::cout << "resize...";
407 Matrix matProjection
, matInverseProjection
, matProjectionTranslation
;
409 pCam
->getProjection (matProjection
, iPixelWidth
, iPixelHeight
);
410 pCam
->getProjectionTranslation(matProjectionTranslation
, iPixelWidth
, iPixelHeight
);
412 matProjection
.mult(matProjectionTranslation
);
413 matProjection
.inverse(matInverseProjection
);
416 // The light grid image size must be adapted.
417 // CPU memory is needed only if culling is performed on the CPU.
419 update_light_grid_image(viewport
, pData
->getLightGridImage(), getCullLighsOnCPU());
422 // Light culling on CPU forces frustum calculation on CPU
424 if (getCalcFrustumsOnCPU() == true || getCullLighsOnCPU() == true)
426 //std::cout << "...on CPU" << std::endl;
428 // We calculate the tile frustum planes on the CPU and store them in the
429 // following vector for further evaluation.
431 VecFrustumsT frustums
;
432 if (isOrthographicCamera(pCam
, iPixelWidth
, iPixelHeight
))
433 calc_ortho_frustums_cpu(viewport
, matInverseProjection
, frustums
);
435 calc_persp_frustums_cpu(viewport
, matInverseProjection
, frustums
);
438 if (getCullLighsOnCPU())
441 // The frustums just calculated on the CPU must be stored in the stage
442 // data object for CPU light culling.
444 update_frustum_state(pData
, frustums
);
449 // The frustums just calculated on the CPU must be uploaded to the SSBO
451 update_frustum_state(pData
->getFrustumsSSBOChunk(), frustums
);
456 //std::cout << "...on GPU" << std::endl;
458 update_dispatch_config_frustums(pData
, viewport
, matInverseProjection
);
462 UInt32 tile_size
= getTileSize();
464 Real32 zNear
= pCam
->getNear();
465 Real32 zFar
= pCam
->getFar ();
467 UInt32 num_cluster_x
= UInt32(osgCeil(Real32(iPixelWidth
) / tile_size
));
468 UInt32 num_cluster_y
= UInt32(osgCeil(Real32(iPixelHeight
) / tile_size
));
469 UInt32 num_cluster_z
= getNumClusterZ();
471 FrustumVolume viewVolume
;
472 pCam
->getFrustum(viewVolume
, iPixelWidth
, iPixelHeight
);
474 Matrix matEyeFromWorld
;
475 pCam
->getViewing(matEyeFromWorld
, iPixelWidth
, iPixelHeight
);
478 // We have to update the view matrix for the light culling compute shader
480 if (getCullLighsOnCPU() == false)
482 update_dispatch_config_cull_lights(pData
, viewport
, matEyeFromWorld
);
485 VecLightIndexT vecAffectedLights
; // the lights that contribute to the view frustum illumination
486 VecLightEyeSpaceDataT vecEyeSpaceData
; // the eye space position and direction of the lights
489 // We determine an index list of all lights that are actually contributing to the
490 // shading of the complete visible view frustum...
492 calc_affected_lights(
493 matEyeFromWorld
, // in transform from world to eye space
494 viewVolume
, // in the view frustum volume in world space
495 vecAffectedLights
, // out the index list
496 vecEyeSpaceData
); // optimization: eye space light positions and directions
499 // ...and update the corresponding SSBO accordingly
501 update_index_state(pData
->getAffectedLightIndexListSSBOChunk(), vecAffectedLights
);
505 //std::cout << "not disabled" << std::endl;
507 // Clearing of the global light index counter is absolutely necessary!
509 if (getCullLighsOnCPU() == false)
511 clear_light_index_counter_state(pData
->getCullLightIndexCounterSSBOChunk());
514 if (getCullLighsOnCPU() == true)
516 //std::cout << "cull on CPU" << std::endl;
518 // These are the results of the light culling procedure below. We will fill these
519 // raw data into appropriate SSBO so that they are available in the fragment shader
522 VecImageDataT gridData
; // tuples of offset and count integers: { o1,c1, o2,c2, o3,c3,...}
523 VecLightIndexT lightIndexList
; // pointers into the lights array.
525 Vec3u
dimensions(num_cluster_x
, num_cluster_y
, num_cluster_z
);
528 pData
, // in contains the CPU calculated tile frustum planes
529 vecAffectedLights
, // in the lights that are actually contributing to the view frustum shading
530 vecEyeSpaceData
, // in the precalculated light eye space positions and directions
531 dimensions
, // in cluster dimensions
532 matEyeFromWorld
, // in transform from world to eye space
533 zNear
, // in distance of near plane from eye point
534 zFar
, // in distance of far plane from eye point
535 getNearPlaneOffset(), // in distance offset for near plane
536 gridData
, // out the raw grid data
537 lightIndexList
); // out the index list
540 // Fill gridData to image and inform texture object about change
542 update_image_data (pData
->getLightGridImage(), gridData
);
543 update_texture_state(pData
->getLightGridTexObjChunk());
546 // Fill lightIndexList to SSBO:
548 update_index_state(pData
->getLightIndexListSSBOChunk(), lightIndexList
);
553 // Update the clustering data used in the light cull and fragment shader
555 ClusteringData clusteringData
;
556 clusteringData
.zNear
= zNear
; // positive near plane distance from eye zNear > 0
557 clusteringData
.zFar
= zFar
; // positive far plane distance from eye zFar > zNear > 0
558 //clusteringData.D = getNearPlaneOffset(); // positive near plane offset D >= 0
559 clusteringData
.nD
= zNear
+ getNearPlaneOffset(); // zNear + D : shader optimization
560 clusteringData
.lg_nD
= log2(clusteringData
.nD
); // log2(nD) : shader optimization
561 clusteringData
.a
= (num_cluster_z
-1)/log2(zFar
/clusteringData
.nD
); // precalculated factor (c-1)/log2(f/(n+D))
562 clusteringData
.b
= 1.f
/clusteringData
.a
; // precalculated factor log2(f/(n+D))/(c-1)
563 //clusteringData.c = num_cluster_z; // number of cluster planes
564 clusteringData
.c_1
= num_cluster_z
- 1; // number of cluster planes minus one : shader optimization
565 clusteringData
.p_v
= Vec2i(iPixelLeft
, iPixelBottom
); // viewport corner points
566 //clusteringData.n_c = Vec3i(num_cluster_x, num_cluster_y, num_cluster_z); // number of clusters
567 clusteringData
.enabled
= !getDisabled();
569 update_clustering_data(pData
->getCullClusterDatUBOChunk(), clusteringData
);
574 /*-------------------------- Frustum Details ------------------------------*/
// Resizes the CPU side frustum storage of the stage data: the plane normals
// field and the plane distances field are both resized to 'size' entries,
// so that they always hold the same number of frustums.
// NOTE(review): the declaration of the 'size' parameter is not visible in
// this extract.
577 void ClusterShadingStage::resize_frustums(
578 ClusterShadingStageData
* pData
,
581 OSG_ASSERT(pData
!= NULL
);
583 MFMatrix
* normals
= pData
->editMFFrustumPlanesNormals();
584 MFVec4f
* distances
= pData
->editMFFrustumPlanesDistances();
586 normals
->resize(size
);
587 distances
->resize(size
);
591 ClusterShadingStage::Frustum
592 ClusterShadingStage::getFrustum(
593 ClusterShadingStageData
* pData
,
594 const UInt32 idx
) const
596 OSG_ASSERT(pData
!= NULL
);
600 const Matrix
& normals
= pData
->getFrustumPlanesNormals (idx
);
601 const Vec4f
& distances
= pData
->getFrustumPlanesDistances(idx
);
603 frustum
.planes
[0] = Plane(Vec3f(normals
[0]), distances
[0]);
604 frustum
.planes
[1] = Plane(Vec3f(normals
[1]), distances
[1]);
605 frustum
.planes
[2] = Plane(Vec3f(normals
[2]), distances
[2]);
606 frustum
.planes
[3] = Plane(Vec3f(normals
[3]), distances
[3]);
// Stores one tile frustum in the CPU side frustum fields of the stage data.
// Both fields must already hold more than 'idx' entries (asserted). The four
// plane normals are written into the matrix entry 'idx', the four plane
// distances are gathered into a Vec4f for the distances entry 'idx'.
// NOTE(review): the declaration of the 'idx' parameter and the statement
// that consumes the Vec4f below are not visible in this extract.
612 void ClusterShadingStage::setFrustum(
613 ClusterShadingStageData
* pData
,
615 const Frustum
& frustum
)
617 OSG_ASSERT(pData
!= NULL
);
618 OSG_ASSERT(pData
->getMFFrustumPlanesNormals ()->size() > idx
);
619 OSG_ASSERT(pData
->getMFFrustumPlanesDistances()->size() > idx
);
621 MFMatrix::reference rMat
= pData
->editFrustumPlanesNormals (idx
);
622 MFVec4f ::reference rVec
= pData
->editFrustumPlanesDistances(idx
);
624 rMat
.setValue(frustum
.planes
[0].getNormal(),
625 frustum
.planes
[1].getNormal(),
626 frustum
.planes
[2].getNormal(),
627 frustum
.planes
[3].getNormal());
630 Vec4f(frustum
.planes
[0].getDistanceFromOrigin(),
631 frustum
.planes
[1].getDistanceFromOrigin(),
632 frustum
.planes
[2].getDistanceFromOrigin(),
633 frustum
.planes
[3].getDistanceFromOrigin())
637 std::size_t ClusterShadingStage::calc_frustum_buffer_size(
638 const VecFrustumsT
& vFrustums
)
640 std::size_t ao
= 0; // aligned offset
641 std::size_t bo
= 0; // base offset
643 for (std::size_t j
= 0; j
< 4; ++j
)
645 ao
= alignOffset(16, bo
); bo
= ao
+ sizeof(Vec3f
);
646 ao
= alignOffset( 4, bo
); bo
= ao
+ sizeof(Real32
);
647 ao
= alignOffset(16, bo
); bo
= ao
;
650 ao
*= vFrustums
.size(); bo
= ao
; // array
651 ao
= alignOffset( 16, bo
); bo
= ao
; // padding
656 std::vector
<UInt8
> ClusterShadingStage::create_frustum_buffer(
657 const VecFrustumsT
& vFrustums
)
659 std::size_t size
= calc_frustum_buffer_size(vFrustums
);
661 std::vector
<UInt8
> buffer(size
);
663 std::size_t ao
= 0; // aligned offset
664 std::size_t bo
= 0; // base offset
666 for (std::size_t i
= 0; i
< vFrustums
.size(); ++i
)
668 for (std::size_t j
= 0; j
< 4; ++j
)
670 ao
= alignOffset(16, bo
);
671 memcpy(&buffer
[0] + ao
, &vFrustums
[i
].planes
[j
].getNormal(), sizeof(Vec3f
));
672 bo
= ao
+ sizeof(Vec3f
);
674 ao
= alignOffset( 4, bo
);
675 *(reinterpret_cast<Real32
*>(&buffer
[0] + ao
)) = vFrustums
[i
].planes
[j
].getDistanceFromOrigin();
676 bo
= ao
+ sizeof(Real32
);
678 ao
= alignOffset(16, bo
); bo
= ao
;
681 ao
= alignOffset( 16, bo
); bo
= ao
; // padding
687 ShaderStorageBufferObjStdLayoutChunkTransitPtr
688 ClusterShadingStage::create_frustum_state(
689 const VecFrustumsT
& vFrustums
)
691 ShaderStorageBufferObjStdLayoutChunkTransitPtr ssbo
= ShaderStorageBufferObjStdLayoutChunk::createLocal();
693 std::vector
<UInt8
> buffer
= create_frustum_buffer(vFrustums
);
695 ssbo
->editMFBuffer()->setValues(buffer
);
696 ssbo
->setUsage(GL_DYNAMIC_DRAW
);
// Replaces the content of the frustum SSBO chunk with the serialised form
// of 'vFrustums' (see create_frustum_buffer).
// NOTE(review): no NULL check on 'ssbo' is visible in this extract.
701 void ClusterShadingStage::update_frustum_state(
702 ShaderStorageBufferObjStdLayoutChunk
* ssbo
,
703 const VecFrustumsT
& vFrustums
)
706 std::vector
<UInt8
> buffer
= create_frustum_buffer(vFrustums
);
707 ssbo
->editMFBuffer()->setValues(buffer
);
711 std::size_t ClusterShadingStage::calc_frustum_buffer_size(
712 ClusterShadingStageData
* pData
)
714 std::size_t ao
= 0; // aligned offset
715 std::size_t bo
= 0; // base offset
717 for (std::size_t j
= 0; j
< 4; ++j
)
719 ao
= alignOffset(16, bo
); bo
= ao
+ sizeof(Vec3f
);
720 ao
= alignOffset( 4, bo
); bo
= ao
+ sizeof(Real32
);
721 ao
= alignOffset(16, bo
); bo
= ao
;
724 OSG_ASSERT(pData
!= NULL
);
725 OSG_ASSERT(pData
->editMFFrustumPlanesNormals()->size() == pData
->editMFFrustumPlanesDistances()->size());
727 ao
*= pData
->editMFFrustumPlanesNormals()->size(); bo
= ao
; // array
728 ao
= alignOffset( 16, bo
); bo
= ao
; // padding
733 std::vector
<UInt8
> ClusterShadingStage::create_frustum_buffer(
734 ClusterShadingStageData
* pData
)
736 std::size_t size
= calc_frustum_buffer_size(pData
);
738 std::vector
<UInt8
> buffer(size
);
740 std::size_t ao
= 0; // aligned offset
741 std::size_t bo
= 0; // base offset
743 std::size_t num_frustums
= pData
->editMFFrustumPlanesNormals()->size();
745 for (std::size_t i
= 0; i
< num_frustums
; ++i
)
747 Frustum frustum
= getFrustum(pData
, static_cast<UInt32
>(i
));
749 for (std::size_t j
= 0; j
< 4; ++j
)
751 ao
= alignOffset(16, bo
);
752 memcpy(&buffer
[0] + ao
, &frustum
.planes
[j
].getNormal(), sizeof(Vec3f
));
753 bo
= ao
+ sizeof(Vec3f
);
755 ao
= alignOffset( 4, bo
);
756 *(reinterpret_cast<Real32
*>(&buffer
[0] + ao
)) = frustum
.planes
[j
].getDistanceFromOrigin();
757 bo
= ao
+ sizeof(Real32
);
759 ao
= alignOffset(16, bo
); bo
= ao
;
762 ao
= alignOffset( 16, bo
); bo
= ao
; // padding
768 ShaderStorageBufferObjStdLayoutChunkTransitPtr
769 ClusterShadingStage::create_frustum_state(
770 ClusterShadingStageData
* pData
)
772 ShaderStorageBufferObjStdLayoutChunkTransitPtr ssbo
= ShaderStorageBufferObjStdLayoutChunk::createLocal();
774 std::vector
<UInt8
> buffer
= create_frustum_buffer(pData
);
776 ssbo
->editMFBuffer()->setValues(buffer
);
777 ssbo
->setUsage(GL_DYNAMIC_DRAW
);
// Replaces the content of the frustum SSBO chunk with the serialised form
// of the frustums stored in the CPU side fields of 'pData'.
// NOTE(review): no NULL check on 'ssbo' is visible in this extract.
782 void ClusterShadingStage::update_frustum_state(
783 ShaderStorageBufferObjStdLayoutChunk
* ssbo
,
784 ClusterShadingStageData
* pData
)
787 std::vector
<UInt8
> buffer
= create_frustum_buffer(pData
);
788 ssbo
->editMFBuffer()->setValues(buffer
);
792 void ClusterShadingStage::update_frustum_state(
793 ClusterShadingStageData
* pData
,
794 const VecFrustumsT
& vFrustums
)
796 resize_frustums(pData
, vFrustums
.size());
798 std::size_t sz
= vFrustums
.size();
800 for (std::size_t i
= 0; i
< sz
; ++i
)
802 setFrustum(pData
, static_cast<UInt32
>(i
), vFrustums
[i
]);
806 bool ClusterShadingStage::check_frustum_state(
807 ClusterShadingStageData
* pData
,
808 const Vec4u
& viewport
)
810 if (getCalcFrustumsOnCPU() == true || getCullLighsOnCPU() == true)
812 // Real32 x_v = static_cast<Real32>(viewport[0]);
813 // Real32 y_v = static_cast<Real32>(viewport[1]);
814 Real32 w
= static_cast<Real32
>(viewport
[2]);
815 Real32 h
= static_cast<Real32
>(viewport
[3]);
817 UInt32 tile_size
= getTileSize();
819 UInt32 numHorizontalTiles
= UInt32(osgCeil(w
/ tile_size
));
820 UInt32 numVerticalTiles
= UInt32(osgCeil(h
/ tile_size
));
822 std::size_t sz
= numHorizontalTiles
* numVerticalTiles
;
824 MFMatrix
* normals
= pData
->editMFFrustumPlanesNormals();
825 MFVec4f
* distances
= pData
->editMFFrustumPlanesDistances();
827 if (normals
->size() != sz
|| distances
->size() != sz
)
// Converts a window space point into normalized device coordinates.
// x and y are mapped from the viewport rectangle (left, bottom, width,
// height in viewport[0..3]) to [-1,1]. z is mapped from the depth range
// [zNear, zFar] to [-1,1] with
//     z_ndc = (2 * z_w - zFar - zNear) / (zFar - zNear)
// Fix: the factor 2 previously multiplied the whole difference
// (z_w - zFar - zNear), which maps z_w == zNear to a value below -1 and
// z_w == zFar to a value below 1 for the default depth range [0,1].
// NOTE(review): the declarations of p_w, zNear and zFar as well as the
// construction / return of the result are not visible in this extract.
833 Pnt4f
ClusterShadingStage::NdcFromScreen(
835 const Vec4u
& viewport
,
839 Real32 x_v
= static_cast<Real32
>(viewport
[0]);
840 Real32 y_v
= static_cast<Real32
>(viewport
[1]);
841 Real32 w
= static_cast<Real32
>(viewport
[2]);
842 Real32 h
= static_cast<Real32
>(viewport
[3]);
845 2.f
* (p_w
.x() - x_v
) / w
- 1.f
,
846 2.f
* (p_w
.y() - y_v
) / h
- 1.f
,
847 (2.f
* p_w
.z() - zFar
- zNear
) / (zFar
- zNear
),
// Converts a window space point into normalized device coordinates for the
// case that the z coordinate of 'p_w' is already given in NDC.
// x and y are mapped from the viewport rectangle (left, bottom, width,
// height in viewport[0..3]) to [-1,1].
// NOTE(review): the lines providing the z and w components and the return
// statement are not visible in this extract.
853 Pnt4f
ClusterShadingStage::NdcFromScreen(
854 const Pnt3f
& p_w
, // with z-coord already given in ndc coords!
855 const Vec4u
& viewport
)
857 Real32 x_v
= static_cast<Real32
>(viewport
[0]);
858 Real32 y_v
= static_cast<Real32
>(viewport
[1]);
859 Real32 w
= static_cast<Real32
>(viewport
[2]);
860 Real32 h
= static_cast<Real32
>(viewport
[3]);
863 2.f
* (p_w
.x() - x_v
) / w
- 1.f
,
864 2.f
* (p_w
.y() - y_v
) / h
- 1.f
,
// Transforms a point from normalized device coordinates into eye space by
// multiplying it with the inverse projection matrix; the x, y and z
// components of the result are returned.
// NOTE(review): no division by the w component is visible here. The callers
// in this file either use an orthographic projection or only build planes
// through the eye point from the results -- confirm that this is intended.
// NOTE(review): the declarations of p_n and p_e are not visible in this
// extract.
871 Pnt3f
ClusterShadingStage::EyeFromNdc(
873 const Matrix
& matInvProjection
)
876 matInvProjection
.mult(p_n
, p_e
);
878 return Pnt3f(p_e
.x(), p_e
.y(), p_e
.z());
// Determines whether 'cam' performs an orthographic projection.
// Two cases are visible: the camera is an OrthographicCamera, or it is a
// MatrixCamera whose projection matrix has the elements [2][3] == 0 and
// [3][3] == 1. The element test is an exact floating point comparison.
// NOTE(review): the parameter declarations, the conditions guarding the two
// casts and the return statements are not visible in this extract.
881 bool ClusterShadingStage::isOrthographicCamera(
886 OrthographicCamera
* orthoCam
= dynamic_cast<OrthographicCamera
*>(cam
);
890 MatrixCamera
* matrixCam
= dynamic_cast<MatrixCamera
*>(cam
);
893 Matrix matProjection
;
894 matrixCam
->getProjection(matProjection
, width
, height
);
896 // float m32 = matProjection[2][3];
897 // float m33 = matProjection[3][3];
899 if (matProjection
[2][3] == 0.f
&& matProjection
[3][3] == 1.f
)
// Calculates the tile frustums for an orthographic projection on the CPU.
// The viewport is divided into tiles of getTileSize() pixels; the last tile
// of a row / column is clamped to the viewport border. For every tile the
// four corners are taken at NDC depth 1 (points 0-3) and -1 (points 4-7),
// converted to NDC and then to eye space, and the left, right, top and
// bottom planes are built from three of these eye space points each.
// Frustums are stored row by row: index = j * numHorizontalTiles + i.
// NOTE(review): the declarations of pnts_w, pnts_n and pnts_e are not
// visible in this extract.
906 void ClusterShadingStage::calc_ortho_frustums_cpu(
907 const Vec4u
& viewport
,
908 const Matrix
& matInvProjection
,
909 VecFrustumsT
& frustums
)
911 Real32 x_v
= static_cast<Real32
>(viewport
[0]);
912 Real32 y_v
= static_cast<Real32
>(viewport
[1]);
913 Real32 w
= static_cast<Real32
>(viewport
[2]);
914 Real32 h
= static_cast<Real32
>(viewport
[3]);
916 UInt32 tile_size
= getTileSize();
918 UInt32 numHorizontalTiles
= UInt32(osgCeil(w
/ tile_size
));
919 UInt32 numVerticalTiles
= UInt32(osgCeil(h
/ tile_size
));
921 frustums
.resize(numHorizontalTiles
* numVerticalTiles
);
930 for (UInt32 j
= 0; j
< numVerticalTiles
; ++j
)
932 Real32 y0
= y_v
+ j
* tile_size
;
933 Real32 y1
= osgMin(y_v
+ (j
+1) * tile_size
, y_v
+ h
);
935 for (UInt32 i
= 0; i
< numHorizontalTiles
; ++i
)
937 Real32 x0
= x_v
+ i
* tile_size
;
938 Real32 x1
= osgMin(x_v
+ (i
+1) * tile_size
, x_v
+ w
);
940 pnts_w
[0] = Pnt3f(x0
, y0
, 1.f
);
941 pnts_w
[1] = Pnt3f(x1
, y0
, 1.f
);
942 pnts_w
[2] = Pnt3f(x0
, y1
, 1.f
);
943 pnts_w
[3] = Pnt3f(x1
, y1
, 1.f
);
945 pnts_w
[4] = Pnt3f(x0
, y0
, -1.f
);
946 pnts_w
[5] = Pnt3f(x1
, y0
, -1.f
);
947 pnts_w
[6] = Pnt3f(x0
, y1
, -1.f
);
948 pnts_w
[7] = Pnt3f(x1
, y1
, -1.f
);
950 for (UInt32 k
= 0; k
< 8; ++k
)
952 pnts_n
[k
] = NdcFromScreen(pnts_w
[k
], viewport
);
953 pnts_e
[k
] = EyeFromNdc (pnts_n
[k
], matInvProjection
);
956 UInt32 idx
= j
* numHorizontalTiles
+ i
;
958 frustums
[idx
].planes
[0] = Plane(pnts_e
[6], pnts_e
[0], pnts_e
[2]); // left plane
959 frustums
[idx
].planes
[1] = Plane(pnts_e
[7], pnts_e
[3], pnts_e
[1]); // right plane
960 frustums
[idx
].planes
[2] = Plane(pnts_e
[6], pnts_e
[2], pnts_e
[3]); // top plane
961 frustums
[idx
].planes
[3] = Plane(pnts_e
[4], pnts_e
[1], pnts_e
[0]); // bottom plane
// Calculates the tile frustums for a perspective projection on the CPU.
// The viewport is divided into tiles of getTileSize() pixels; the last tile
// of a row / column is clamped to the viewport border. For every tile the
// four corners at NDC depth -1 are converted to NDC and then to eye space.
// The left, right, top and bottom planes are built from the eye position
// (the eye space origin) and two of these corner points each.
// Frustums are stored row by row: index = j * numHorizontalTiles + i.
// NOTE(review): the declarations of pnts_w, pnts_n and pnts_e are not
// visible in this extract.
966 void ClusterShadingStage::calc_persp_frustums_cpu(
967 const Vec4u
& viewport
,
968 const Matrix
& matInvProjection
,
969 VecFrustumsT
& frustums
)
971 Real32 x_v
= static_cast<Real32
>(viewport
[0]);
972 Real32 y_v
= static_cast<Real32
>(viewport
[1]);
973 Real32 w
= static_cast<Real32
>(viewport
[2]);
974 Real32 h
= static_cast<Real32
>(viewport
[3]);
976 UInt32 tile_size
= getTileSize();
978 UInt32 numHorizontalTiles
= UInt32(osgCeil(w
/ tile_size
));
979 UInt32 numVerticalTiles
= UInt32(osgCeil(h
/ tile_size
));
981 frustums
.resize(numHorizontalTiles
* numVerticalTiles
);
986 Pnt3f pEye
= Pnt3f(0.f
, 0.f
, 0.f
); // eye position in view space
992 for (UInt32 j
= 0; j
< numVerticalTiles
; ++j
)
994 Real32 y0
= y_v
+ j
* tile_size
;
995 Real32 y1
= osgMin(y_v
+ (j
+1) * tile_size
, y_v
+ h
);
997 for (UInt32 i
= 0; i
< numHorizontalTiles
; ++i
)
999 Real32 x0
= x_v
+ i
* tile_size
;
1000 Real32 x1
= osgMin(x_v
+ (i
+1) * tile_size
, x_v
+ w
);
1002 pnts_w
[0] = Pnt3f(x0
, y0
, -1.f
);
1003 pnts_w
[1] = Pnt3f(x1
, y0
, -1.f
);
1004 pnts_w
[2] = Pnt3f(x0
, y1
, -1.f
);
1005 pnts_w
[3] = Pnt3f(x1
, y1
, -1.f
);
1007 for (UInt32 k
= 0; k
< 4; ++k
)
1009 pnts_n
[k
] = NdcFromScreen(pnts_w
[k
], viewport
);
1010 pnts_e
[k
] = EyeFromNdc (pnts_n
[k
], matInvProjection
);
1013 UInt32 idx
= j
* numHorizontalTiles
+ i
;
1015 frustums
[idx
].planes
[0] = Plane(pEye
, pnts_e
[0], pnts_e
[2]); // left plane
1016 frustums
[idx
].planes
[1] = Plane(pEye
, pnts_e
[3], pnts_e
[1]); // right plane
1017 frustums
[idx
].planes
[2] = Plane(pEye
, pnts_e
[2], pnts_e
[3]); // top plane
1018 frustums
[idx
].planes
[3] = Plane(pEye
, pnts_e
[1], pnts_e
[0]); // bottom plane
1023 /*-------------------------- Image Details -------------------------------*/
// Creates the light grid image: a locally created Image with pixel format
// GL_RG, the given width / height / depth and data type
// Image::OSG_UINT32_IMAGEDATA. 'allocate_memory' is passed on as the
// "allocate memory" argument.
// NOTE(review): the size parameter declarations, the call that receives the
// arguments below and its remaining arguments are not visible in this
// extract.
1025 ImageTransitPtr
ClusterShadingStage::create_image(
1029 bool allocate_memory
)
1031 ImageTransitPtr image
= Image::createLocal();
1033 GL_RG
, // pixel format
1034 width
, // width in pixel
1035 height
, // height in pixel
1036 depth
, // depth in pixel
1041 Image::OSG_UINT32_IMAGEDATA
, // type
1042 allocate_memory
, // allocate memory
// Re-specifies an existing light grid image with pixel format GL_RG, the
// given width / height / depth and data type Image::OSG_UINT32_IMAGEDATA.
// 'allocate_memory' is passed on as the "allocate memory" argument.
// NOTE(review): the image and size parameter declarations, the call that
// receives the arguments below and its remaining arguments are not visible
// in this extract.
1048 void ClusterShadingStage::update_image(
1053 bool allocate_memory
)
1056 GL_RG
, // pixel format
1057 width
, // width in pixel
1058 height
, // height in pixel
1059 depth
, // depth in pixel
1064 Image::OSG_UINT32_IMAGEDATA
, // type
1065 allocate_memory
, // allocate memory
// Copies the light grid entries into the raw pixel storage of the image.
// Every entry of 'imageData' is written as two consecutive UInt32 values
// (first, then second), i.e. 8 bytes per entry, starting at offset 0.
// NOTE(review): the size of the image storage is not checked against
// imageData.size() and the pointer returned by editData() is not tested;
// this presumably relies on the image having been sized and allocated
// beforehand for CPU culling -- confirm against the callers.
// NOTE(review): the declaration of the 'image' parameter is not visible in
// this extract.
1070 void ClusterShadingStage::update_image_data(
1072 const VecImageDataT
& imageData
)
1074 UInt8
* data
= image
->editData();
1076 std::size_t ao
= 0; // aligned offset
1077 std::size_t bo
= 0; // base offset
1079 for (std::size_t i
= 0; i
< imageData
.size(); ++i
)
1081 UInt32 offset
= imageData
[i
].first
;
1082 UInt32 index
= imageData
[i
].second
;
1084 ao
= alignOffset(4, bo
);
1085 *(reinterpret_cast<UInt32
*>(data
+ ao
)) = offset
;
1086 bo
= ao
+ sizeof(UInt32
);
1088 ao
= alignOffset(4, bo
);
1089 *(reinterpret_cast<UInt32
*>(data
+ ao
)) = index
;
1090 bo
= ao
+ sizeof(UInt32
);
// Stores 'data' as the entry of cluster (i, j, k) in 'imageData'.
// The cluster coordinates must lie inside 'dimensions' (asserted). The flat
// index is k * dim.x * dim.y + j * dim.x + i, i.e. x runs fastest.
// NOTE(review): 'imageData' is indexed without a visible size check; it is
// presumably sized to dim.x * dim.y * dim.z by the caller -- confirm.
// NOTE(review): the declarations of i, j and k are not visible in this
// extract.
1094 void ClusterShadingStage::write_image_data(
1098 const Vec3u
& dimensions
,
1099 const ImageDataT
& data
,
1100 VecImageDataT
& imageData
)
1102 OSG_ASSERT(i
< dimensions
.x());
1103 OSG_ASSERT(j
< dimensions
.y());
1104 OSG_ASSERT(k
< dimensions
.z());
1106 std::size_t idx
= k
* dimensions
.x() * dimensions
.y() + j
* dimensions
.x() + i
;
1108 imageData
[idx
] = data
;
1111 TextureObjChunkTransitPtr
1112 ClusterShadingStage::create_texture_state(Image
* image
)
1114 TextureObjChunkTransitPtr texObjChunk
= TextureObjChunk::createLocal();
1116 texObjChunk
->setTarget(GL_TEXTURE_2D_ARRAY
);
1117 texObjChunk
->setScale(false);
1118 texObjChunk
->setInternalFormat(GL_RG32UI
);
1119 texObjChunk
->setExternalFormat(GL_RG_INTEGER
);
1120 texObjChunk
->setMinFilter(GL_NEAREST
);
1121 texObjChunk
->setMagFilter(GL_NEAREST
);
1122 texObjChunk
->setImage(image
);
1127 void ClusterShadingStage::update_texture_state(TextureObjChunk
* texObjChunk
)
1129 texObjChunk
->imageContentChanged();
1132 TextureImageChunkTransitPtr
1133 ClusterShadingStage::create_texture_image_state(
1134 TextureObjChunk
* texObjChunk
,
1135 const GLenum access
)
1137 TextureImageChunkTransitPtr texImageChunk
= TextureImageChunk::createLocal();
1138 texImageChunk
->setTexture(texObjChunk
);
1139 texImageChunk
->setAccess(access
);
1140 texImageChunk
->setFormat(GL_RG32UI
);
1141 texImageChunk
->setLayer(-1);
1143 return texImageChunk
;
1146 void ClusterShadingStage::update_light_grid_image(
1147 const Vec4u
& viewport
,
1149 bool allocate_memory
)
1151 Real32 w
= static_cast<Real32
>(viewport
[2]);
1152 Real32 h
= static_cast<Real32
>(viewport
[3]);
1154 UInt32 numHorizontalTiles
= UInt32(osgCeil(w
/ getTileSize()));
1155 UInt32 numVerticalTiles
= UInt32(osgCeil(h
/ getTileSize()));
1157 update_image(image
, numHorizontalTiles
, numVerticalTiles
, getNumClusterZ(), allocate_memory
);
1160 /*----------------------- Light Index Details ----------------------------*/
1163 ClusterShadingStage::calc_light_index_buffer_size(
1164 const VecLightIndexT
& vIndexList
)
1166 return sizeof(UInt32
) * vIndexList
.size();
1170 ClusterShadingStage::create_light_index_buffer(
1171 const VecLightIndexT
& vIndexList
)
1173 std::size_t size
= calc_light_index_buffer_size(vIndexList
);
1175 std::vector
<UInt8
> buffer(size
);
1177 std::size_t ao
= 0; // aligned offset
1178 std::size_t bo
= 0; // base offset
1180 for (std::size_t i
= 0; i
< vIndexList
.size(); ++i
)
1182 ao
= alignOffset(4, bo
);
1183 *(reinterpret_cast<UInt32
*>(&buffer
[0] + ao
)) = vIndexList
[i
];
1184 bo
= ao
+ sizeof(UInt32
);
1187 ao
= alignOffset( 4, bo
); bo
= ao
; // padding
1192 ShaderStorageBufferObjStdLayoutChunkTransitPtr
1193 ClusterShadingStage::create_index_state(
1194 const VecLightIndexT
& vIndexList
)
1196 ShaderStorageBufferObjStdLayoutChunkTransitPtr ssbo
= ShaderStorageBufferObjStdLayoutChunk::createLocal();
1198 std::vector
<UInt8
> buffer
= create_light_index_buffer(vIndexList
);
1200 ssbo
->editMFBuffer()->setValues(buffer
);
1201 ssbo
->setUsage(GL_DYNAMIC_DRAW
);
1206 ShaderStorageBufferObjStdLayoutChunkTransitPtr
1207 ClusterShadingStage::create_index_state(std::size_t sz
)
1209 ShaderStorageBufferObjStdLayoutChunkTransitPtr ssbo
= ShaderStorageBufferObjStdLayoutChunk::createLocal();
1211 std::vector
<UInt8
> buffer(sizeof(UInt32
) * sz
, 0);
1213 ssbo
->editMFBuffer()->setValues(buffer
);
1214 ssbo
->setUsage(GL_DYNAMIC_DRAW
);
1219 void ClusterShadingStage::update_index_state(
1220 ShaderStorageBufferObjStdLayoutChunk
* ssbo
,
1221 const VecLightIndexT
& vIndexList
)
1224 std::vector
<UInt8
> buffer
= create_light_index_buffer(vIndexList
);
1225 ssbo
->editMFBuffer()->setValues(buffer
);
1229 void ClusterShadingStage::clear_index_state(
1230 ShaderStorageBufferObjStdLayoutChunk
* ssbo
,
1234 std::vector
<UInt8
> buffer(sizeof(UInt32
) * sz
, 0);
1235 ssbo
->editMFBuffer()->setValues(buffer
);
1239 /*------------------- Light Index Counter Details ------------------------*/
1241 ShaderStorageBufferObjStdLayoutChunkTransitPtr
1242 ClusterShadingStage::create_light_index_counter_state()
1244 ShaderStorageBufferObjStdLayoutChunkTransitPtr ssbo
= ShaderStorageBufferObjStdLayoutChunk::createLocal();
1247 // The counter compromises exactly one UInt32 element
1249 std::vector
<UInt8
> buffer(sizeof(UInt32
) * 1, 0);
1251 ssbo
->editMFBuffer()->setValues(buffer
);
1252 ssbo
->setUsage(GL_DYNAMIC_DRAW
);
1257 void ClusterShadingStage::clear_light_index_counter_state(
1258 ShaderStorageBufferObjStdLayoutChunk
* ssbo
)
1262 // The counter comprises exactly one UInt32 element
1264 std::vector
<UInt8
> buffer(sizeof(UInt32
) * 1, 0);
1266 ssbo
->editMFBuffer()->setValues(buffer
);
1270 /*---------------------- Dispatch Data Details ---------------------------*/
1272 std::size_t ClusterShadingStage::calc_dispatch_data_buffer_size(
1273 const DispatchData
& data
)
1275 std::size_t ao
= 0; // aligned offset
1276 std::size_t bo
= 0; // base offset
1278 // Matrix matTransf;
1279 ao
= alignOffset(16, bo
); bo
= ao
+ sizeof(Vec4f
);
1280 ao
= alignOffset(16, bo
); bo
= ao
+ sizeof(Vec4f
);
1281 ao
= alignOffset(16, bo
); bo
= ao
+ sizeof(Vec4f
);
1282 ao
= alignOffset(16, bo
); bo
= ao
+ sizeof(Vec4f
);
1285 ao
= alignOffset(16, bo
); bo
= ao
+ sizeof(Vec4u
);
1288 ao
= alignOffset( 8, bo
); bo
= ao
+ sizeof(Vec2i
);
1291 ao
= alignOffset(8, bo
); bo
= ao
;
1297 ClusterShadingStage::create_dispatch_data_buffer(
1298 const DispatchData
& data
)
1300 std::size_t size
= calc_dispatch_data_buffer_size(data
);
1302 std::vector
<UInt8
> buffer(size
);
1304 std::size_t ao
= 0; // aligned offset
1305 std::size_t bo
= 0; // base offset
1307 // Matrix matTransf;
1308 ao
= alignOffset(16, bo
);
1309 memcpy(&buffer
[0] + ao
, &data
.matTransf
[0][0], sizeof(Vec4f
));
1310 bo
= ao
+ sizeof(Vec4f
);
1312 ao
= alignOffset(16, bo
);
1313 memcpy(&buffer
[0] + ao
, &data
.matTransf
[1][0], sizeof(Vec4f
));
1314 bo
= ao
+ sizeof(Vec4f
);
1316 ao
= alignOffset(16, bo
);
1317 memcpy(&buffer
[0] + ao
, &data
.matTransf
[2][0], sizeof(Vec4f
));
1318 bo
= ao
+ sizeof(Vec4f
);
1320 ao
= alignOffset(16, bo
);
1321 memcpy(&buffer
[0] + ao
, &data
.matTransf
[3][0], sizeof(Vec4f
));
1322 bo
= ao
+ sizeof(Vec4f
);
1325 ao
= alignOffset(16, bo
);
1326 memcpy(&buffer
[0] + ao
, &data
.viewport
[0], sizeof(Vec4u
));
1327 bo
= ao
+ sizeof(Vec4u
);
1330 ao
= alignOffset( 8, bo
);
1331 memcpy(&buffer
[0] + ao
, &data
.numTiles
[0], sizeof(Vec2i
));
1332 bo
= ao
+ sizeof(Vec2i
);
1335 ao
= alignOffset(8, bo
); bo
= ao
;
1340 UniformBufferObjStd140ChunkTransitPtr
1341 ClusterShadingStage::create_dispatch_data(const DispatchData
& data
)
1343 UniformBufferObjStd140ChunkTransitPtr ubo
= UniformBufferObjStd140Chunk::createLocal();
1345 std::vector
<UInt8
> buffer
= create_dispatch_data_buffer(data
);
1347 ubo
->editMFBuffer()->setValues(buffer
);
1348 ubo
->setUsage(GL_DYNAMIC_DRAW
);
1353 void ClusterShadingStage::update_dispatch_data(
1354 UniformBufferObjStd140Chunk
* ubo
,
1355 const DispatchData
& data
)
1358 std::vector
<UInt8
> buffer
= create_dispatch_data_buffer(data
);
1359 ubo
->editMFBuffer()->setValues(buffer
);
1363 /*-------------------- Clustering Index Details --------------------------*/
1365 std::size_t ClusterShadingStage::calc_clustering_data_buffer_size(
1366 const ClusteringData
& data
)
1368 std::size_t ao
= 0; // aligned offset
1369 std::size_t bo
= 0; // base offset
1371 // Real32 zNear; // positive near plane distance from eye zNear > 0
1372 ao
= alignOffset( 4, bo
); bo
= ao
+ sizeof(Real32
);
1374 // Real32 zFar; // positive far plane distance from eye zFar > zNear > 0
1375 ao
= alignOffset( 4, bo
); bo
= ao
+ sizeof(Real32
);
1377 //Real32 D; // positive near plane offset D >= 0 // for testing
1378 //ao = alignOffset( 4, bo); bo = ao + sizeof(Real32);
1380 //Real32 nD; // zNear + D : shader optimization
1381 ao
= alignOffset( 4, bo
); bo
= ao
+ sizeof(Real32
);
1383 //Real32 lg_nD; // log2(nD) : shader optimization
1384 ao
= alignOffset( 4, bo
); bo
= ao
+ sizeof(Real32
);
1386 //Real32 a; // precalculated factor (c-1)/log2(f/(n+D))
1387 ao
= alignOffset( 4, bo
); bo
= ao
+ sizeof(Real32
);
1389 //Real32 b; // precalculated factor log2(f/(n+D))/(c-1)
1390 ao
= alignOffset( 4, bo
); bo
= ao
+ sizeof(Real32
);
1392 //Int32 c; // number of cluster planes // for testing
1393 //ao = alignOffset( 4, bo); bo = ao + sizeof(Int32);
1395 //Int32 c_1; // number of cluster planes minus one : shader optimization
1396 ao
= alignOffset( 4, bo
); bo
= ao
+ sizeof(Int32
);
1398 //Vec2i p_v; // viewport corner points
1399 ao
= alignOffset( 8, bo
); bo
= ao
+ sizeof(Vec2i
);
1401 //Vec3i n_c; // number of clusters // for testing
1402 //ao = alignOffset(16, bo); bo = ao + sizeof(Vec3i);
1403 //ao = alignOffset(16, bo); bo = ao;
1405 //bool enabled;// cluster shading enabled
1406 ao
= alignOffset( 4, bo
); bo
= ao
+ sizeof(Int32
);
1409 ao
= alignOffset(4, bo
); bo
= ao
;
1415 ClusterShadingStage::create_clustering_data_buffer(
1416 const ClusteringData
& data
)
1418 std::size_t size
= calc_clustering_data_buffer_size(data
);
1420 std::vector
<UInt8
> buffer(size
);
1422 std::size_t ao
= 0; // aligned offset
1423 std::size_t bo
= 0; // base offset
1425 // Real32 zNear; // positive near plane distance from eye zNear > 0
1426 ao
= alignOffset( 4, bo
); *(reinterpret_cast<Real32
*>(&buffer
[0] + ao
)) = data
.zNear
; bo
= ao
+ sizeof(Real32
);
1428 // Real32 zFar; // positive far plane distance from eye zFar > zNear > 0
1429 ao
= alignOffset( 4, bo
); *(reinterpret_cast<Real32
*>(&buffer
[0] + ao
)) = data
.zFar
; bo
= ao
+ sizeof(Real32
);
1431 //Real32 D; // positive near plane offset D >= 0 // for testing
1432 //ao = alignOffset( 4, bo); *(reinterpret_cast<Real32*>(&buffer[0] + ao)) = data.D; bo = ao + sizeof(Real32);
1434 //Real32 nD; // zNear + D : shader optimization
1435 ao
= alignOffset( 4, bo
); *(reinterpret_cast<Real32
*>(&buffer
[0] + ao
)) = data
.nD
; bo
= ao
+ sizeof(Real32
);
1437 //Real32 lg_nD; // log2(nD) : shader optimization
1438 ao
= alignOffset( 4, bo
); *(reinterpret_cast<Real32
*>(&buffer
[0] + ao
)) = data
.lg_nD
; bo
= ao
+ sizeof(Real32
);
1440 //Real32 a; // precalculated factor (c-1)/log2(f/(n+D))
1441 ao
= alignOffset( 4, bo
); *(reinterpret_cast<Real32
*>(&buffer
[0] + ao
)) = data
.a
; bo
= ao
+ sizeof(Real32
);
1443 //Real32 b; // precalculated factor log2(f/(n+D))/(c-1)
1444 ao
= alignOffset( 4, bo
); *(reinterpret_cast<Real32
*>(&buffer
[0] + ao
)) = data
.b
; bo
= ao
+ sizeof(Real32
);
1446 //Int32 c; // number of cluster planes // for testing
1447 //ao = alignOffset( 4, bo); *(reinterpret_cast<Int32*>(&buffer[0] + ao)) = data.c; bo = ao + sizeof(Int32);
1449 //Int32 c_1; // number of cluster planes minus one : shader optimization
1450 ao
= alignOffset( 4, bo
); *(reinterpret_cast<Int32
*>(&buffer
[0] + ao
)) = data
.c_1
; bo
= ao
+ sizeof(Int32
);
1452 //Vec2i p_v; // viewport corner points
1453 ao
= alignOffset( 8, bo
); memcpy(&buffer
[0] + ao
, &data
.p_v
[0], sizeof(Vec2i
)); bo
= ao
+ sizeof(Vec2i
);
1455 //Vec3i n_c; // number of clusters // for testing
1456 //ao = alignOffset(16, bo); memcpy(&buffer[0] + ao, &data.n_c[0], sizeof(Vec3i)); bo = ao + sizeof(Vec3i);
1457 //ao = alignOffset(16, bo); bo = ao;
1459 //bool enabled;// cluster shading enabled
1460 ao
= alignOffset( 4, bo
); *(reinterpret_cast<bool*>(&buffer
[0] + ao
)) = data
.enabled
; bo
= ao
+ sizeof(Int32
);
1463 ao
= alignOffset(4, bo
); bo
= ao
;
1468 UniformBufferObjStd140ChunkTransitPtr
1469 ClusterShadingStage::create_clustering_data(
1470 const ClusteringData
& data
)
1472 UniformBufferObjStd140ChunkTransitPtr ubo
= UniformBufferObjStd140Chunk::createLocal();
1474 std::vector
<UInt8
> buffer
= create_clustering_data_buffer(data
);
1476 ubo
->editMFBuffer()->setValues(buffer
);
1477 ubo
->setUsage(GL_STATIC_DRAW
);
1482 void ClusterShadingStage::update_clustering_data(
1483 UniformBufferObjStd140Chunk
* ubo
,
1484 const ClusteringData
& data
)
1487 std::vector
<UInt8
> buffer
= create_clustering_data_buffer(data
);
1488 ubo
->editMFBuffer()->setValues(buffer
);
1492 /*------------------------- Cluster Access -------------------------------*/
1494 UInt32
ClusterShadingStage::cluster_k(
1495 Real32 z_e
, // eye space z-position, z_e < 0
1496 Real32 n
, // near plane distance from viewer n > 0
1497 Real32 f
, // far plane distance from viewer f > n > 0
1498 Real32 D
, // near plane offset
1499 UInt32 c
) // number of cluster planes
1501 if (z_e
>= -(n
+D
)) return 0;
1502 if (z_e
<= -f
) return c
-1;
1504 Real32 s
= 1 + ((c
-1) / log2(f
/(n
+D
))) * (log2(-z_e
) - log2(n
+D
));
1505 UInt32 k
= UInt32(osgFloor(s
));
1507 return osgClamp(0U, k
, c
-1);
1510 Real32
ClusterShadingStage::cluster_z(
1511 UInt32 k
, // cluster coordinate, 0 <= k <= c
1512 Real32 n
, // near plane distance from viewer n > 0
1513 Real32 f
, // far plane distance from viewer f > n > 0
1514 Real32 D
, // near plane offset
1515 UInt32 c
) // number of cluster planes
1517 if (k
== 0) return -n
;
1518 if (k
> c
-1) return -f
;
1520 Real32 z_e
= -(n
+D
)*exp2(static_cast<Real32
>(k
-1) * (log2(f
/(n
+D
))/(c
-1)));
1524 UInt32
ClusterShadingStage::cluster_k(
1525 Real32 z_e
, // eye space z-position, z_e < 0
1526 Real32 nD
, // near plane distance from viewer plus offset, nD = n+D with n > 0, D > 0
1527 Real32 lg_nD
, // log2(nD)
1528 Real32 f
, // far plane distance from viewer f > n > 0
1529 Real32 a
, // factor (c-1)/log2(f/nD)
1530 UInt32 c_1
) // number of cluster planes
1532 if (z_e
>= -nD
) return 0;
1533 if (z_e
<= -f
) return c_1
;
1535 Real32 s
= 1 + a
* (log2(-z_e
) - lg_nD
);
1536 UInt32 k
= UInt32(s
);
1537 return osgClamp(0U, k
, c_1
);
1540 Real32
ClusterShadingStage::cluster_z(
1541 UInt32 k
, // cluster coordinate, 0 <= k <= c
1542 Real32 n
, // near plane distance from viewer n > 0
1543 Real32 f
, // far plane distance from viewer f > n > 0
1544 Real32 nD
, // near plane distance from viewer plus offset, nD = n+D with n > 0, D > 0
1545 Real32 b
, // factor log2(f/(n+D))/(c-1)
1546 UInt32 c_1
) // number of cluster planes
1548 if (k
== 0) return -n
;
1549 if (k
> c_1
) return -f
;
1551 Real32 z_e
= -nD
*exp2(static_cast<Real32
>(k
-1) * b
);
1555 /*-------------------------- Computation ---------------------------------*/
1557 void ClusterShadingStage::create_computation(
1558 ClusterShadingStageData
* pData
)
1560 create_frustum_computation (pData
);
1561 create_light_culling_computation(pData
);
1563 NodeUnrecPtr frustNode
= Node::createLocal();
1564 NodeUnrecPtr cullNode
= Node::createLocal();
1566 if (getCalcFrustumsOnCPU() || getCullLighsOnCPU())
1568 frustNode
->setCore(Group::createLocal());
1572 frustNode
->setCore(getFrustAlgoElement());
1575 if (getCullLighsOnCPU())
1577 cullNode
->setCore(Group::createLocal());
1581 cullNode
->setCore(getCullAlgoElement());
1584 frustNode
->addChild(cullNode
);
1586 frustNode
->editVolume().setInfinite();
1587 frustNode
->editVolume().setStatic ();
1589 cullNode
->editVolume().setInfinite();
1590 cullNode
->editVolume().setStatic ();
1592 setFrustNode(frustNode
);
1593 setCullNode ( cullNode
);
1596 void ClusterShadingStage::update_computation(
1597 ClusterShadingStageData
* pData
)
1600 // Adapt the stage data chunk material
1602 if (!getCalcFrustumsOnCPU())
1604 ChunkMaterial
* pFrustChunkMat
= getFrustCompShaderAlgo()->getChunkMaterial();
1605 if (pFrustChunkMat
!= pData
->getFrustChunkMat())
1607 getFrustCompShaderAlgo()->setChunkMaterial(pData
->getFrustChunkMat());
1611 if (!getCullLighsOnCPU())
1613 ChunkMaterial
* pCullChunkMat
= getCullCompShaderAlgo() ->getChunkMaterial();
1614 if (pCullChunkMat
!= pData
->getCullChunkMat())
1616 getCullCompShaderAlgo()->setChunkMaterial(pData
->getCullChunkMat());
1621 // Nothing to do here, because we always recreate the stage if the CPU/GPU modes
1626 // Setup the computation cores
1628 if (getCalcFrustumsOnCPU() || getCullLighsOnCPU())
1630 if (getFrustNode()->getCore()->getType() != Group::getClassType())
1632 getFrustNode()->setCore(Group::createLocal());
1637 if (getFrustNode()->getCore() != getFrustAlgoElement())
1639 getFrustNode()->setCore(getFrustAlgoElement());
1643 if (getCullLighsOnCPU())
1645 if (getCullNode()->getCore()->getType() != Group::getClassType())
1647 getCullNode()->setCore(Group::createLocal());
1652 if (getCullNode()->getCore() != getCullAlgoElement())
1654 getCullNode()->setCore(getCullAlgoElement());
1660 /*---------------------- Frustum Computation -----------------------------*/
1662 void ClusterShadingStage::create_frustum_computation(
1663 ClusterShadingStageData
* pData
)
1667 ShaderProgramUnrecPtr shader
= ShaderProgram::createLocal();
1669 shader
->setShaderType(GL_COMPUTE_SHADER
);
1671 pData
->getIsOrthographicCamera()
1672 ? get_ortho_frustum_cp_program()
1673 : get_persp_frustum_cp_program()
1676 shader
->addUniformBlock ("DispatchData", getDispatchDataBindingPnt());
1677 shader
->addShaderStorageBlock("Frustums", getFrustumBindingPnt());
1679 ComputeShaderChunkUnrecPtr chunk
= ComputeShaderChunk::createLocal();
1680 chunk
->addComputeShader(shader
);
1681 chunk
->setVariables(shader
->getVariables());
1683 ComputeShaderAlgorithmUnrecPtr algorithm
= ComputeShaderAlgorithm::createLocal();
1684 algorithm
->setUseMemoryBarrier(true);
1685 algorithm
->setMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT
);
1686 algorithm
->setComputeShader(chunk
);
1688 Vec3i
work_group_count(1,1,1);
1689 algorithm
->setDispatchConfig(work_group_count
);
1690 algorithm
->setChunkMaterial(pData
->getFrustChunkMat());
1692 AlgorithmComputeElementUnrecPtr element
= AlgorithmComputeElement::createLocal();
1693 element
->setAlgorithm(algorithm
);
1695 setFrustAlgoElement(element
);
1696 setFrustCompShaderAlgo(algorithm
);
1699 /*------------------- Light Culling Computation --------------------------*/
1701 void ClusterShadingStage::create_light_culling_computation(
1702 ClusterShadingStageData
* pData
)
1706 ShaderProgramUnrecPtr shader
= ShaderProgram::createLocal();
1707 shader
->setShaderType(GL_COMPUTE_SHADER
);
1708 shader
->setProgram(get_light_culling_cp_program());
1710 shader
->addShaderStorageBlock(getMultiLightChunk()->getLightBlockName().c_str(), getLightBindingPnt());
1711 shader
->addUniformBlock ("DispatchData", getDispatchDataBindingPnt());
1712 shader
->addUniformBlock (getClusteringDataBlockName().c_str(), getClusterDataBindingPnt());
1713 shader
->addShaderStorageBlock("Frustums", getFrustumBindingPnt());
1714 shader
->addShaderStorageBlock(getAffectedLightIndexListBlockName().c_str(), getAffectedLightIndexListBindingPnt());
1715 shader
->addShaderStorageBlock(getLightIndexListBlockName().c_str(), getLightIndexListBindingPnt());
1716 shader
->addShaderStorageBlock("LightIndexCounter", getLightIndexCounterBindingPnt());
1718 ComputeShaderChunkUnrecPtr chunk
= ComputeShaderChunk::createLocal();
1719 chunk
->addComputeShader(shader
);
1720 chunk
->setVariables(shader
->getVariables());
1722 ComputeShaderAlgorithmUnrecPtr algorithm
= ComputeShaderAlgorithm::createLocal();
1723 algorithm
->setUseMemoryBarrier(true);
1724 algorithm
->setMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT
);
1725 algorithm
->setComputeShader(chunk
);
1727 Vec3i
work_group_count(1,1,1);
1728 algorithm
->setDispatchConfig(work_group_count
);
1729 algorithm
->setChunkMaterial(pData
->getCullChunkMat());
1731 AlgorithmComputeElementUnrecPtr element
= AlgorithmComputeElement::createLocal();
1732 element
->setAlgorithm(algorithm
);
1734 setCullAlgoElement(element
);
1735 setCullCompShaderAlgo(algorithm
);
1738 /*------------------------- Fragment Shader -------------------------------*/
1740 void ClusterShadingStage::create_fragment_shader(ClusterShadingStageData
* pData
)
1742 ShaderProgramChunkUnrecPtr progChunk
= ShaderProgramChunk::createLocal();
1744 ShaderProgramUnrecPtr shader
= ShaderProgram::createLocal();
1745 shader
->setShaderType(GL_FRAGMENT_SHADER
);
1746 shader
->setProgram(get_fragment_cp_program());
1748 shader
->addShaderStorageBlock(getMultiLightChunk()->getLightBlockName().c_str(), getLightBindingPnt());
1749 shader
->addShaderStorageBlock(getAffectedLightIndexListBlockName().c_str(), getAffectedLightIndexListBindingPnt());
1750 shader
->addShaderStorageBlock(getLightIndexListBlockName().c_str(), getLightIndexListBindingPnt());
1751 shader
->addUniformBlock (getClusteringDataBlockName().c_str(), getClusterDataBindingPnt());
1753 progChunk
->addShader(shader
);
1755 setShaderProgChunk(progChunk
);
1758 /*------------------ Update Dispatch Configurations -----------------------*/
1760 void ClusterShadingStage::update_dispatch_config_frustums(
1761 ClusterShadingStageData
* pData
,
1762 const Vec4u
& viewport
,
1763 const Matrix
& matInvProjection
)
1765 // Real32 x_v = static_cast<Real32>(viewport[0]);
1766 // Real32 y_v = static_cast<Real32>(viewport[1]);
1767 Real32 w
= static_cast<Real32
>(viewport
[2]);
1768 Real32 h
= static_cast<Real32
>(viewport
[3]);
1770 const Vec3i work_group_size
= Vec3i(getBlockSize(), getBlockSize(), 1);
1771 const UInt32 tile_size
= getTileSize();
1773 Real32 numHorizontalTiles
= osgCeil(w
/ tile_size
);
1774 Real32 numVerticalTiles
= osgCeil(h
/ tile_size
);
1777 data
.matTransf
= matInvProjection
;
1778 data
.viewport
= viewport
;
1779 data
.numTiles
= Vec2i(Int32(numHorizontalTiles
), Int32(numVerticalTiles
));
1781 VecFrustumsT
vFrustums(data
.numTiles
.x() * data
.numTiles
.y());
1782 update_frustum_state(pData
->getFrustumsSSBOChunk(), vFrustums
);
1783 update_dispatch_data(pData
->getFrustDispDatUBOChunk(), data
);
1785 UInt32 szX
= UInt32(osgCeil(numHorizontalTiles
/ work_group_size
.x()));
1786 UInt32 szY
= UInt32(osgCeil( numVerticalTiles
/ work_group_size
.y()));
1789 Vec3i work_group_count
= Vec3i(szX
, szY
, szZ
);
1790 getFrustCompShaderAlgo()->setDispatchConfig(work_group_count
);
1793 void ClusterShadingStage::update_dispatch_config_cull_lights(
1794 ClusterShadingStageData
* pData
,
1795 const Vec4u
& viewport
,
1796 const Matrix
& matViewing
)
1798 // Real32 x_v = static_cast<Real32>(viewport[0]);
1799 // Real32 y_v = static_cast<Real32>(viewport[1]);
1800 Real32 w
= static_cast<Real32
>(viewport
[2]);
1801 Real32 h
= static_cast<Real32
>(viewport
[3]);
1803 const Vec3i work_group_size
= Vec3i(getBlockSize(), getBlockSize(), 1);
1804 const UInt32 tile_size
= getTileSize();
1806 Real32 numHorizontalTiles
= osgCeil(w
/ tile_size
);
1807 Real32 numVerticalTiles
= osgCeil(h
/ tile_size
);
1810 data
.matTransf
= matViewing
;
1811 data
.viewport
= viewport
;
1812 data
.numTiles
= Vec2i(Int32(numHorizontalTiles
), Int32(numVerticalTiles
));
1814 update_dispatch_data(pData
->getCullDispDatUBOChunk(), data
);
1816 Vec3i work_group_count
= Vec3i(numHorizontalTiles
, numVerticalTiles
, getNumClusterZ());
1817 getCullCompShaderAlgo()->setDispatchConfig(work_group_count
);
1820 /*------------------- Calculate Affected Lights --------------------------*/
1822 void ClusterShadingStage::calc_affected_lights(
1823 const Matrix
& matEyeFromWorld
,
1824 FrustumVolume volViewFrustum
,
1825 VecLightIndexT
& vecAffectedLights
,
1826 VecLightEyeSpaceDataT
& vecEyeSpaceData
)
1829 // We test the light against the view frustum to get only affected lights.
1830 // Since all of our tests happen in eye space we must transform the world
1831 // space view frustum to eye space first.
1833 volViewFrustum
.transform(matEyeFromWorld
);
1836 frustum
.planes
[0] = volViewFrustum
.getPlane(FrustumVolume::PLANE_LEFT
);
1837 frustum
.planes
[1] = volViewFrustum
.getPlane(FrustumVolume::PLANE_RIGHT
);
1838 frustum
.planes
[2] = volViewFrustum
.getPlane(FrustumVolume::PLANE_TOP
);
1839 frustum
.planes
[3] = volViewFrustum
.getPlane(FrustumVolume::PLANE_BOTTOM
);
1841 Pnt3f nlt
= volViewFrustum
.getCorner(FrustumVolume::NEAR_LEFT_TOP
);
1842 Pnt3f flt
= volViewFrustum
.getCorner(FrustumVolume:: FAR_LEFT_TOP
);
1847 UInt32 numLights
= getMultiLightChunk()->numLights();
1850 // We do need the eye space positions and directions of the lights
1851 // for our view frustum and cluster frustum tests. In order to
1852 // calculate them only once we remember them in a vector.
1854 if (vecEyeSpaceData
.size() != numLights
)
1855 vecEyeSpaceData
.resize(numLights
);
1857 for (UInt32 i
= 0; i
< numLights
; ++i
)
1859 bool affected
= false;
1861 if (getMultiLightChunk()->getEnabled(i
))
1863 switch (getMultiLightChunk()->getType(i
))
1865 case MultiLight::DIRECTIONAL_LIGHT
:
1870 case MultiLight::POINT_LIGHT
:
1871 case MultiLight::CINEMA_LIGHT
:
1873 transform_to_eye_space(
1875 getMultiLightChunk()->getBeacon(i
),
1876 getMultiLightChunk()->getPosition(i
),
1877 getMultiLightChunk()->getDirection(i
),
1878 vecEyeSpaceData
[i
].position
,
1879 vecEyeSpaceData
[i
].direction
);
1881 if (point_light_inside_frustum(
1882 vecEyeSpaceData
[i
].position
,
1883 getMultiLightChunk()->getRangeCutOff(i
),
1890 case MultiLight::SPOT_LIGHT
:
1892 transform_to_eye_space(
1894 getMultiLightChunk()->getBeacon(i
),
1895 getMultiLightChunk()->getPosition(i
),
1896 getMultiLightChunk()->getDirection(i
),
1897 vecEyeSpaceData
[i
].position
,
1898 vecEyeSpaceData
[i
].direction
);
1900 if (spot_light_inside_frustum(
1901 vecEyeSpaceData
[i
].position
,
1902 vecEyeSpaceData
[i
].direction
,
1903 getMultiLightChunk()->getRangeCutOff(i
),
1904 getMultiLightChunk()->getSpotlightAngle(i
),
1915 vecAffectedLights
.push_back(UInt32(i
));
1919 void ClusterShadingStage::transform_to_eye_space(
1920 Matrix matEyeFromWorld
,
1922 const Pnt3f
& position_bs
,
1923 const Vec3f
& direction_bs
,
1925 Vec3f
& direction_es
)
1931 beacon
->getToWorld(matWsFromBS
);
1935 matWsFromBS
.setIdentity();
1938 matEyeFromWorld
.mult(matWsFromBS
); // matEsFromBs
1939 matEyeFromWorld
.multFull( position_bs
, position_es
);
1940 matEyeFromWorld
.multFull(direction_bs
, direction_es
);
1943 bool ClusterShadingStage::point_light_inside_frustum(
1944 const Pnt3f
& position_es
,
1946 const Frustum
& frustum
,
1951 sphere
.c
= position_es
;
1954 return SphereInsideFrustum(sphere
, frustum
, n
, f
);
1957 bool ClusterShadingStage::spot_light_inside_frustum(
1958 const Pnt3f
& position_es
,
1959 const Vec3f
& direction_es
,
1961 Real32 spotlightAngle
,
1962 const Frustum
& frustum
,
1967 cone
.T
= position_es
;
1968 cone
.d
= direction_es
;
1970 cone
.r
= osgTan( osgDegree2Rad(spotlightAngle
)) * cone
.h
;
1972 return ConeInsideFrustum(cone
, frustum
, n
, f
);
1975 /*----------------------- CPU Light Culling ------------------------------*/
1977 inline std::size_t ClusterShadingStage::frustum_accessor(
1980 const Vec3u
& dimensions
)
1982 return j
* dimensions
.x() + i
;
1985 void ClusterShadingStage::cull_lights(
1986 ClusterShadingStageData
* pData
, // in contains the CPU calculated tile frustum planes
1987 const VecLightIndexT
& vecAffectedLights
, // in the lights that are actually contributing to the view frustum shading
1988 const VecLightEyeSpaceDataT
& vecEyeSpaceData
, // in the precalculated light eye space positions and directions
1989 const Vec3u
& dimensions
, // in the cluster dimentsions
1990 const Matrix
& matEyeFromWorld
, // in transform from world to eye space
1991 Real32 zNear
, // in distance of near plane from eye point
1992 Real32 zFar
, // in distance of far plane from eye point
1993 Real32 D
, // in distance offset for near plane
1994 VecImageDataT
& gridData
, // out the raw grid data
1995 VecLightIndexT
& lightIndexList
) // out the index list
1998 lightIndexList
.clear();
2000 gridData
.resize(dimensions
.x() * dimensions
.y() * dimensions
.z());
2002 UInt32 c_1
= dimensions
.z()-1;
2003 Real32 zNearD
= zNear
+ D
;
2004 Real32 b
= log2(zFar
/zNearD
)/(c_1
);
2006 for (UInt32 k
= 0; k
< dimensions
.z(); ++k
)
2008 Real32 n
= cluster_z(k
, zNear
, zFar
, zNearD
, b
, c_1
);
2009 Real32 f
= cluster_z(k
+1, zNear
, zFar
, zNearD
, b
, c_1
);
2011 for (UInt32 j
= 0; j
< dimensions
.y(); ++j
)
2013 for (UInt32 i
= 0; i
< dimensions
.x(); ++i
)
2015 std::size_t tile
= frustum_accessor(i
, j
, dimensions
);
2016 Frustum frustum
= getFrustum(pData
, static_cast<UInt32
>(tile
));
2018 UInt32 light_count
= 0;
2019 UInt32 light_start_offset
= UInt32(lightIndexList
.size());
2021 std::size_t num_affected_lights
= vecAffectedLights
.size();
2023 for (std::size_t l
= 0; l
< num_affected_lights
; ++l
)
2025 UInt32 light_index
= vecAffectedLights
[l
];
2027 if (getMultiLightChunk()->getEnabled(light_index
))
2029 switch (getMultiLightChunk()->getType(light_index
))
2031 case MultiLight::DIRECTIONAL_LIGHT
:
2034 lightIndexList
.push_back(light_index
);
2037 case MultiLight::POINT_LIGHT
:
2038 case MultiLight::CINEMA_LIGHT
:
2041 point_light_inside_frustum(
2042 vecEyeSpaceData
[light_index
].position
,
2043 getMultiLightChunk()->getRangeCutOff(light_index
),
2048 lightIndexList
.push_back(light_index
);
2052 case MultiLight::SPOT_LIGHT
:
2055 spot_light_inside_frustum(
2056 vecEyeSpaceData
[light_index
].position
,
2057 vecEyeSpaceData
[light_index
].direction
,
2058 getMultiLightChunk()->getRangeCutOff(light_index
),
2059 getMultiLightChunk()->getSpotlightAngle(light_index
),
2064 lightIndexList
.push_back(light_index
);
2072 write_image_data(i
, j
, k
, dimensions
, std::make_pair(light_start_offset
, light_count
), gridData
);
2078 /*-------------------- Compute Shader Programs ---------------------------*/
2080 std::string
ClusterShadingStage::get_persp_frustum_cp_program()
2082 using namespace std
;
2084 const Vec3i work_group_size
= Vec3i(getBlockSize(), getBlockSize(), 1);
2085 const UInt32 tile_size
= getTileSize();
2089 ost
<< "#version 430 compatibility"
2091 << endl
<< "layout (local_size_x = " << work_group_size
.x()
2092 << ", local_size_y = " << work_group_size
.y()
2093 << ", local_size_z = " << work_group_size
.z() << ") in;"
2095 << endl
<< "const int tile_size = " << tile_size
<< ";"
2098 << endl
<< "// matTransf is the inverse projection matrix"
2100 << getDispatchProgSnippet()
2102 << getFrustumProgSnippet()
2104 << endl
<< "const vec3 eyePos = vec3(0, 0, 0);"
2106 << endl
<< "Plane computePlane(in const vec3 p0, in const vec3 p1, in const vec3 p2)"
2108 << endl
<< " Plane plane;"
2110 << endl
<< " vec3 v1 = p1 - p0;"
2111 << endl
<< " vec3 v2 = p2 - p0;"
2113 << endl
<< " plane.N = normalize(cross(v1, v2));"
2114 << endl
<< " plane.d = dot(plane.N, p0);"
2116 << endl
<< " return plane;"
2119 << endl
<< "vec4 ndcFromScreen(in const vec3 p_w)"
2121 << endl
<< " return vec4("
2122 << endl
<< " 2.0 * (p_w.x - dispatchData.viewport.x) / dispatchData.viewport[2] - 1.0,"
2123 << endl
<< " 2.0 * (p_w.y - dispatchData.viewport.y) / dispatchData.viewport[3] - 1.0,"
2124 << endl
<< " p_w.z, // assumed to be already in ndc-space!"
2128 << endl
<< "vec3 eyeFromNdc(in vec4 p_n)"
2130 << endl
<< " vec4 p_e = dispatchData.matTransf * p_n; // inverse projection matrix"
2131 << endl
<< " p_e /= p_e.w;"
2132 << endl
<< " return p_e.xyz;"
2135 << endl
<< "void main()"
2137 << endl
<< " vec3 pnts_w[4];"
2138 << endl
<< " vec4 pnts_n[4];"
2139 << endl
<< " vec3 pnts_e[4];"
2141 << endl
<< " float x_v = dispatchData.viewport.x;"
2142 << endl
<< " float y_v = dispatchData.viewport.y;"
2143 << endl
<< " float w_v = dispatchData.viewport.z;"
2144 << endl
<< " float h_v = dispatchData.viewport.w;"
2146 << endl
<< " float x0 = x_v + gl_GlobalInvocationID.x * tile_size;"
2147 << endl
<< " float x1 = min(x_v + (gl_GlobalInvocationID.x+1) * tile_size, x_v + w_v);"
2148 << endl
<< " float y0 = y_v + gl_GlobalInvocationID.y * tile_size;"
2149 << endl
<< " float y1 = min(y_v + (gl_GlobalInvocationID.y+1) * tile_size, y_v + h_v);"
2151 << endl
<< " pnts_w[0] = vec3(x0, y0, -1.0);"
2152 << endl
<< " pnts_w[1] = vec3(x1, y0, -1.0);"
2153 << endl
<< " pnts_w[2] = vec3(x0, y1, -1.0);"
2154 << endl
<< " pnts_w[3] = vec3(x1, y1, -1.0);"
2156 << endl
<< " for (int i = 0; i < 4; ++i)"
2158 << endl
<< " pnts_n[i] = ndcFromScreen(pnts_w[i]);"
2159 << endl
<< " pnts_e[i] = eyeFromNdc (pnts_n[i]);"
2162 << endl
<< " Frustum frustum;"
2164 << endl
<< " frustum.planes[0] = computePlane(eyePos, pnts_e[0], pnts_e[2]);"
2165 << endl
<< " frustum.planes[1] = computePlane(eyePos, pnts_e[3], pnts_e[1]);"
2166 << endl
<< " frustum.planes[2] = computePlane(eyePos, pnts_e[2], pnts_e[3]);"
2167 << endl
<< " frustum.planes[3] = computePlane(eyePos, pnts_e[1], pnts_e[0]);"
2169 << endl
<< " if (gl_GlobalInvocationID.x < dispatchData.numTiles.x && gl_GlobalInvocationID.y < dispatchData.numTiles.y)"
2171 << endl
<< " uint idx = gl_GlobalInvocationID.y * dispatchData.numTiles.x + gl_GlobalInvocationID.x;"
2172 << endl
<< " frustums.frustum[idx] = frustum;"
// Assembles the GLSL (#version 430) compute shader text that computes one
// Frustum (four side planes) per screen tile and stores it in the `frustums`
// buffer. Going by the function name this is the variant used for
// orthographic projections: every plane is built from three tile corner
// points in eye space instead of from the eye position.
//
// The text is streamed into `ost`; the function is declared to return it as
// a std::string (the return statement itself is not part of this view).
2181 std::string
ClusterShadingStage::get_ortho_frustum_cp_program()
2183 using namespace std
;
// Values baked into the shader text: the local work group size is
// getBlockSize() x getBlockSize() x 1, tile_size is the tile edge length
// that the shader multiplies with gl_GlobalInvocationID.xy.
2185 const Vec3i work_group_size
= Vec3i(getBlockSize(), getBlockSize(), 1);
2186 const UInt32 tile_size
= getTileSize();
2190 ost
<< "#version 430 compatibility"
2192 << endl
<< "layout (local_size_x = " << work_group_size
.x()
2193 << ", local_size_y = " << work_group_size
.y()
2194 << ", local_size_z = " << work_group_size
.z() << ") in;"
2196 << endl
<< "const int tile_size = " << tile_size
<< ";"
2199 << endl
<< "// matTransf is the inverse projection matrix"
// Shared snippets: the DispatchData uniform block (matTransf, viewport,
// numTiles) followed by the Plane/Frustum types and the frustums buffer.
2201 << getDispatchProgSnippet()
2203 << getFrustumProgSnippet()
// GLSL helper: plane through p0, p1, p2 with N = normalize(cross(p1-p0, p2-p0))
// and d = dot(N, p0).
2205 << endl
<< "Plane computePlane(in const vec3 p0, in const vec3 p1, in const vec3 p2)"
2207 << endl
<< " Plane plane;"
2209 << endl
<< " vec3 v1 = p1 - p0;"
2210 << endl
<< " vec3 v2 = p2 - p0;"
2212 << endl
<< " plane.N = normalize(cross(v1, v2));"
2213 << endl
<< " plane.d = dot(plane.N, p0);"
2215 << endl
<< " return plane;"
// GLSL helper: maps window x/y into [-1, 1] using dispatchData.viewport;
// z is passed through unchanged.
2218 << endl
<< "vec4 ndcFromScreen(in const vec3 p_w)"
2220 << endl
<< " return vec4("
2221 << endl
<< " 2.0 * (p_w.x - dispatchData.viewport.x) / dispatchData.viewport[2] - 1.0,"
2222 << endl
<< " 2.0 * (p_w.y - dispatchData.viewport.y) / dispatchData.viewport[3] - 1.0,"
2223 << endl
<< " p_w.z, // assumed to be already in ndc-space!"
// GLSL helper: multiplies by dispatchData.matTransf and divides by w.
2227 << endl
<< "vec3 eyeFromNdc(in vec4 p_n)"
2229 << endl
<< " vec4 p_e = dispatchData.matTransf * p_n; // inverse projection matrix"
2230 << endl
<< " p_e /= p_e.w;"
2231 << endl
<< " return p_e.xyz;"
// GLSL main(): one invocation per tile. The tile rectangle [x0,x1] x [y0,y1]
// is clamped to the viewport, its four corners are taken at z = 1.0
// (pnts 0..3) and at z = -1.0 (pnts 4..7), and all eight points are
// transformed to eye space.
2234 << endl
<< "void main()"
2236 << endl
<< " vec3 pnts_w[8];"
2237 << endl
<< " vec4 pnts_n[8];"
2238 << endl
<< " vec3 pnts_e[8];"
2240 << endl
<< " float x_v = dispatchData.viewport.x;"
2241 << endl
<< " float y_v = dispatchData.viewport.y;"
2242 << endl
<< " float w_v = dispatchData.viewport.z;"
2243 << endl
<< " float h_v = dispatchData.viewport.w;"
2245 << endl
<< " float x0 = x_v + gl_GlobalInvocationID.x * tile_size;"
2246 << endl
<< " float x1 = min(x_v + (gl_GlobalInvocationID.x+1) * tile_size, x_v + w_v);"
2247 << endl
<< " float y0 = y_v + gl_GlobalInvocationID.y * tile_size;"
2248 << endl
<< " float y1 = min(y_v + (gl_GlobalInvocationID.y+1) * tile_size, y_v + h_v);"
2250 << endl
<< " pnts_w[0] = vec3(x0, y0, 1.0);"
2251 << endl
<< " pnts_w[1] = vec3(x1, y0, 1.0);"
2252 << endl
<< " pnts_w[2] = vec3(x0, y1, 1.0);"
2253 << endl
<< " pnts_w[3] = vec3(x1, y1, 1.0);"
2255 << endl
<< " pnts_w[4] = vec3(x0, y0,-1.0);"
2256 << endl
<< " pnts_w[5] = vec3(x1, y0,-1.0);"
2257 << endl
<< " pnts_w[6] = vec3(x0, y1,-1.0);"
2258 << endl
<< " pnts_w[7] = vec3(x1, y1,-1.0);"
2260 << endl
<< " for (int i = 0; i < 8; ++i)"
2262 << endl
<< " pnts_n[i] = ndcFromScreen(pnts_w[i]);"
2263 << endl
<< " pnts_e[i] = eyeFromNdc (pnts_n[i]);"
// Each side plane is spanned by one z = -1.0 corner and two z = 1.0 corners
// of the same tile edge.
2266 << endl
<< " Frustum frustum;"
2268 << endl
<< " frustum.planes[0] = computePlane(pnts_e[6], pnts_e[0], pnts_e[2]);"
2269 << endl
<< " frustum.planes[1] = computePlane(pnts_e[7], pnts_e[3], pnts_e[1]);"
2270 << endl
<< " frustum.planes[2] = computePlane(pnts_e[6], pnts_e[2], pnts_e[3]);"
2271 << endl
<< " frustum.planes[3] = computePlane(pnts_e[4], pnts_e[1], pnts_e[0]);"
// Only invocations inside the numTiles grid store a result; the frustum of
// tile (x, y) lands at index y * numTiles.x + x.
2273 << endl
<< " if (gl_GlobalInvocationID.x < dispatchData.numTiles.x && gl_GlobalInvocationID.y < dispatchData.numTiles.y)"
2275 << endl
<< " uint idx = gl_GlobalInvocationID.y * dispatchData.numTiles.x + gl_GlobalInvocationID.x;"
2276 << endl
<< " frustums.frustum[idx] = frustum;"
2285 std::string
ClusterShadingStage::get_light_culling_cp_program()
2287 using namespace std
;
2289 const Vec3i work_group_size
= Vec3i(getBlockSize(), getBlockSize(), 1);
2290 const UInt32 tile_size
= getTileSize();
2294 ost
<< "#version 430 compatibility"
2296 << endl
<< "#extension GL_ARB_shader_image_load_store: enable"
2298 << endl
<< "layout (local_size_x = " << work_group_size
.x()
2299 << ", local_size_y = " << work_group_size
.y()
2300 << ", local_size_z = " << work_group_size
.z() << ") in;"
2302 << endl
<< "const int tile_size = " << tile_size
<< ";"
2304 << getMultiLightChunk()->getLightProgSnippet()
2307 << endl
<< "// matTransf is the world to view matrix"
2309 << getDispatchProgSnippet()
2311 << getClusteringProgSnippet(false)
2314 << endl
<< "// We get the pre calculated list of frustums for each tile."
2315 << endl
<< "// The cluster frustum is accessed with the help of the"
2316 << endl
<< "// gl_WorkGroupID variable:"
2318 << endl
<< "// idx = j * numHorizontalTiles + i"
2319 << endl
<< "// idx = gl_WorkGroupID.y * dispatchData.numTiles.x + gl_WorkGroupID.x"
2321 << getFrustumProgSnippet()
2324 << endl
<< "// A simple Sphere abstraction"
2326 << endl
<< "struct Sphere"
2328 << endl
<< " vec3 c; // Center point."
2329 << endl
<< " float r; // Radius."
2333 << endl
<< "// A simple Cone abstraction"
2335 << endl
<< "struct Cone"
2337 << endl
<< " vec3 T; // Cone tip."
2338 << endl
<< " float h; // Height of the cone."
2339 << endl
<< " vec3 d; // Direction of the cone."
2340 << endl
<< " float r; // bottom radius of the cone."
2343 << getLightIndexProgSnippet()
2346 << endl
<< "// The global light index list that is to be written by this shader"
2348 << endl
<< "layout (std430) buffer LightIndexCounter"
2350 << endl
<< " uint lightIndexCounter;"
2354 << endl
<< "// Per work group shared state"
2356 << endl
<< "shared Frustum sharedFrustum;"
2357 << endl
<< "shared vec2 sharedFrustumZ;"
2358 << endl
<< "shared uint sharedLightCount;"
2359 << endl
<< "shared uint sharedLightStartOffset;"
2360 << endl
<< "shared uint sharedLightIndexList[1024];"
2363 << endl
<< "// Check to see if a sphere is fully behind (inside the negative halfspace of) a plane."
2365 << endl
<< "bool sphereInsidePlane("
2366 << endl
<< " in const Sphere sphere,"
2367 << endl
<< " in const Plane plane)"
2369 << endl
<< " float val = dot(plane.N, sphere.c) - plane.d;"
2370 << endl
<< " return val < -sphere.r;"
2374 << endl
<< "// Check to see of a light is partially contained within the frustum."
2376 << endl
<< "bool sphereInsideFrustum("
2377 << endl
<< " in const Sphere sphere,"
2378 << endl
<< " in const Frustum frustum,"
2379 << endl
<< " in const float zNear,"
2380 << endl
<< " in const float zFar)"
2382 << endl
<< " bool result = true;"
2384 << endl
<< " if (sphere.c.z - sphere.r > zNear || zFar > sphere.c.z + sphere.r)"
2386 << endl
<< " result = false;"
2389 << endl
<< " for (int i = 0; i < 4 && result; i++)"
2391 << endl
<< " if (sphereInsidePlane(sphere, frustum.planes[i]))"
2393 << endl
<< " result = false;"
2397 << endl
<< " return result;"
2401 << endl
<< "// Check to see if a point is fully behind (inside the negative halfspace of) a plane."
2403 << endl
<< "bool pointInsidePlane("
2404 << endl
<< " in const vec3 p,"
2405 << endl
<< " in const Plane plane)"
2407 << endl
<< " float val = dot(plane.N, p) - plane.d;"
2408 << endl
<< " return val < 0;"
2412 << endl
<< "// Check to see if a cone if fully behind (inside the negative halfspace of) a plane."
2414 << endl
<< "bool coneInsidePlane("
2415 << endl
<< " in const Cone cone,"
2416 << endl
<< " in const Plane plane)"
2418 << endl
<< " // Compute the farthest point on the end of the cone to the positive space of the plane."
2419 << endl
<< " vec3 m = cross(cross(plane.N, cone.d), cone.d);"
2420 << endl
<< " vec3 Q = cone.T + cone.d * cone.h - m * cone.r;"
2422 << endl
<< " // The cone is in the negative halfspace of the plane if both"
2423 << endl
<< " // the tip of the cone and the farthest point on the end of the cone to the "
2424 << endl
<< " // positive halfspace of the plane are both inside the negative halfspace "
2425 << endl
<< " // of the plane."
2426 << endl
<< " return pointInsidePlane(cone.T, plane) && pointInsidePlane(Q, plane);"
2429 << endl
<< "bool coneInsideFrustum("
2430 << endl
<< " in const Cone cone,"
2431 << endl
<< " in const Frustum frustum,"
2432 << endl
<< " in const float zNear,"
2433 << endl
<< " in const float zFar)"
2435 << endl
<< " bool result = true;"
2437 << endl
<< " Plane nearPlane; nearPlane.N = vec3(0, 0,-1); nearPlane.d = -zNear;"
2438 << endl
<< " Plane farPlane; farPlane.N = vec3(0, 0, 1); farPlane.d = zFar;"
2440 << endl
<< " if (coneInsidePlane(cone, nearPlane) || coneInsidePlane(cone, farPlane))"
2442 << endl
<< " result = false;"
2445 << endl
<< " for (int i = 0; i < 4 && result; i++)"
2447 << endl
<< " if (coneInsidePlane(cone, frustum.planes[i]))"
2449 << endl
<< " result = false;"
2453 << endl
<< " return result;"
2457 << endl
<< "// Helper function that provides the cluster index (ivec3) of the current work group invocation"
2459 << endl
<< "ivec3 clusterAccessor()"
2461 << endl
<< " return ivec3(gl_WorkGroupID);"
2465 << endl
<< "// Helper function that provides the tile frustum index of the current cluster"
2467 << endl
<< "int frustumAccessor()"
2469 << endl
<< " return int(gl_WorkGroupID.y) * dispatchData.numTiles.x + int(gl_WorkGroupID.x);"
2473 << endl
<< "// Append the light light_idx to the list of lights to be rendered for "
2474 << endl
<< "// this cluster. That is we have to increment the sharedLightCount and"
2475 << endl
<< "// to append the light_idx to the sharedLightIndexList."
2477 << endl
<< "void appendLight(in const uint light_idx)"
2479 << endl
<< " uint idx = atomicAdd(sharedLightCount, 1);"
2480 << endl
<< " if (idx < 1024)"
2482 << endl
<< " sharedLightIndexList[idx] = light_idx;"
2486 << endl
<< "void main()"
2489 << endl
<< " // Initialize the work group shared state: Only the first thread is needed for that"
2491 << endl
<< " if (gl_LocalInvocationIndex == 0)"
2493 << endl
<< " sharedFrustum = frustums.frustum[frustumAccessor()];"
2494 << endl
<< " sharedFrustumZ = getClusterDepth();"
2495 << endl
<< " sharedLightCount = 0;"
2498 << endl
<< " memoryBarrierShared(); // Ensure change to sharedXXX is visible in other invocations"
2499 << endl
<< " barrier(); // Stall until every thread reaches this point"
2502 << endl
<< " // We iterate over all affected lights whereby splitting the lights into subsets for each thread."
2503 << endl
<< " // For each light we test it against the current frustum and if it is inside of the frustum, we add"
2504 << endl
<< " // the light to the group shared list of lights contributing to the current cluster corresponding to"
2505 << endl
<< " // the work group."
2507 << endl
<< " for (uint i = gl_LocalInvocationIndex; i < " << getAffectedLightIndexListVariableName() << ".idx.length(); i += gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z)"
2509 << endl
<< " uint light_index = " << getAffectedLightIndexListVariableName() << ".idx[i];"
2511 << endl
<< " if (" << getMultiLightChunk()->getLightVariableName() << ".light[light_index].enabled)"
2513 << endl
<< " Light light = " << getMultiLightChunk()->getLightVariableName() << ".light[light_index];"
2515 << endl
<< " switch (light.type)"
2517 << endl
<< " case DIRECTIONAL_LIGHT:"
2519 << endl
<< " appendLight(light_index);"
2521 << endl
<< " break;"
2523 << endl
<< " case POINT_LIGHT:"
2524 << endl
<< " case CINEMA_LIGHT:"
2526 << endl
<< " vec4 position = dispatchData.matTransf * vec4(light.position, 1.0);"
2528 << endl
<< " Sphere sphere = { position.xyz, light.rangeCutOff };"
2530 << endl
<< " if (sphereInsideFrustum(sphere, sharedFrustum, sharedFrustumZ.x, sharedFrustumZ.y))"
2532 << endl
<< " appendLight(light_index);"
2535 << endl
<< " break;"
2537 << endl
<< " case SPOT_LIGHT:"
2539 << endl
<< " vec4 position = dispatchData.matTransf * vec4(light.position, 1.0);"
2540 << endl
<< " vec4 direction = dispatchData.matTransf * vec4(light.direction, 0.0);"
2542 << endl
<< " float radius = tan(light.spotlightAngle) * light.rangeCutOff;"
2543 << endl
<< " Cone cone = { position.xyz, light.rangeCutOff, direction.xyz, radius };"
2545 << endl
<< " if (coneInsideFrustum(cone, sharedFrustum, sharedFrustumZ.x, sharedFrustumZ.y))"
2547 << endl
<< " appendLight(light_index);"
2550 << endl
<< " break;"
2556 << endl
<< " // Wait till all threads in group have caught up."
2558 << endl
<< " memoryBarrierShared(); // Ensure change to sharedXXX is visible in other invocations"
2559 << endl
<< " barrier(); // Stall until every thread reaches this point"
2562 << endl
<< " // Now we have the sharedLightIndexList filled and know by sharedLightCount the number"
2563 << endl
<< " // of lights that are contributing for the current cluster. What we have to do now is"
2564 << endl
<< " // to get space in the global light index list (lightIndexList). For that, we use the"
2565 << endl
<< " // global light index counter (lightIndexCounter) by atomically incrementing it"
2566 << endl
<< " // with the number of lights contributing to the current cluster and getting back the"
2567 << endl
<< " // offset from the start of the global light index list (sharedLightStartOffset)."
2568 << endl
<< " // So now we have requested space on the global light index list and we have the data"
2569 << endl
<< " // that need to be written to the global light grid data image, i.e. the offset from"
2570 << endl
<< " // the beginning of the global light index list and the number of lights to use for"
2571 << endl
<< " // shading the fragments falling into the current cluster."
2573 << endl
<< " // Only thread 0 is needed for:"
2574 << endl
<< " // - atomically increment lightIndexCounter by sharedLightCount to get"
2575 << endl
<< " // sharedLightStartOffset"
2576 << endl
<< " // - write (sharedLightStartOffset, sharedLightCount) to light grid"
2578 << endl
<< " if (gl_LocalInvocationIndex == 0)"
2580 << endl
<< " sharedLightStartOffset = atomicAdd(lightIndexCounter, sharedLightCount);"
2582 << endl
<< " uvec4 data = uvec4(sharedLightStartOffset, sharedLightCount, 0, 0);"
2585 << endl
<< " // We must protect from overflow"
2587 << endl
<< " uint num_indices = " << getLightIndexListVariableName() << ".idx.length();"
2588 << endl
<< " if (sharedLightStartOffset + sharedLightCount >= num_indices)"
2589 << endl
<< " data = uvec4(0,0,0,0);"
2591 << endl
<< " imageStore(" << getLightGridVariableName() << ", clusterAccessor(), data);"
2594 << endl
<< " memoryBarrierShared(); // Ensure change to sharedXXX is visible in other invocations"
2595 << endl
<< " barrier(); // Stall until every thread reaches this point"
2598 << endl
<< " // The last task is to write the actual lights affecting the current cluster into the"
2599 << endl
<< " // the global light index list. We have already requestet the appropriate space on the"
2600 << endl
<< " // this list so we can just iterate over the local light list (sharedLightIndexList) and"
2601 << endl
<< " // write the carry the content to the global list. That can also be done in parrallel, so"
2602 << endl
<< " // we use all the thread we have at hand."
2605 << endl
<< " uint num_indices = " << getLightIndexListVariableName() << ".idx.length();"
2607 << endl
<< " for (uint i = gl_LocalInvocationIndex; i < sharedLightCount; i += gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z)"
2609 << endl
<< " if (sharedLightStartOffset + i < num_indices)"
2610 << endl
<< " " << getLightIndexListVariableName() << ".idx[sharedLightStartOffset + i] = sharedLightIndexList[i];"
// Declared to return the text of a fragment program as std::string.
// NOTE(review): in this view the body only shows three snippet insertions
// (light, clustering and light index snippets) that are commented out, i.e.
// they do not contribute to the result; confirm what the function actually
// emits. getFragmentProgramSnippet() streams the same three snippets.
2619 std::string
ClusterShadingStage::get_fragment_cp_program()
2621 using namespace std
;
2627 //<< getMultiLightChunk()->getLightProgSnippet()
2629 //<< getClusteringProgSnippet()
2631 //<< getLightIndexProgSnippet()
2638 /*-------------------- Shader Program Snippets ---------------------------*/
// Emits the GLSL declaration of the std140 uniform block DispatchData
// (instance name dispatchData) that the compute programs of this file read:
//   matTransf - a transformation matrix; each including program states its
//               meaning in a shader comment (inverse projection matrix in
//               the frustum program, world to view matrix in the light
//               culling program)
//   viewport  - read by the frustum program as (x, y, width, height)
//   numTiles  - number of tiles in x and y direction
2640 std::string
ClusterShadingStage::getDispatchProgSnippet() const
2642 using namespace std
;
2647 << endl
<< "layout (std140) uniform DispatchData"
2649 << endl
<< " mat4 matTransf;"
2650 << endl
<< " uvec4 viewport;"
2651 << endl
<< " ivec2 numTiles;"
2652 << endl
<< "} dispatchData;"
// Emits the GLSL declarations and helper functions that map between cluster
// keys and eye space depth and that give access to the light grid image:
//   - the tile_size constant,
//   - the light grid image (rg32ui uimage2DArray at getLightGridBindingPnt()),
//   - the std140 uniform block holding the clustering parameters,
//   - cluster_z() / getClusterDepth(): cluster key k -> eye space z,
//   - cluster_k(): eye space z -> cluster key k,
//   - gridAccessor() / getGridData(): window position + eye space z ->
//     light grid texel.
//
// is_frag_shader: an `if (!is_frag_shader)` test precedes the cluster_z() /
// getClusterDepth() text; getClusterDepth() reads gl_WorkGroupID, which only
// exists in compute shaders.
// NOTE(review): the extent of that branch, and whether other parts are
// emitted conditionally, cannot be seen in this view - confirm.
// NOTE(review): get_light_culling_cp_program() emits its own
// "const int tile_size" before including this snippet with
// is_frag_shader == false; confirm the declaration below does not end up
// twice in that program.
2659 std::string
ClusterShadingStage::getClusteringProgSnippet(bool is_frag_shader
) const
2661 using namespace std
;
2663 const UInt32 tile_size
= getTileSize();
2670 << endl
<< "const int tile_size = " << tile_size
<< ";"
2676 << endl
<< "// We write our results into the global light grid with:"
2677 << endl
<< "// imageStore(" << getLightGridVariableName() << ", ivec3(i,j,k), uvec4(o,c,0,0))"
2678 << endl
<< "// and read it back with:"
2679 << endl
<< "// imageLoad(" << getLightGridVariableName() << ", ivec3(i,j,k)).xy"
2680 << endl
<< "// were:"
2681 << endl
<< "// i,j,k define the cluster index"
2682 << endl
<< "// o is the offset from the beginning of the global light index list for the cluster"
2683 << endl
<< "// c is the number if lights that are to be used for the cluster"
// Light grid image declaration; binding point and variable name come from
// getLightGridBindingPnt() / getLightGridVariableName().
2685 << endl
<< "layout(binding = " << getLightGridBindingPnt() << ", rg32ui) uniform uimage2DArray " << getLightGridVariableName() << ";"
2688 << endl
<< "// The data we need to calc the cluster depth values from the cluster key k"
// Uniform block with the clustering parameters. The members marked
// "for testing" (D, c, n_c) are commented out and therefore not part of the
// emitted block.
2690 << endl
<< "layout (std140) uniform " << getClusteringDataBlockName()
2692 << endl
<< " float zNear; // positive near plane distance from eye zNear > 0"
2693 << endl
<< " float zFar; // positive far plane distance from eye zFar > zNear > 0"
2694 //<< endl << " float D; // positive near plane offset D >= 0 // for testing"
2695 << endl
<< " float nD; // zNear + D : shader optimization"
2696 << endl
<< " float lg_nD; // log2(nD) : shader optimization"
2697 << endl
<< " float a; // precalculated factor (c-1)/log2(f/(n+D))"
2698 << endl
<< " float b; // precalculated factor log2(f/(n+D))/(c-1)"
2699 //<< endl << " int c; // number of cluster planes // for testing"
2700 << endl
<< " int c_1; // number of cluster planes minus one : shader optimization"
2701 << endl
<< " ivec2 p_v; // viewport corner points"
2702 //<< endl << " ivec3 n_c; // number of clusters // for testing"
2703 << endl
<< " bool enabled;// cluster shadingd enabled"
2704 << endl
<< "} " << getClusteringDataVariableName() << ";"
2707 << endl
<< "// cluster_z calculates the cluster eye space z from the cluster key k value"
2708 << endl
<< "// z_e = cluster_z(k, ...) with z_e in [-n, -f]"
2710 << endl
<< "// Formular:"
2711 << endl
<< "// z_e = -n if k == 0"
2712 << endl
<< "// z_e = -(n+D) * exp2( (k-1)*(log2(f/(n+D))/(c-1)) ) else"
2713 << endl
<< "// z_e = -f if k > c-1"
// Compute shader only part, see the notes at the top of the function.
2716 if (!is_frag_shader
)
// Disabled reference version of cluster_z() that evaluates the formula from
// n, f, D and c directly; kept for comparison with the variant below that
// uses the precalculated nD, b and c_1.
2719 //<< endl << "float cluster_z("
2720 //<< endl << " in const uint k, // cluster coordinate, 0 <= k <= c"
2721 //<< endl << " in const float n, // near plane distance from viewer n > 0"
2722 //<< endl << " in const float f, // far plane distance from viewer f > n > 0"
2723 //<< endl << " in const float D, // near plane offset"
2724 //<< endl << " in const int c) // number of cluster planes"
2726 //<< endl << " if (k == 0) return -n;"
2727 //<< endl << " if (k > c-1) return -f;"
2729 //<< endl << " float z_e = -(n+D) * exp2(float(k-1) * (log2(f/(n+D))/float(c-1)));"
2730 //<< endl << " return z_e;"
2733 << endl
<< "float cluster_z("
2734 << endl
<< " in const uint k, // cluster coordinate, 0 <= k <= c"
2735 << endl
<< " in const float n, // near plane distance from viewer n > 0"
2736 << endl
<< " in const float f, // far plane distance from viewer f > n > 0"
2737 << endl
<< " in const float nD, // near plane distance from viewer plus offset, nD = n+D with n > 0, D > 0"
2738 << endl
<< " in const float b, // factor log2(f/(n+D))/(c-1)"
2739 << endl
<< " in const int c_1) // number of cluster planes"
2741 << endl
<< " if (k == 0) return -n;"
2742 << endl
<< " if (k > c_1) return -f;"
2744 << endl
<< " float z_e = -nD * exp2(float(k-1) * b);"
2745 << endl
<< " return z_e;"
// NOTE(review): the shader comment emitted next describes a tile frustum
// index, but getClusterDepth() returns the pair of eye space depths
// cluster_z(k) and cluster_z(k+1) for k = gl_WorkGroupID.z, i.e. the depth
// range of the current cluster.
2749 << endl
<< "// Helper function that provides the tile frustum index of the current cluster"
2751 << endl
<< "vec2 getClusterDepth()"
2753 << endl
<< " return vec2(cluster_z(gl_WorkGroupID.z, "
2754 << getClusteringDataVariableName() << ".zNear, "
2755 << getClusteringDataVariableName() << ".zFar, "
2756 << getClusteringDataVariableName() << ".nD, "
2757 << getClusteringDataVariableName() << ".b, "
2758 << getClusteringDataVariableName() << ".c_1),"
2759 << endl
<< " cluster_z(gl_WorkGroupID.z+1, "
2760 << getClusteringDataVariableName() << ".zNear, "
2761 << getClusteringDataVariableName() << ".zFar, "
2762 << getClusteringDataVariableName() << ".nD, "
2763 << getClusteringDataVariableName() << ".b, "
2764 << getClusteringDataVariableName() << ".c_1));"
2772 << endl
<< "// cluster_k and cluster_k_verbose calculates the cluster key from the eye space z value"
2773 << endl
<< "// k = cluster_k(z_e, ...) with k in [0, c["
2775 << endl
<< "// Formular:"
2776 << endl
<< "// k = 0 if z_e >= -(n+D)"
2777 << endl
<< "// k = 1 + (c-1) * log2(z_e/-(n+D)) / log2(f/(n+D)) else"
2778 << endl
<< "// k = c-1 if z_e <= -f"
// Disabled reference version of cluster_k() working on n, f, D and c; the
// emitted variant below uses the precalculated nD, lg_nD, a and c_1.
2780 //<< endl << "int cluster_k("
2781 //<< endl << " in const float z_e, // eye space z-position, z_e < 0"
2782 //<< endl << " in const float n, // near plane distance from viewer n > 0"
2783 //<< endl << " in const float f, // far plane distance from viewer f > n > 0"
2784 //<< endl << " in const float D, // near plane offset"
2785 //<< endl << " in const int c) // number of cluster planes"
2787 //<< endl << " if (z_e >= -(n+D)) return 0;"
2788 //<< endl << " if (z_e <= -f) return c-1;"
2790 //<< endl << " float s = 1.0+(float(c-1)/log2(f/(n+D)))*log2(z_e/-(n+D));"
2791 //<< endl << " return clamp(int(s), 0, c-1);"
2794 << endl
<< "int cluster_k("
2795 << endl
<< " in const float z_e, // eye space z-position, z_e < 0"
2796 << endl
<< " in const float nD, // near plane distance plus the offset D from viewer n > 0, D > 0"
2797 << endl
<< " in const float lg_nD,// log2(nD)"
2798 << endl
<< " in const float f, // far plane distance from viewer f > n > 0"
2799 << endl
<< " in const float a, // (c-1)/log2(f/(n+D))"
2800 << endl
<< " in const int c_1) // number of cluster planes minus 1"
2802 << endl
<< " if (z_e >= -nD) return 0;"
2803 << endl
<< " if (z_e <= -f) return c_1;"
2805 << endl
<< " float s = 1.0 + a * (log2(-z_e) - lg_nD);"
2806 << endl
<< " return clamp(int(s), 0, c_1);"
2810 << endl
<< "// OpenGL window space is defined such that pixel centers are on half-integer boundaries."
2811 << endl
<< "// So the center of the lower-left pixel is (0.5,0.5). Using pixel_center_integer? adjust"
2812 << endl
<< "// gl_FragCoord such that whole integer values represent pixel centers."
2813 << endl
<< "// This feature exist to be compatible with D3D's window space. Unless you need your shaders"
2814 << endl
<< "// to have this compatibility, you are advised not to use these features."
2815 << endl
<< "// => We do not use it!"
2817 << endl
<< "// Provide a accessor key to probe the light grid."
2818 << endl
<< "// in p_w : xy-screen position provided by gl_FragCoord.xy: lower-left is (0.5, 0.5)"
2819 << endl
<< "// in z_e : fragment eye space z from vPositionES"
2820 << endl
<< "// out : 3D image coordinate"
// gridAccessor(): integer pixel position relative to the viewport corner
// p_v, divided by tile_size, gives the tile; cluster_k() gives the depth
// slice.
2822 << endl
<< "ivec3 gridAccessor("
2823 << endl
<< " in const vec2 p_w,"
2824 << endl
<< " in const float z_e)"
2826 << endl
<< " ivec2 q_w = ivec2(p_w - vec2(0.5, 0.5));"
2827 << endl
<< " int k = cluster_k(z_e, "
2828 << getClusteringDataVariableName() << ".nD, "
2829 << getClusteringDataVariableName() << ".lg_nD, "
2830 << getClusteringDataVariableName() << ".zFar, "
2831 << getClusteringDataVariableName() << ".a, "
2832 << getClusteringDataVariableName() << ".c_1);"
2833 << endl
<< " ivec2 p = (q_w - " << getClusteringDataVariableName() << ".p_v) / tile_size;"
2834 << endl
<< " return ivec3(p.xy, k);"
2838 << endl
<< "// Retrieve the cluster light grid data."
2839 << endl
<< "// in p_w : xy-screen position provided by gl_FragCoord.xy: lower-left is (0.5, 0.5)"
2840 << endl
<< "// in z_e : fragment eye space z from vPositionES"
2841 << endl
<< "// out : (light index list start position, number of lights)"
2843 << endl
<< "uvec2 getGridData("
2844 << endl
<< " in const vec2 p_w,"
2845 << endl
<< " in const float z_e)"
2847 << endl
<< " ivec3 accessor = gridAccessor(p_w, z_e);"
2848 << endl
<< " return imageLoad(" << getLightGridVariableName() << ", accessor).xy;"
// Emits the GLSL types shared by the frustum and the light culling compute
// programs:
//   Plane    - normal N and scalar d; the programs of this file compute d as
//              dot(N, p0) and evaluate dot(N, p) - d for plane tests
//   Frustum  - the four side planes of a tile
//   Frustums - std430 buffer (instance name frustums) with a runtime sized
//              array holding one Frustum per tile
2859 std::string
ClusterShadingStage::getFrustumProgSnippet() const
2861 using namespace std
;
2867 << endl
<< "// A simple Plane abstraction"
2869 << endl
<< "struct Plane"
2871 << endl
<< " vec3 N;"
2872 << endl
<< " float d;"
2876 << endl
<< "// The left, right, top and bottom planes of the tile frustum"
2878 << endl
<< "struct Frustum"
2880 << endl
<< " Plane planes[4];"
2883 << endl
<< "layout (std430) buffer Frustums"
2885 << endl
<< " Frustum frustum[];"
2886 << endl
<< "} frustums;"
// Emits the GLSL declarations of the two std430 buffers that carry light
// indices, each holding a runtime sized array `uint idx[]`:
//   - the affected light index list (block / instance names from
//     getAffectedLightIndexListBlockName() / ...VariableName()),
//   - the global light index list (block / instance names from
//     getLightIndexListBlockName() / ...VariableName()).
2893 std::string
ClusterShadingStage::getLightIndexProgSnippet() const
2895 using namespace std
;
2901 << endl
<< "// Index list into the global light list that contain only"
2902 << endl
<< "// those lights that affect shading in the view frustum."
2904 << endl
<< "layout (std430) buffer " << getAffectedLightIndexListBlockName()
2906 << endl
<< " uint idx[];"
2907 << endl
<< "} " << getAffectedLightIndexListVariableName() << ";"
2910 << endl
<< "// Index list into the global light list. All sequences of lights"
2911 << endl
<< "// contributing to shading of the individal clusters are assembled"
2912 << endl
<< "// in this list. The start position and number of lights for each"
2913 << endl
<< "// cluster are provided by the light grid image."
2915 << endl
<< "layout (std430) buffer " << getLightIndexListBlockName()
2917 << endl
<< " uint idx[];"
2918 << endl
<< "} " << getLightIndexListVariableName() << ";"
// Emits the GLSL text a fragment shader needs for cluster shading: between
// two marker comment lines it concatenates the light snippet of the multi
// light chunk, the clustering snippet in its fragment shader form
// (is_frag_shader == true) and the light index list declarations.
2925 std::string
ClusterShadingStage::getFragmentProgramSnippet() const
2927 using namespace std
;
2933 << endl
<< "//-- ClusterShadingStage::getFragmentProgramSnippet() --"
2935 << getMultiLightChunk()->getLightProgSnippet()
2937 << getClusteringProgSnippet(true)
2939 << getLightIndexProgSnippet()
2941 << endl
<< "//------------------------------------------------------"