fixed: auto_ptr -> unique_ptr
[opensg.git] / Source / Contrib / Techniques / OSGClusterShadingStage.cpp
blob33dbc75de918d19feb69631bb6812866ea13a261
1 /*---------------------------------------------------------------------------*\
2 * OpenSG *
3 * *
4 * *
5 * Copyright (C) 2000-2013 by the OpenSG Forum *
6 * *
7 * www.opensg.org *
8 * *
9 * contact: dirk@opensg.org, gerrit.voss@vossg.org, carsten_neumann@gmx.net *
10 * *
11 \*---------------------------------------------------------------------------*/
12 /*---------------------------------------------------------------------------*\
13 * License *
14 * *
15 * This library is free software; you can redistribute it and/or modify it *
16 * under the terms of the GNU Library General Public License as published *
17 * by the Free Software Foundation, version 2. *
18 * *
19 * This library is distributed in the hope that it will be useful, but *
20 * WITHOUT ANY WARRANTY; without even the implied warranty of *
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
22 * Library General Public License for more details. *
23 * *
24 * You should have received a copy of the GNU Library General Public *
25 * License along with this library; if not, write to the Free Software *
26 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *
27 * *
28 \*---------------------------------------------------------------------------*/
29 /*---------------------------------------------------------------------------*\
30 * Changes *
31 * *
32 * *
33 * *
34 * *
35 * *
36 * *
37 \*---------------------------------------------------------------------------*/
39 //---------------------------------------------------------------------------
40 // Includes
41 //---------------------------------------------------------------------------
43 #include <cstdlib>
44 #include <cstdio>
46 #include "OSGConfig.h"
47 #include "OSGAction.h"
48 #include "OSGCamera.h"
49 #include "OSGMatrixCamera.h"
50 #include "OSGOrthographicCamera.h"
51 #include "OSGRenderAction.h"
52 #include "OSGTime.h"
54 #include "OSGClusterShadingStage.h"
55 #include "OSGClusterShadingStageData.h"
57 #include "OSGChunkMaterial.h"
58 #include "OSGDrawEnv.h"
59 #include "OSGFrameBufferObject.h"
60 #include "OSGTextureObjChunk.h"
61 #include "OSGStateOverride.h"
63 #include "OSGMatrixUtility.h"
65 #include "OSGImage.h"
66 #include "OSGTextureObjChunk.h"
67 #include "OSGTextureImageChunk.h"
68 #include "OSGMultiLightChunk.h"
69 #include "OSGUniformBufferObjStd140Chunk.h"
70 #include "OSGShaderStorageBufferObjStdLayoutChunk.h"
72 #include "OSGShaderProgram.h"
73 #include "OSGShaderProgramChunk.h"
74 #include "OSGComputeShaderChunk.h"
75 #include "OSGComputeShaderAlgorithm.h"
76 #include "OSGAlgorithmComputeElement.h"
78 OSG_BEGIN_NAMESPACE
80 // Documentation for this class is emitted in the
81 // OSGClusterShadingStageBase.cpp file.
82 // To modify it, please change the .fcd file (OSGClusterShadingStage.fcd) and
83 // regenerate the base file.
85 /***************************************************************************\
86 * Class variables *
87 \***************************************************************************/
89 /***************************************************************************\
90 * Class methods *
91 \***************************************************************************/
93 void ClusterShadingStage::initMethod(InitPhase ePhase)
95 Inherited::initMethod(ePhase);
97 if(ePhase == TypeObject::SystemPost)
99 RenderAction::registerEnterDefault(
100 ClusterShadingStage::getClassType(),
101 reinterpret_cast<Action::Callback>(&ClusterShadingStage::renderEnter));
103 RenderAction::registerLeaveDefault(
104 ClusterShadingStage::getClassType(),
105 reinterpret_cast<Action::Callback>(&ClusterShadingStage::renderLeave));
109 /***************************************************************************\
110 * Instance methods *
111 \***************************************************************************/
113 /*-------------------------------------------------------------------------*\
114 - private -
115 \*-------------------------------------------------------------------------*/
117 /*----------------------- constructors & destructors ----------------------*/
119 ClusterShadingStage::ClusterShadingStage(void)
120 : Inherited()
121 , _bSetupStage(false)
125 ClusterShadingStage::ClusterShadingStage(const ClusterShadingStage &source)
126 : Inherited(source)
127 , _bSetupStage(false)
131 ClusterShadingStage::~ClusterShadingStage(void)
135 /*----------------------------- class specific ----------------------------*/
137 void ClusterShadingStage::changed(ConstFieldMaskArg whichField,
138 UInt32 origin,
139 BitVector details)
142 // The BeaconMatrixFieldMask is deliberately omitted
144 if((whichField & (
145 BlockSizeFieldMask |
146 TileSizeFieldMask |
147 NumClusterZFieldMask |
148 NearPlaneOffsetFieldMask |
150 DispatchDataBindingPntFieldMask |
151 ClusterDataBindingPntFieldMask |
152 LightBindingPntFieldMask |
153 AffectedLightIndexListBindingPntFieldMask |
154 FrustumBindingPntFieldMask |
155 LightIndexListBindingPntFieldMask |
156 LightIndexCounterBindingPntFieldMask |
157 LightGridBindingPntFieldMask |
159 AffectedLightIndexListBlockNameFieldMask |
160 AffectedLightIndexListVariableNameFieldMask |
161 LightIndexListBlockNameFieldMask |
162 LightIndexListVariableNameFieldMask |
163 ClusteringDataBlockNameFieldMask |
164 ClusteringDataVariableNameFieldMask |
165 LightGridVariableNameFieldMask |
167 CalcFrustumsOnCPUFieldMask |
168 CullLighsOnCPUFieldMask |
169 //DisabledFieldMask |
170 MaxLightIndexListSizeFieldMask |
171 MaxClusterLightCountFieldMask
172 //FrustNodeFieldMask |
173 //FrustCompShaderAlgoFieldMask |
174 //CullCompShaderAlgoFieldMask |
175 //MultiLightChunkFieldMask |
176 //ShaderProgChunkFieldMask
177 )) != 0)
179 _bSetupStage = true;
182 Inherited::changed(whichField, origin, details);
185 void ClusterShadingStage::dump( UInt32 ,
186 const BitVector ) const
188 SLOG << "Dump ClusterShadingStage NI" << std::endl;
191 /*------------------------------- Draw ------------------------------------*/
193 Action::ResultE ClusterShadingStage::renderEnter(Action* action)
195 RenderAction* a = dynamic_cast<RenderAction*>(action);
197 if(a != NULL)
199 // ??? this->pushPartition(a, RenderPartition::CopyAll);
201 RenderPartition* pPart = a->getActivePartition();
203 DrawEnv& oDrawEnv = pPart->getDrawEnv();
205 Int32 iVPLeft = oDrawEnv.getPixelLeft();
206 Int32 iVPBottom = oDrawEnv.getPixelBottom();
207 Int32 iVPWidth = oDrawEnv.getPixelWidth();
208 Int32 iVPHeight = oDrawEnv.getPixelHeight();
210 this->updateData(a, iVPLeft, iVPBottom, iVPWidth, iVPHeight);
212 ClusterShadingStageDataUnrecPtr pData = a->getData<ClusterShadingStageData *>(_iDataSlotId);
215 // Perform the computation
217 if (!getDisabled())
219 //std::cout << "compute shader..." << std::endl;
220 this->recurse(action, getFrustNode());
221 a->useNodeList(false);
224 a->pushState();
226 a->addOverride( getMultiLightChunk ()->getClassId() + getLightBindingPnt(), getMultiLightChunk());
227 a->addOverride( getShaderProgChunk ()->getClassId(), getShaderProgChunk());
228 a->addOverride(pData->getLightGridTexImgChunkFS ()->getClassId() + getLightGridBindingPnt(), pData->getLightGridTexImgChunkFS());
229 a->addOverride(pData->getAffectedLightIndexListSSBOChunk()->getClassId() + getAffectedLightIndexListBindingPnt(), pData->getAffectedLightIndexListSSBOChunk());
230 a->addOverride(pData->getLightIndexListSSBOChunk ()->getClassId() + getLightIndexListBindingPnt(), pData->getLightIndexListSSBOChunk());
231 a->addOverride(pData->getCullClusterDatUBOChunk ()->getClassId() + getClusterDataBindingPnt(), pData->getCullClusterDatUBOChunk());
233 this->recurseFromThis(a);
234 a->useNodeList(false);
236 a->popState();
238 // ??? this->popPartition(a);
241 return Action::Skip;
244 Action::ResultE ClusterShadingStage::renderLeave(Action *action)
246 return Action::Skip;
249 /*------------------------------ Data -------------------------------------*/
251 void ClusterShadingStage::initData(
252 RenderAction* pAction,
253 Int32 iVPLeft,
254 Int32 iVPBottom,
255 Int32 iVPWidth,
256 Int32 iVPHeight)
258 Camera* pCam = pAction->getCamera();
259 if (pCam == NULL)
260 return;
262 OSG_ASSERT(pAction->getData<ClusterShadingStageData*>(_iDataSlotId) == NULL);
264 ClusterShadingStageDataUnrecPtr pData = ClusterShadingStageData::createLocal();
265 this->setData(pData, _iDataSlotId, pAction);
267 setupStageData(pData, pCam, iVPLeft, iVPBottom, iVPWidth, iVPHeight);
270 void ClusterShadingStage::updateData(
271 RenderAction* pAction,
272 Int32 iVPLeft,
273 Int32 iVPBottom,
274 Int32 iVPWidth,
275 Int32 iVPHeight)
277 ClusterShadingStageDataUnrecPtr pData = pAction->getData<ClusterShadingStageData *>(_iDataSlotId);
279 Camera* pCam = pAction->getCamera();
280 if (pCam == NULL)
281 return;
283 if(pData == NULL)
285 initData(pAction, iVPLeft, iVPBottom, iVPWidth, iVPHeight);
287 pData = pAction->getData<ClusterShadingStageData *>(_iDataSlotId);
289 else if (_bSetupStage)
291 setupStageData(pData, pCam, iVPLeft, iVPBottom, iVPWidth, iVPHeight);
294 updateStageData(pData, pCam, iVPLeft, iVPBottom, iVPWidth, iVPHeight);
296 _bSetupStage = false;
299 /*---------------------------- StageData ----------------------------------*/
301 void ClusterShadingStage::setupStageData(
302 ClusterShadingStageData* pData,
303 Camera* pCam,
304 Int32 iPixelLeft,
305 Int32 iPixelBottom,
306 Int32 iPixelWidth,
307 Int32 iPixelHeight)
309 if (pData != NULL)
311 ImageUnrecPtr img_light_grid = create_image(1,1,1, getCullLighsOnCPU());
312 TextureObjChunkUnrecPtr tex_obj_light_grid = create_texture_state(img_light_grid);
313 TextureImageChunkUnrecPtr tex_img_light_grid_cs = create_texture_image_state(tex_obj_light_grid, GL_WRITE_ONLY);
314 TextureImageChunkUnrecPtr tex_img_light_grid_fs = create_texture_image_state(tex_obj_light_grid, GL_READ_ONLY);
316 ShaderStorageBufferObjStdLayoutChunkUnrecPtr ssbo_frustums = create_frustum_state(VecFrustumsT(1));
317 ShaderStorageBufferObjStdLayoutChunkUnrecPtr ssbo_affected_light_index_list = create_index_state(1);
318 ShaderStorageBufferObjStdLayoutChunkUnrecPtr ssbo_light_index_list = create_index_state(getMaxLightIndexListSize());
319 ShaderStorageBufferObjStdLayoutChunkUnrecPtr ssbo_light_index_counter = create_light_index_counter_state();
321 UniformBufferObjStd140ChunkUnrecPtr ubo_frustum_dispatch_data = create_dispatch_data(DispatchData());
322 UniformBufferObjStd140ChunkUnrecPtr ubo_light_culling_dispatch_data = create_dispatch_data(DispatchData());
323 UniformBufferObjStd140ChunkUnrecPtr ubo_clustering_data = create_clustering_data(ClusteringData());
325 ChunkMaterialUnrecPtr frust_chunk_material = ChunkMaterial::createLocal();
326 ChunkMaterialUnrecPtr cull_chunk_material = ChunkMaterial::createLocal();
328 frust_chunk_material->addChunk(ubo_frustum_dispatch_data, getDispatchDataBindingPnt());
329 frust_chunk_material->addChunk(ssbo_frustums, getFrustumBindingPnt());
331 cull_chunk_material->addChunk(tex_img_light_grid_cs, getLightGridBindingPnt());
332 cull_chunk_material->addChunk(getMultiLightChunk(), getLightBindingPnt());
333 cull_chunk_material->addChunk(ubo_light_culling_dispatch_data, getDispatchDataBindingPnt());
334 cull_chunk_material->addChunk(ubo_clustering_data, getClusterDataBindingPnt());
335 cull_chunk_material->addChunk(ssbo_frustums, getFrustumBindingPnt());
336 cull_chunk_material->addChunk(ssbo_affected_light_index_list, getAffectedLightIndexListBindingPnt());
337 cull_chunk_material->addChunk(ssbo_light_index_list, getLightIndexListBindingPnt());
338 cull_chunk_material->addChunk(ssbo_light_index_counter, getLightIndexCounterBindingPnt());
341 // force resize in first update
343 pData->setLeft (0);
344 pData->setBottom(0);
345 pData->setWidth (0);
346 pData->setHeight(0);
348 pData->setLightGridImage (img_light_grid);
349 pData->setLightGridTexObjChunk (tex_obj_light_grid);
350 pData->setLightGridTexImgChunkCS (tex_img_light_grid_cs);
351 pData->setLightGridTexImgChunkFS (tex_img_light_grid_fs);
352 pData->setFrustumsSSBOChunk (ssbo_frustums);
353 pData->setAffectedLightIndexListSSBOChunk(ssbo_affected_light_index_list);
354 pData->setLightIndexListSSBOChunk (ssbo_light_index_list);
355 pData->setCullLightIndexCounterSSBOChunk (ssbo_light_index_counter);
356 pData->setFrustDispDatUBOChunk (ubo_frustum_dispatch_data);
357 pData->setCullDispDatUBOChunk (ubo_light_culling_dispatch_data);
358 pData->setCullClusterDatUBOChunk (ubo_clustering_data);
359 pData->setFrustChunkMat (frust_chunk_material);
360 pData->setCullChunkMat (cull_chunk_material);
362 bool isOrtho = isOrthographicCamera(pCam, iPixelWidth, iPixelHeight);
363 pData->setIsOrthographicCamera(isOrtho);
365 create_computation (pData);
366 create_fragment_shader(pData);
368 Thread::setCurrentLocalFlags();
// Per-frame update of the stage data.
//
// Steps performed, in this order:
//   1. update_computation() is run on the stage data.
//   2. The given viewport is compared with the one stored in the stage data
//      and with the stored frustum state; any mismatch triggers a resize.
//      On resize the light grid image is adapted and the tile frustums are
//      either calculated on the CPU or the GPU dispatch is configured.
//   3. The list of lights affecting the view frustum is calculated and
//      written to its SSBO.
//   4. Unless the stage is disabled, the lights are either culled on the CPU
//      (results are written to the light grid image and the index list SSBO)
//      or the GPU light index counter is cleared for the compute shader.
//   5. The clustering data UBO is refreshed and commitChanges() is called.
//
// Returns immediately if pData or pCam is NULL.
372 void ClusterShadingStage::updateStageData(
373 ClusterShadingStageData* pData,
374 Camera* pCam,
375 Int32 iPixelLeft,
376 Int32 iPixelBottom,
377 Int32 iPixelWidth,
378 Int32 iPixelHeight)
380 if (pData == NULL || pCam == NULL)
381 return;
383 update_computation(pData);
// Becomes true if the viewport differs from the stored one or the stored
// frustum state does not match the viewport.
385 bool resize = false;
387 if ( iPixelLeft != pData->getLeft()
388 || iPixelBottom != pData->getBottom()
389 || iPixelWidth != pData->getWidth()
390 || iPixelHeight != pData->getHeight())
391 resize = true;
// Remember the current viewport for the comparison in the next call.
393 pData->setLeft (iPixelLeft );
394 pData->setBottom(iPixelBottom);
395 pData->setWidth (iPixelWidth );
396 pData->setHeight(iPixelHeight);
398 Vec4u viewport(iPixelLeft, iPixelBottom, iPixelWidth, iPixelHeight);
400 if (!check_frustum_state(pData, viewport))
401 resize = true;
403 if (resize)
405 //std::cout << "resize...";
407 Matrix matProjection, matInverseProjection, matProjectionTranslation;
409 pCam->getProjection (matProjection, iPixelWidth, iPixelHeight);
410 pCam->getProjectionTranslation(matProjectionTranslation, iPixelWidth, iPixelHeight);
// The inverse is taken of projection * projection translation.
412 matProjection.mult(matProjectionTranslation);
413 matProjection.inverse(matInverseProjection);
416 // The light grid image size must be adapted.
417 // CPU memory is needed only if culling is performed on the CPU.
419 update_light_grid_image(viewport, pData->getLightGridImage(), getCullLighsOnCPU());
422 // Light culling on CPU forces frustum calculation on CPU
424 if (getCalcFrustumsOnCPU() == true || getCullLighsOnCPU() == true)
426 //std::cout << "...on CPU" << std::endl;
428 // We calculate the tile frustum planes on the CPU and store them in the following
429 // vector for further evaluation.
431 VecFrustumsT frustums;
432 if (isOrthographicCamera(pCam, iPixelWidth, iPixelHeight))
433 calc_ortho_frustums_cpu(viewport, matInverseProjection, frustums);
434 else
435 calc_persp_frustums_cpu(viewport, matInverseProjection, frustums);
438 if (getCullLighsOnCPU())
441 // The frustums just calculated on the CPU must be stored in the stage
442 // data object for CPU light culling.
444 update_frustum_state(pData, frustums);
446 else
449 // The frustums just calculated on the CPU must be uploaded to the SSBO
451 update_frustum_state(pData->getFrustumsSSBOChunk(), frustums);
454 else
456 //std::cout << "...on GPU" << std::endl;
458 update_dispatch_config_frustums(pData, viewport, matInverseProjection);
462 UInt32 tile_size = getTileSize();
464 Real32 zNear = pCam->getNear();
465 Real32 zFar = pCam->getFar ();
// Number of clusters per axis: x/y follow from the viewport size and the tile
// size, z is taken from the NumClusterZ field.
467 UInt32 num_cluster_x = UInt32(osgCeil(Real32(iPixelWidth) / tile_size));
468 UInt32 num_cluster_y = UInt32(osgCeil(Real32(iPixelHeight) / tile_size));
469 UInt32 num_cluster_z = getNumClusterZ();
471 FrustumVolume viewVolume;
472 pCam->getFrustum(viewVolume, iPixelWidth, iPixelHeight);
474 Matrix matEyeFromWorld;
475 pCam->getViewing(matEyeFromWorld, iPixelWidth, iPixelHeight);
478 // We have to update the view matrix for the light culling compute shader
480 if (getCullLighsOnCPU() == false)
482 update_dispatch_config_cull_lights(pData, viewport, matEyeFromWorld);
485 VecLightIndexT vecAffectedLights; // the lights that contribute to the view frustum illumination
486 VecLightEyeSpaceDataT vecEyeSpaceData; // the eye space position and direction of the lights
489 // We determine an index list of all lights that are actually contributing to the
490 // shading of the complete visible view frustum...
492 calc_affected_lights(
493 matEyeFromWorld, // in transform from world to eye space
494 viewVolume, // in the view frustum volume in world space
495 vecAffectedLights, // out the index list
496 vecEyeSpaceData); // optimization: eye space light positions and directions
499 // ...and update the corresponding SSBO accordingly
501 update_index_state(pData->getAffectedLightIndexListSSBOChunk(), vecAffectedLights);
503 if (!getDisabled())
505 //std::cout << "not disabled" << std::endl;
507 // Clearing of the global light index counter is absolutely necessary!
509 if (getCullLighsOnCPU() == false)
511 clear_light_index_counter_state(pData->getCullLightIndexCounterSSBOChunk());
514 if (getCullLighsOnCPU() == true)
516 //std::cout << "cull on CPU" << std::endl;
518 // These are the results of the light culling procedure below. We will fill this
519 // raw data into the appropriate SSBO so that it is available in the fragment shader
520 // for evaluation.
522 VecImageDataT gridData; // tuples of offset and count integers: { o1,c1, o2,c2, o3,c3,...}
523 VecLightIndexT lightIndexList; // pointers into the lights array.
525 Vec3u dimensions(num_cluster_x, num_cluster_y, num_cluster_z);
527 cull_lights(
528 pData, // in contains the CPU calculated tile frustum planes
529 vecAffectedLights, // in the lights that are actually contributing to the view frustum shading
530 vecEyeSpaceData, // in the precalculated light eye space positions and directions
531 dimensions, // in cluster dimensions
532 matEyeFromWorld, // in transform from world to eye space
533 zNear, // in distance of near plane from eye point
534 zFar, // in distance of far plane from eye point
535 getNearPlaneOffset(), // in distance offset for near plane
536 gridData, // out the raw grid data
537 lightIndexList); // out the index list
540 // Fill gridData to image and inform texture object about change
542 update_image_data (pData->getLightGridImage(), gridData);
543 update_texture_state(pData->getLightGridTexObjChunk());
546 // Fill lightIndexList to SSBO:
548 update_index_state(pData->getLightIndexListSSBOChunk(), lightIndexList);
553 // Update the clustering data used in the light cull and fragment shader
// NOTE(review): num_cluster_z is unsigned. With a value of 1 the factor a is 0
// and b becomes a division by zero; with a value of 0 the subtraction wraps
// around - confirm that the NumClusterZ field is validated elsewhere.
555 ClusteringData clusteringData;
556 clusteringData.zNear = zNear; // positive near plane distance from eye zNear > 0
557 clusteringData.zFar = zFar; // positive far plane distance from eye zFar > zNear > 0
558 //clusteringData.D = getNearPlaneOffset(); // positive near plane offset D >= 0
559 clusteringData.nD = zNear + getNearPlaneOffset(); // zNear + D : shader optimization
560 clusteringData.lg_nD = log2(clusteringData.nD); // log2(nD) : shader optimization
561 clusteringData.a = (num_cluster_z-1)/log2(zFar/clusteringData.nD); // precalculated factor (c-1)/log2(f/(n+D))
562 clusteringData.b = 1.f/clusteringData.a; // precalculated factor log2(f/(n+D))/(c-1)
563 //clusteringData.c = num_cluster_z; // number of cluster planes
564 clusteringData.c_1 = num_cluster_z - 1; // number of cluster planes minus one : shader optimization
565 clusteringData.p_v = Vec2i(iPixelLeft, iPixelBottom); // viewport corner points
566 //clusteringData.n_c = Vec3i(num_cluster_x, num_cluster_y, num_cluster_z); // number of clusters
567 clusteringData.enabled = !getDisabled();
569 update_clustering_data(pData->getCullClusterDatUBOChunk(), clusteringData);
571 commitChanges();
574 /*-------------------------- Frustum Details ------------------------------*/
576 inline
577 void ClusterShadingStage::resize_frustums(
578 ClusterShadingStageData* pData,
579 std::size_t size)
581 OSG_ASSERT(pData != NULL);
583 MFMatrix* normals = pData->editMFFrustumPlanesNormals();
584 MFVec4f* distances = pData->editMFFrustumPlanesDistances();
586 normals ->resize(size);
587 distances->resize(size);
590 inline
591 ClusterShadingStage::Frustum
592 ClusterShadingStage::getFrustum(
593 ClusterShadingStageData* pData,
594 const UInt32 idx) const
596 OSG_ASSERT(pData != NULL);
598 Frustum frustum;
600 const Matrix& normals = pData->getFrustumPlanesNormals (idx);
601 const Vec4f& distances = pData->getFrustumPlanesDistances(idx);
603 frustum.planes[0] = Plane(Vec3f(normals[0]), distances[0]);
604 frustum.planes[1] = Plane(Vec3f(normals[1]), distances[1]);
605 frustum.planes[2] = Plane(Vec3f(normals[2]), distances[2]);
606 frustum.planes[3] = Plane(Vec3f(normals[3]), distances[3]);
608 return frustum;
611 inline
612 void ClusterShadingStage::setFrustum(
613 ClusterShadingStageData* pData,
614 const UInt32 idx,
615 const Frustum& frustum)
617 OSG_ASSERT(pData != NULL);
618 OSG_ASSERT(pData->getMFFrustumPlanesNormals ()->size() > idx);
619 OSG_ASSERT(pData->getMFFrustumPlanesDistances()->size() > idx);
621 MFMatrix::reference rMat = pData->editFrustumPlanesNormals (idx);
622 MFVec4f ::reference rVec = pData->editFrustumPlanesDistances(idx);
624 rMat.setValue(frustum.planes[0].getNormal(),
625 frustum.planes[1].getNormal(),
626 frustum.planes[2].getNormal(),
627 frustum.planes[3].getNormal());
629 rVec.setValue(
630 Vec4f(frustum.planes[0].getDistanceFromOrigin(),
631 frustum.planes[1].getDistanceFromOrigin(),
632 frustum.planes[2].getDistanceFromOrigin(),
633 frustum.planes[3].getDistanceFromOrigin())
637 std::size_t ClusterShadingStage::calc_frustum_buffer_size(
638 const VecFrustumsT& vFrustums)
640 std::size_t ao = 0; // aligned offset
641 std::size_t bo = 0; // base offset
643 for (std::size_t j = 0; j < 4; ++j)
645 ao = alignOffset(16, bo); bo = ao + sizeof(Vec3f);
646 ao = alignOffset( 4, bo); bo = ao + sizeof(Real32);
647 ao = alignOffset(16, bo); bo = ao;
650 ao *= vFrustums.size(); bo = ao; // array
651 ao = alignOffset( 16, bo); bo = ao; // padding
653 return ao;
656 std::vector<UInt8> ClusterShadingStage::create_frustum_buffer(
657 const VecFrustumsT& vFrustums)
659 std::size_t size = calc_frustum_buffer_size(vFrustums);
661 std::vector<UInt8> buffer(size);
663 std::size_t ao = 0; // aligned offset
664 std::size_t bo = 0; // base offset
666 for (std::size_t i = 0; i < vFrustums.size(); ++i)
668 for (std::size_t j = 0; j < 4; ++j)
670 ao = alignOffset(16, bo);
671 memcpy(&buffer[0] + ao, &vFrustums[i].planes[j].getNormal(), sizeof(Vec3f));
672 bo = ao + sizeof(Vec3f);
674 ao = alignOffset( 4, bo);
675 *(reinterpret_cast<Real32*>(&buffer[0] + ao)) = vFrustums[i].planes[j].getDistanceFromOrigin();
676 bo = ao + sizeof(Real32);
678 ao = alignOffset(16, bo); bo = ao;
681 ao = alignOffset( 16, bo); bo = ao; // padding
684 return buffer;
687 ShaderStorageBufferObjStdLayoutChunkTransitPtr
688 ClusterShadingStage::create_frustum_state(
689 const VecFrustumsT& vFrustums)
691 ShaderStorageBufferObjStdLayoutChunkTransitPtr ssbo = ShaderStorageBufferObjStdLayoutChunk::createLocal();
693 std::vector<UInt8> buffer = create_frustum_buffer(vFrustums);
695 ssbo->editMFBuffer()->setValues(buffer);
696 ssbo->setUsage(GL_DYNAMIC_DRAW);
698 return ssbo;
701 void ClusterShadingStage::update_frustum_state(
702 ShaderStorageBufferObjStdLayoutChunk* ssbo,
703 const VecFrustumsT& vFrustums)
705 if (ssbo) {
706 std::vector<UInt8> buffer = create_frustum_buffer(vFrustums);
707 ssbo->editMFBuffer()->setValues(buffer);
711 std::size_t ClusterShadingStage::calc_frustum_buffer_size(
712 ClusterShadingStageData* pData)
714 std::size_t ao = 0; // aligned offset
715 std::size_t bo = 0; // base offset
717 for (std::size_t j = 0; j < 4; ++j)
719 ao = alignOffset(16, bo); bo = ao + sizeof(Vec3f);
720 ao = alignOffset( 4, bo); bo = ao + sizeof(Real32);
721 ao = alignOffset(16, bo); bo = ao;
724 OSG_ASSERT(pData != NULL);
725 OSG_ASSERT(pData->editMFFrustumPlanesNormals()->size() == pData->editMFFrustumPlanesDistances()->size());
727 ao *= pData->editMFFrustumPlanesNormals()->size(); bo = ao; // array
728 ao = alignOffset( 16, bo); bo = ao; // padding
730 return ao;
733 std::vector<UInt8> ClusterShadingStage::create_frustum_buffer(
734 ClusterShadingStageData* pData)
736 std::size_t size = calc_frustum_buffer_size(pData);
738 std::vector<UInt8> buffer(size);
740 std::size_t ao = 0; // aligned offset
741 std::size_t bo = 0; // base offset
743 std::size_t num_frustums = pData->editMFFrustumPlanesNormals()->size();
745 for (std::size_t i = 0; i < num_frustums; ++i)
747 Frustum frustum = getFrustum(pData, static_cast<UInt32>(i));
749 for (std::size_t j = 0; j < 4; ++j)
751 ao = alignOffset(16, bo);
752 memcpy(&buffer[0] + ao, &frustum.planes[j].getNormal(), sizeof(Vec3f));
753 bo = ao + sizeof(Vec3f);
755 ao = alignOffset( 4, bo);
756 *(reinterpret_cast<Real32*>(&buffer[0] + ao)) = frustum.planes[j].getDistanceFromOrigin();
757 bo = ao + sizeof(Real32);
759 ao = alignOffset(16, bo); bo = ao;
762 ao = alignOffset( 16, bo); bo = ao; // padding
765 return buffer;
768 ShaderStorageBufferObjStdLayoutChunkTransitPtr
769 ClusterShadingStage::create_frustum_state(
770 ClusterShadingStageData* pData)
772 ShaderStorageBufferObjStdLayoutChunkTransitPtr ssbo = ShaderStorageBufferObjStdLayoutChunk::createLocal();
774 std::vector<UInt8> buffer = create_frustum_buffer(pData);
776 ssbo->editMFBuffer()->setValues(buffer);
777 ssbo->setUsage(GL_DYNAMIC_DRAW);
779 return ssbo;
782 void ClusterShadingStage::update_frustum_state(
783 ShaderStorageBufferObjStdLayoutChunk* ssbo,
784 ClusterShadingStageData* pData)
786 if (ssbo) {
787 std::vector<UInt8> buffer = create_frustum_buffer(pData);
788 ssbo->editMFBuffer()->setValues(buffer);
792 void ClusterShadingStage::update_frustum_state(
793 ClusterShadingStageData* pData,
794 const VecFrustumsT& vFrustums)
796 resize_frustums(pData, vFrustums.size());
798 std::size_t sz = vFrustums.size();
800 for (std::size_t i = 0; i < sz; ++i)
802 setFrustum(pData, static_cast<UInt32>(i), vFrustums[i]);
806 bool ClusterShadingStage::check_frustum_state(
807 ClusterShadingStageData* pData,
808 const Vec4u& viewport)
810 if (getCalcFrustumsOnCPU() == true || getCullLighsOnCPU() == true)
812 // Real32 x_v = static_cast<Real32>(viewport[0]);
813 // Real32 y_v = static_cast<Real32>(viewport[1]);
814 Real32 w = static_cast<Real32>(viewport[2]);
815 Real32 h = static_cast<Real32>(viewport[3]);
817 UInt32 tile_size = getTileSize();
819 UInt32 numHorizontalTiles = UInt32(osgCeil(w / tile_size));
820 UInt32 numVerticalTiles = UInt32(osgCeil(h / tile_size));
822 std::size_t sz = numHorizontalTiles * numVerticalTiles;
824 MFMatrix* normals = pData->editMFFrustumPlanesNormals();
825 MFVec4f* distances = pData->editMFFrustumPlanesDistances();
827 if (normals->size() != sz || distances->size() != sz)
828 return false;
830 return true;
833 Pnt4f ClusterShadingStage::NdcFromScreen(
834 const Pnt3f& p_w,
835 const Vec4u& viewport,
836 Real32 zNear,
837 Real32 zFar)
839 Real32 x_v = static_cast<Real32>(viewport[0]);
840 Real32 y_v = static_cast<Real32>(viewport[1]);
841 Real32 w = static_cast<Real32>(viewport[2]);
842 Real32 h = static_cast<Real32>(viewport[3]);
844 Pnt4f p_n(
845 2.f * (p_w.x() - x_v) / w - 1.f,
846 2.f * (p_w.y() - y_v) / h - 1.f,
847 2.f * (p_w.z() - zFar - zNear) / (zFar - zNear),
850 return p_n;
853 Pnt4f ClusterShadingStage::NdcFromScreen(
854 const Pnt3f& p_w, // with z-coord already given in ndc coords!
855 const Vec4u& viewport)
857 Real32 x_v = static_cast<Real32>(viewport[0]);
858 Real32 y_v = static_cast<Real32>(viewport[1]);
859 Real32 w = static_cast<Real32>(viewport[2]);
860 Real32 h = static_cast<Real32>(viewport[3]);
862 Pnt4f p_n(
863 2.f * (p_w.x() - x_v) / w - 1.f,
864 2.f * (p_w.y() - y_v) / h - 1.f,
865 p_w.z(),
868 return p_n;
871 Pnt3f ClusterShadingStage::EyeFromNdc(
872 const Pnt4f& p_n,
873 const Matrix& matInvProjection)
875 Pnt4f p_e;
876 matInvProjection.mult(p_n, p_e);
877 p_e /= p_e.w();
878 return Pnt3f(p_e.x(), p_e.y(), p_e.z());
881 bool ClusterShadingStage::isOrthographicCamera(
882 Camera* cam,
883 UInt32 width,
884 UInt32 height)
886 OrthographicCamera* orthoCam = dynamic_cast<OrthographicCamera*>(cam);
887 if (orthoCam)
888 return true;
890 MatrixCamera* matrixCam = dynamic_cast<MatrixCamera*>(cam);
891 if (matrixCam)
893 Matrix matProjection;
894 matrixCam->getProjection(matProjection, width, height);
896 // float m32 = matProjection[2][3];
897 // float m33 = matProjection[3][3];
899 if (matProjection[2][3] == 0.f && matProjection[3][3] == 1.f)
900 return true;
903 return false;
906 void ClusterShadingStage::calc_ortho_frustums_cpu(
907 const Vec4u& viewport,
908 const Matrix& matInvProjection,
909 VecFrustumsT& frustums)
911 Real32 x_v = static_cast<Real32>(viewport[0]);
912 Real32 y_v = static_cast<Real32>(viewport[1]);
913 Real32 w = static_cast<Real32>(viewport[2]);
914 Real32 h = static_cast<Real32>(viewport[3]);
916 UInt32 tile_size = getTileSize();
918 UInt32 numHorizontalTiles = UInt32(osgCeil(w / tile_size));
919 UInt32 numVerticalTiles = UInt32(osgCeil(h / tile_size));
921 frustums.resize(numHorizontalTiles * numVerticalTiles);
923 if (getDisabled())
924 return;
926 Pnt3f pnts_w[8];
927 Pnt4f pnts_n[8];
928 Pnt3f pnts_e[8];
930 for (UInt32 j = 0; j < numVerticalTiles; ++j)
932 Real32 y0 = y_v + j * tile_size;
933 Real32 y1 = osgMin(y_v + (j+1) * tile_size, y_v + h);
935 for (UInt32 i = 0; i < numHorizontalTiles; ++i)
937 Real32 x0 = x_v + i * tile_size;
938 Real32 x1 = osgMin(x_v + (i+1) * tile_size, x_v + w);
940 pnts_w[0] = Pnt3f(x0, y0, 1.f);
941 pnts_w[1] = Pnt3f(x1, y0, 1.f);
942 pnts_w[2] = Pnt3f(x0, y1, 1.f);
943 pnts_w[3] = Pnt3f(x1, y1, 1.f);
945 pnts_w[4] = Pnt3f(x0, y0, -1.f);
946 pnts_w[5] = Pnt3f(x1, y0, -1.f);
947 pnts_w[6] = Pnt3f(x0, y1, -1.f);
948 pnts_w[7] = Pnt3f(x1, y1, -1.f);
950 for (UInt32 k = 0; k < 8; ++k)
952 pnts_n[k] = NdcFromScreen(pnts_w[k], viewport);
953 pnts_e[k] = EyeFromNdc (pnts_n[k], matInvProjection);
956 UInt32 idx = j * numHorizontalTiles + i;
958 frustums[idx].planes[0] = Plane(pnts_e[6], pnts_e[0], pnts_e[2]); // left plane
959 frustums[idx].planes[1] = Plane(pnts_e[7], pnts_e[3], pnts_e[1]); // right plane
960 frustums[idx].planes[2] = Plane(pnts_e[6], pnts_e[2], pnts_e[3]); // top plane
961 frustums[idx].planes[3] = Plane(pnts_e[4], pnts_e[1], pnts_e[0]); // bottom plane
966 void ClusterShadingStage::calc_persp_frustums_cpu(
967 const Vec4u& viewport,
968 const Matrix& matInvProjection,
969 VecFrustumsT& frustums)
971 Real32 x_v = static_cast<Real32>(viewport[0]);
972 Real32 y_v = static_cast<Real32>(viewport[1]);
973 Real32 w = static_cast<Real32>(viewport[2]);
974 Real32 h = static_cast<Real32>(viewport[3]);
976 UInt32 tile_size = getTileSize();
978 UInt32 numHorizontalTiles = UInt32(osgCeil(w / tile_size));
979 UInt32 numVerticalTiles = UInt32(osgCeil(h / tile_size));
981 frustums.resize(numHorizontalTiles * numVerticalTiles);
983 if (getDisabled())
984 return;
986 Pnt3f pEye = Pnt3f(0.f, 0.f, 0.f); // eye position in view space
988 Pnt3f pnts_w[4];
989 Pnt4f pnts_n[4];
990 Pnt3f pnts_e[4];
992 for (UInt32 j = 0; j < numVerticalTiles; ++j)
994 Real32 y0 = y_v + j * tile_size;
995 Real32 y1 = osgMin(y_v + (j+1) * tile_size, y_v + h);
997 for (UInt32 i = 0; i < numHorizontalTiles; ++i)
999 Real32 x0 = x_v + i * tile_size;
1000 Real32 x1 = osgMin(x_v + (i+1) * tile_size, x_v + w);
1002 pnts_w[0] = Pnt3f(x0, y0, -1.f);
1003 pnts_w[1] = Pnt3f(x1, y0, -1.f);
1004 pnts_w[2] = Pnt3f(x0, y1, -1.f);
1005 pnts_w[3] = Pnt3f(x1, y1, -1.f);
1007 for (UInt32 k = 0; k < 4; ++k)
1009 pnts_n[k] = NdcFromScreen(pnts_w[k], viewport);
1010 pnts_e[k] = EyeFromNdc (pnts_n[k], matInvProjection);
1013 UInt32 idx = j * numHorizontalTiles + i;
1015 frustums[idx].planes[0] = Plane(pEye, pnts_e[0], pnts_e[2]); // left plane
1016 frustums[idx].planes[1] = Plane(pEye, pnts_e[3], pnts_e[1]); // right plane
1017 frustums[idx].planes[2] = Plane(pEye, pnts_e[2], pnts_e[3]); // top plane
1018 frustums[idx].planes[3] = Plane(pEye, pnts_e[1], pnts_e[0]); // bottom plane
1023 /*-------------------------- Image Details -------------------------------*/
1025 ImageTransitPtr ClusterShadingStage::create_image(
1026 UInt32 width,
1027 UInt32 height,
1028 UInt32 depth,
1029 bool allocate_memory)
1031 ImageTransitPtr image = Image::createLocal();
1032 image->set(
1033 GL_RG, // pixel format
1034 width, // width in pixel
1035 height, // height in pixel
1036 depth, // depth in pixel
1037 1, // mipmap count
1038 1, // frame count
1039 0.0, // frame delay
1040 NULL, // data
1041 Image::OSG_UINT32_IMAGEDATA, // type
1042 allocate_memory, // allocate memory
1043 1 // side count
1045 return image;
1048 void ClusterShadingStage::update_image(
1049 Image* image,
1050 UInt32 width,
1051 UInt32 height,
1052 UInt32 depth,
1053 bool allocate_memory)
1055 image->set(
1056 GL_RG, // pixel format
1057 width, // width in pixel
1058 height, // height in pixel
1059 depth, // depth in pixel
1060 1, // mipmap count
1061 1, // frame count
1062 0.0, // frame delay
1063 NULL, // data
1064 Image::OSG_UINT32_IMAGEDATA, // type
1065 allocate_memory, // allocate memory
1066 1 // side count
1070 void ClusterShadingStage::update_image_data(
1071 Image* image,
1072 const VecImageDataT& imageData)
1074 UInt8* data = image->editData();
1076 std::size_t ao = 0; // aligned offset
1077 std::size_t bo = 0; // base offset
1079 for (std::size_t i = 0; i < imageData.size(); ++i)
1081 UInt32 offset = imageData[i].first;
1082 UInt32 index = imageData[i].second;
1084 ao = alignOffset(4, bo);
1085 *(reinterpret_cast<UInt32*>(data + ao)) = offset;
1086 bo = ao + sizeof(UInt32);
1088 ao = alignOffset(4, bo);
1089 *(reinterpret_cast<UInt32*>(data + ao)) = index;
1090 bo = ao + sizeof(UInt32);
1094 void ClusterShadingStage::write_image_data(
1095 UInt32 i,
1096 UInt32 j,
1097 UInt32 k,
1098 const Vec3u& dimensions,
1099 const ImageDataT& data,
1100 VecImageDataT& imageData)
1102 OSG_ASSERT(i < dimensions.x());
1103 OSG_ASSERT(j < dimensions.y());
1104 OSG_ASSERT(k < dimensions.z());
1106 std::size_t idx = k * dimensions.x() * dimensions.y() + j * dimensions.x() + i;
1108 imageData[idx] = data;
1111 TextureObjChunkTransitPtr
1112 ClusterShadingStage::create_texture_state(Image* image)
1114 TextureObjChunkTransitPtr texObjChunk = TextureObjChunk::createLocal();
1116 texObjChunk->setTarget(GL_TEXTURE_2D_ARRAY);
1117 texObjChunk->setScale(false);
1118 texObjChunk->setInternalFormat(GL_RG32UI);
1119 texObjChunk->setExternalFormat(GL_RG_INTEGER);
1120 texObjChunk->setMinFilter(GL_NEAREST);
1121 texObjChunk->setMagFilter(GL_NEAREST);
1122 texObjChunk->setImage(image);
1124 return texObjChunk;
1127 void ClusterShadingStage::update_texture_state(TextureObjChunk* texObjChunk)
1129 texObjChunk->imageContentChanged();
1132 TextureImageChunkTransitPtr
1133 ClusterShadingStage::create_texture_image_state(
1134 TextureObjChunk* texObjChunk,
1135 const GLenum access)
1137 TextureImageChunkTransitPtr texImageChunk = TextureImageChunk::createLocal();
1138 texImageChunk->setTexture(texObjChunk);
1139 texImageChunk->setAccess(access);
1140 texImageChunk->setFormat(GL_RG32UI);
1141 texImageChunk->setLayer(-1);
1143 return texImageChunk;
1146 void ClusterShadingStage::update_light_grid_image(
1147 const Vec4u& viewport,
1148 Image* image,
1149 bool allocate_memory)
1151 Real32 w = static_cast<Real32>(viewport[2]);
1152 Real32 h = static_cast<Real32>(viewport[3]);
1154 UInt32 numHorizontalTiles = UInt32(osgCeil(w / getTileSize()));
1155 UInt32 numVerticalTiles = UInt32(osgCeil(h / getTileSize()));
1157 update_image(image , numHorizontalTiles, numVerticalTiles, getNumClusterZ(), allocate_memory);
1160 /*----------------------- Light Index Details ----------------------------*/
1162 std::size_t
1163 ClusterShadingStage::calc_light_index_buffer_size(
1164 const VecLightIndexT& vIndexList)
1166 return sizeof(UInt32) * vIndexList.size();
1169 std::vector<UInt8>
1170 ClusterShadingStage::create_light_index_buffer(
1171 const VecLightIndexT& vIndexList)
1173 std::size_t size = calc_light_index_buffer_size(vIndexList);
1175 std::vector<UInt8> buffer(size);
1177 std::size_t ao = 0; // aligned offset
1178 std::size_t bo = 0; // base offset
1180 for (std::size_t i = 0; i < vIndexList.size(); ++i)
1182 ao = alignOffset(4, bo);
1183 *(reinterpret_cast<UInt32*>(&buffer[0] + ao)) = vIndexList[i];
1184 bo = ao + sizeof(UInt32);
1187 ao = alignOffset( 4, bo); bo = ao; // padding
1189 return buffer;
1192 ShaderStorageBufferObjStdLayoutChunkTransitPtr
1193 ClusterShadingStage::create_index_state(
1194 const VecLightIndexT& vIndexList)
1196 ShaderStorageBufferObjStdLayoutChunkTransitPtr ssbo = ShaderStorageBufferObjStdLayoutChunk::createLocal();
1198 std::vector<UInt8> buffer = create_light_index_buffer(vIndexList);
1200 ssbo->editMFBuffer()->setValues(buffer);
1201 ssbo->setUsage(GL_DYNAMIC_DRAW);
1203 return ssbo;
1206 ShaderStorageBufferObjStdLayoutChunkTransitPtr
1207 ClusterShadingStage::create_index_state(std::size_t sz)
1209 ShaderStorageBufferObjStdLayoutChunkTransitPtr ssbo = ShaderStorageBufferObjStdLayoutChunk::createLocal();
1211 std::vector<UInt8> buffer(sizeof(UInt32) * sz, 0);
1213 ssbo->editMFBuffer()->setValues(buffer);
1214 ssbo->setUsage(GL_DYNAMIC_DRAW);
1216 return ssbo;
1219 void ClusterShadingStage::update_index_state(
1220 ShaderStorageBufferObjStdLayoutChunk* ssbo,
1221 const VecLightIndexT& vIndexList)
1223 if (ssbo) {
1224 std::vector<UInt8> buffer = create_light_index_buffer(vIndexList);
1225 ssbo->editMFBuffer()->setValues(buffer);
1229 void ClusterShadingStage::clear_index_state(
1230 ShaderStorageBufferObjStdLayoutChunk* ssbo,
1231 std::size_t sz)
1233 if (ssbo) {
1234 std::vector<UInt8> buffer(sizeof(UInt32) * sz, 0);
1235 ssbo->editMFBuffer()->setValues(buffer);
1239 /*------------------- Light Index Counter Details ------------------------*/
1241 ShaderStorageBufferObjStdLayoutChunkTransitPtr
1242 ClusterShadingStage::create_light_index_counter_state()
1244 ShaderStorageBufferObjStdLayoutChunkTransitPtr ssbo = ShaderStorageBufferObjStdLayoutChunk::createLocal();
1247 // The counter compromises exactly one UInt32 element
1249 std::vector<UInt8> buffer(sizeof(UInt32) * 1, 0);
1251 ssbo->editMFBuffer()->setValues(buffer);
1252 ssbo->setUsage(GL_DYNAMIC_DRAW);
1254 return ssbo;
1257 void ClusterShadingStage::clear_light_index_counter_state(
1258 ShaderStorageBufferObjStdLayoutChunk* ssbo)
1260 if (ssbo) {
1262 // The counter compromises exactly one UInt32 element
1264 std::vector<UInt8> buffer(sizeof(UInt32) * 1, 0);
1266 ssbo->editMFBuffer()->setValues(buffer);
1270 /*---------------------- Dispatch Data Details ---------------------------*/
1272 std::size_t ClusterShadingStage::calc_dispatch_data_buffer_size(
1273 const DispatchData& data)
1275 std::size_t ao = 0; // aligned offset
1276 std::size_t bo = 0; // base offset
1278 // Matrix matTransf;
1279 ao = alignOffset(16, bo); bo = ao + sizeof(Vec4f);
1280 ao = alignOffset(16, bo); bo = ao + sizeof(Vec4f);
1281 ao = alignOffset(16, bo); bo = ao + sizeof(Vec4f);
1282 ao = alignOffset(16, bo); bo = ao + sizeof(Vec4f);
1284 // Vec4u viewport;
1285 ao = alignOffset(16, bo); bo = ao + sizeof(Vec4u);
1287 // Vec2i numTiles;
1288 ao = alignOffset( 8, bo); bo = ao + sizeof(Vec2i);
1290 // Padding at end
1291 ao = alignOffset(8, bo); bo = ao;
1293 return ao;
1296 std::vector<UInt8>
1297 ClusterShadingStage::create_dispatch_data_buffer(
1298 const DispatchData& data)
1300 std::size_t size = calc_dispatch_data_buffer_size(data);
1302 std::vector<UInt8> buffer(size);
1304 std::size_t ao = 0; // aligned offset
1305 std::size_t bo = 0; // base offset
1307 // Matrix matTransf;
1308 ao = alignOffset(16, bo);
1309 memcpy(&buffer[0] + ao, &data.matTransf[0][0], sizeof(Vec4f));
1310 bo = ao + sizeof(Vec4f);
1312 ao = alignOffset(16, bo);
1313 memcpy(&buffer[0] + ao, &data.matTransf[1][0], sizeof(Vec4f));
1314 bo = ao + sizeof(Vec4f);
1316 ao = alignOffset(16, bo);
1317 memcpy(&buffer[0] + ao, &data.matTransf[2][0], sizeof(Vec4f));
1318 bo = ao + sizeof(Vec4f);
1320 ao = alignOffset(16, bo);
1321 memcpy(&buffer[0] + ao, &data.matTransf[3][0], sizeof(Vec4f));
1322 bo = ao + sizeof(Vec4f);
1324 // Vec4u viewport;
1325 ao = alignOffset(16, bo);
1326 memcpy(&buffer[0] + ao, &data.viewport[0], sizeof(Vec4u));
1327 bo = ao + sizeof(Vec4u);
1329 // Vec2i numTiles;
1330 ao = alignOffset( 8, bo);
1331 memcpy(&buffer[0] + ao, &data.numTiles[0], sizeof(Vec2i));
1332 bo = ao + sizeof(Vec2i);
1334 // Padding at end
1335 ao = alignOffset(8, bo); bo = ao;
1337 return buffer;
1340 UniformBufferObjStd140ChunkTransitPtr
1341 ClusterShadingStage::create_dispatch_data(const DispatchData& data)
1343 UniformBufferObjStd140ChunkTransitPtr ubo = UniformBufferObjStd140Chunk::createLocal();
1345 std::vector<UInt8> buffer = create_dispatch_data_buffer(data);
1347 ubo->editMFBuffer()->setValues(buffer);
1348 ubo->setUsage(GL_DYNAMIC_DRAW);
1350 return ubo;
1353 void ClusterShadingStage::update_dispatch_data(
1354 UniformBufferObjStd140Chunk* ubo,
1355 const DispatchData& data)
1357 if (ubo) {
1358 std::vector<UInt8> buffer = create_dispatch_data_buffer(data);
1359 ubo->editMFBuffer()->setValues(buffer);
1363 /*-------------------- Clustering Index Details --------------------------*/
1365 std::size_t ClusterShadingStage::calc_clustering_data_buffer_size(
1366 const ClusteringData& data)
1368 std::size_t ao = 0; // aligned offset
1369 std::size_t bo = 0; // base offset
1371 // Real32 zNear; // positive near plane distance from eye zNear > 0
1372 ao = alignOffset( 4, bo); bo = ao + sizeof(Real32);
1374 // Real32 zFar; // positive far plane distance from eye zFar > zNear > 0
1375 ao = alignOffset( 4, bo); bo = ao + sizeof(Real32);
1377 //Real32 D; // positive near plane offset D >= 0 // for testing
1378 //ao = alignOffset( 4, bo); bo = ao + sizeof(Real32);
1380 //Real32 nD; // zNear + D : shader optimization
1381 ao = alignOffset( 4, bo); bo = ao + sizeof(Real32);
1383 //Real32 lg_nD; // log2(nD) : shader optimization
1384 ao = alignOffset( 4, bo); bo = ao + sizeof(Real32);
1386 //Real32 a; // precalculated factor (c-1)/log2(f/(n+D))
1387 ao = alignOffset( 4, bo); bo = ao + sizeof(Real32);
1389 //Real32 b; // precalculated factor log2(f/(n+D))/(c-1)
1390 ao = alignOffset( 4, bo); bo = ao + sizeof(Real32);
1392 //Int32 c; // number of cluster planes // for testing
1393 //ao = alignOffset( 4, bo); bo = ao + sizeof(Int32);
1395 //Int32 c_1; // number of cluster planes minus one : shader optimization
1396 ao = alignOffset( 4, bo); bo = ao + sizeof(Int32);
1398 //Vec2i p_v; // viewport corner points
1399 ao = alignOffset( 8, bo); bo = ao + sizeof(Vec2i);
1401 //Vec3i n_c; // number of clusters // for testing
1402 //ao = alignOffset(16, bo); bo = ao + sizeof(Vec3i);
1403 //ao = alignOffset(16, bo); bo = ao;
1405 //bool enabled;// cluster shading enabled
1406 ao = alignOffset( 4, bo); bo = ao + sizeof(Int32);
1408 // Padding at end
1409 ao = alignOffset(4, bo); bo = ao;
1411 return ao;
1414 std::vector<UInt8>
1415 ClusterShadingStage::create_clustering_data_buffer(
1416 const ClusteringData& data)
1418 std::size_t size = calc_clustering_data_buffer_size(data);
1420 std::vector<UInt8> buffer(size);
1422 std::size_t ao = 0; // aligned offset
1423 std::size_t bo = 0; // base offset
1425 // Real32 zNear; // positive near plane distance from eye zNear > 0
1426 ao = alignOffset( 4, bo); *(reinterpret_cast<Real32*>(&buffer[0] + ao)) = data.zNear; bo = ao + sizeof(Real32);
1428 // Real32 zFar; // positive far plane distance from eye zFar > zNear > 0
1429 ao = alignOffset( 4, bo); *(reinterpret_cast<Real32*>(&buffer[0] + ao)) = data.zFar; bo = ao + sizeof(Real32);
1431 //Real32 D; // positive near plane offset D >= 0 // for testing
1432 //ao = alignOffset( 4, bo); *(reinterpret_cast<Real32*>(&buffer[0] + ao)) = data.D; bo = ao + sizeof(Real32);
1434 //Real32 nD; // zNear + D : shader optimization
1435 ao = alignOffset( 4, bo); *(reinterpret_cast<Real32*>(&buffer[0] + ao)) = data.nD; bo = ao + sizeof(Real32);
1437 //Real32 lg_nD; // log2(nD) : shader optimization
1438 ao = alignOffset( 4, bo); *(reinterpret_cast<Real32*>(&buffer[0] + ao)) = data.lg_nD; bo = ao + sizeof(Real32);
1440 //Real32 a; // precalculated factor (c-1)/log2(f/(n+D))
1441 ao = alignOffset( 4, bo); *(reinterpret_cast<Real32*>(&buffer[0] + ao)) = data.a; bo = ao + sizeof(Real32);
1443 //Real32 b; // precalculated factor log2(f/(n+D))/(c-1)
1444 ao = alignOffset( 4, bo); *(reinterpret_cast<Real32*>(&buffer[0] + ao)) = data.b; bo = ao + sizeof(Real32);
1446 //Int32 c; // number of cluster planes // for testing
1447 //ao = alignOffset( 4, bo); *(reinterpret_cast<Int32*>(&buffer[0] + ao)) = data.c; bo = ao + sizeof(Int32);
1449 //Int32 c_1; // number of cluster planes minus one : shader optimization
1450 ao = alignOffset( 4, bo); *(reinterpret_cast<Int32*>(&buffer[0] + ao)) = data.c_1; bo = ao + sizeof(Int32);
1452 //Vec2i p_v; // viewport corner points
1453 ao = alignOffset( 8, bo); memcpy(&buffer[0] + ao, &data.p_v[0], sizeof(Vec2i)); bo = ao + sizeof(Vec2i);
1455 //Vec3i n_c; // number of clusters // for testing
1456 //ao = alignOffset(16, bo); memcpy(&buffer[0] + ao, &data.n_c[0], sizeof(Vec3i)); bo = ao + sizeof(Vec3i);
1457 //ao = alignOffset(16, bo); bo = ao;
1459 //bool enabled;// cluster shading enabled
1460 ao = alignOffset( 4, bo); *(reinterpret_cast<bool*>(&buffer[0] + ao)) = data.enabled; bo = ao + sizeof(Int32);
1462 // Padding at end
1463 ao = alignOffset(4, bo); bo = ao;
1465 return buffer;
1468 UniformBufferObjStd140ChunkTransitPtr
1469 ClusterShadingStage::create_clustering_data(
1470 const ClusteringData& data)
1472 UniformBufferObjStd140ChunkTransitPtr ubo = UniformBufferObjStd140Chunk::createLocal();
1474 std::vector<UInt8> buffer = create_clustering_data_buffer(data);
1476 ubo->editMFBuffer()->setValues(buffer);
1477 ubo->setUsage(GL_STATIC_DRAW);
1479 return ubo;
1482 void ClusterShadingStage::update_clustering_data(
1483 UniformBufferObjStd140Chunk* ubo,
1484 const ClusteringData& data)
1486 if (ubo) {
1487 std::vector<UInt8> buffer = create_clustering_data_buffer(data);
1488 ubo->editMFBuffer()->setValues(buffer);
1492 /*------------------------- Cluster Access -------------------------------*/
1494 UInt32 ClusterShadingStage::cluster_k(
1495 Real32 z_e, // eye space z-position, z_e < 0
1496 Real32 n, // near plane distance from viewer n > 0
1497 Real32 f, // far plane distance from viewer f > n > 0
1498 Real32 D, // near plane offset
1499 UInt32 c) // number of cluster planes
1501 if (z_e >= -(n+D)) return 0;
1502 if (z_e <= -f) return c-1;
1504 Real32 s = 1 + ((c-1) / log2(f/(n+D))) * (log2(-z_e) - log2(n+D));
1505 UInt32 k = UInt32(osgFloor(s));
1507 return osgClamp(0U, k, c-1);
1510 Real32 ClusterShadingStage::cluster_z(
1511 UInt32 k, // cluster coordinate, 0 <= k <= c
1512 Real32 n, // near plane distance from viewer n > 0
1513 Real32 f, // far plane distance from viewer f > n > 0
1514 Real32 D, // near plane offset
1515 UInt32 c) // number of cluster planes
1517 if (k == 0) return -n;
1518 if (k > c-1) return -f;
1520 Real32 z_e = -(n+D)*exp2(static_cast<Real32>(k-1) * (log2(f/(n+D))/(c-1)));
1521 return z_e;
1524 UInt32 ClusterShadingStage::cluster_k(
1525 Real32 z_e, // eye space z-position, z_e < 0
1526 Real32 nD, // near plane distance from viewer plus offset, nD = n+D with n > 0, D > 0
1527 Real32 lg_nD, // log2(nD)
1528 Real32 f, // far plane distance from viewer f > n > 0
1529 Real32 a, // factor (c-1)/log2(f/nD)
1530 UInt32 c_1) // number of cluster planes
1532 if (z_e >= -nD) return 0;
1533 if (z_e <= -f) return c_1;
1535 Real32 s = 1 + a * (log2(-z_e) - lg_nD);
1536 UInt32 k = UInt32(s);
1537 return osgClamp(0U, k, c_1);
1540 Real32 ClusterShadingStage::cluster_z(
1541 UInt32 k, // cluster coordinate, 0 <= k <= c
1542 Real32 n, // near plane distance from viewer n > 0
1543 Real32 f, // far plane distance from viewer f > n > 0
1544 Real32 nD, // near plane distance from viewer plus offset, nD = n+D with n > 0, D > 0
1545 Real32 b, // factor log2(f/(n+D))/(c-1)
1546 UInt32 c_1) // number of cluster planes
1548 if (k == 0) return -n;
1549 if (k > c_1) return -f;
1551 Real32 z_e = -nD*exp2(static_cast<Real32>(k-1) * b);
1552 return z_e;
1555 /*-------------------------- Computation ---------------------------------*/
1557 void ClusterShadingStage::create_computation(
1558 ClusterShadingStageData* pData)
1560 create_frustum_computation (pData);
1561 create_light_culling_computation(pData);
1563 NodeUnrecPtr frustNode = Node::createLocal();
1564 NodeUnrecPtr cullNode = Node::createLocal();
1566 if (getCalcFrustumsOnCPU() || getCullLighsOnCPU())
1568 frustNode->setCore(Group::createLocal());
1570 else
1572 frustNode->setCore(getFrustAlgoElement());
1575 if (getCullLighsOnCPU())
1577 cullNode->setCore(Group::createLocal());
1579 else
1581 cullNode->setCore(getCullAlgoElement());
1584 frustNode->addChild(cullNode);
1586 frustNode->editVolume().setInfinite();
1587 frustNode->editVolume().setStatic ();
1589 cullNode->editVolume().setInfinite();
1590 cullNode->editVolume().setStatic ();
1592 setFrustNode(frustNode);
1593 setCullNode ( cullNode);
1596 void ClusterShadingStage::update_computation(
1597 ClusterShadingStageData* pData)
1600 // Adpat the stage data chunk material
1602 if (!getCalcFrustumsOnCPU())
1604 ChunkMaterial* pFrustChunkMat = getFrustCompShaderAlgo()->getChunkMaterial();
1605 if (pFrustChunkMat != pData->getFrustChunkMat())
1607 getFrustCompShaderAlgo()->setChunkMaterial(pData->getFrustChunkMat());
1611 if (!getCullLighsOnCPU())
1613 ChunkMaterial* pCullChunkMat = getCullCompShaderAlgo() ->getChunkMaterial();
1614 if (pCullChunkMat != pData->getCullChunkMat())
1616 getCullCompShaderAlgo()->setChunkMaterial(pData->getCullChunkMat());
1621 // Nothing to do here, because we always recreate the stage if the CPU/GPU modes
1622 // change.
1626 // Setup the computation cores
1628 if (getCalcFrustumsOnCPU() || getCullLighsOnCPU())
1630 if (getFrustNode()->getCore()->getType() != Group::getClassType())
1632 getFrustNode()->setCore(Group::createLocal());
1635 else
1637 if (getFrustNode()->getCore() != getFrustAlgoElement())
1639 getFrustNode()->setCore(getFrustAlgoElement());
1643 if (getCullLighsOnCPU())
1645 if (getCullNode()->getCore()->getType() != Group::getClassType())
1647 getCullNode()->setCore(Group::createLocal());
1650 else
1652 if (getCullNode()->getCore() != getCullAlgoElement())
1654 getCullNode()->setCore(getCullAlgoElement());
1660 /*---------------------- Frustum Computation -----------------------------*/
1662 void ClusterShadingStage::create_frustum_computation(
1663 ClusterShadingStageData* pData)
1665 OSG_ASSERT(pData);
1667 ShaderProgramUnrecPtr shader = ShaderProgram::createLocal();
1669 shader->setShaderType(GL_COMPUTE_SHADER);
1670 shader->setProgram(
1671 pData->getIsOrthographicCamera()
1672 ? get_ortho_frustum_cp_program()
1673 : get_persp_frustum_cp_program()
1676 shader->addUniformBlock ("DispatchData", getDispatchDataBindingPnt());
1677 shader->addShaderStorageBlock("Frustums", getFrustumBindingPnt());
1679 ComputeShaderChunkUnrecPtr chunk = ComputeShaderChunk::createLocal();
1680 chunk->addComputeShader(shader);
1681 chunk->setVariables(shader->getVariables());
1683 ComputeShaderAlgorithmUnrecPtr algorithm = ComputeShaderAlgorithm::createLocal();
1684 algorithm->setUseMemoryBarrier(true);
1685 algorithm->setMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
1686 algorithm->setComputeShader(chunk);
1688 Vec3i work_group_count(1,1,1);
1689 algorithm->setDispatchConfig(work_group_count);
1690 algorithm->setChunkMaterial(pData->getFrustChunkMat());
1692 AlgorithmComputeElementUnrecPtr element = AlgorithmComputeElement::createLocal();
1693 element->setAlgorithm(algorithm);
1695 setFrustAlgoElement(element);
1696 setFrustCompShaderAlgo(algorithm);
1699 /*------------------- Light Culling Computation --------------------------*/
1701 void ClusterShadingStage::create_light_culling_computation(
1702 ClusterShadingStageData* pData)
1704 OSG_ASSERT(pData);
1706 ShaderProgramUnrecPtr shader = ShaderProgram::createLocal();
1707 shader->setShaderType(GL_COMPUTE_SHADER);
1708 shader->setProgram(get_light_culling_cp_program());
1710 shader->addShaderStorageBlock(getMultiLightChunk()->getLightBlockName().c_str(), getLightBindingPnt());
1711 shader->addUniformBlock ("DispatchData", getDispatchDataBindingPnt());
1712 shader->addUniformBlock (getClusteringDataBlockName().c_str(), getClusterDataBindingPnt());
1713 shader->addShaderStorageBlock("Frustums", getFrustumBindingPnt());
1714 shader->addShaderStorageBlock(getAffectedLightIndexListBlockName().c_str(), getAffectedLightIndexListBindingPnt());
1715 shader->addShaderStorageBlock(getLightIndexListBlockName().c_str(), getLightIndexListBindingPnt());
1716 shader->addShaderStorageBlock("LightIndexCounter", getLightIndexCounterBindingPnt());
1718 ComputeShaderChunkUnrecPtr chunk = ComputeShaderChunk::createLocal();
1719 chunk->addComputeShader(shader);
1720 chunk->setVariables(shader->getVariables());
1722 ComputeShaderAlgorithmUnrecPtr algorithm = ComputeShaderAlgorithm::createLocal();
1723 algorithm->setUseMemoryBarrier(true);
1724 algorithm->setMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
1725 algorithm->setComputeShader(chunk);
1727 Vec3i work_group_count(1,1,1);
1728 algorithm->setDispatchConfig(work_group_count);
1729 algorithm->setChunkMaterial(pData->getCullChunkMat());
1731 AlgorithmComputeElementUnrecPtr element = AlgorithmComputeElement::createLocal();
1732 element->setAlgorithm(algorithm);
1734 setCullAlgoElement(element);
1735 setCullCompShaderAlgo(algorithm);
1738 /*------------------------- Fragment Shader -------------------------------*/
1740 void ClusterShadingStage::create_fragment_shader(ClusterShadingStageData* pData)
1742 ShaderProgramChunkUnrecPtr progChunk = ShaderProgramChunk::createLocal();
1744 ShaderProgramUnrecPtr shader = ShaderProgram::createLocal();
1745 shader->setShaderType(GL_FRAGMENT_SHADER);
1746 shader->setProgram(get_fragment_cp_program());
1748 shader->addShaderStorageBlock(getMultiLightChunk()->getLightBlockName().c_str(), getLightBindingPnt());
1749 shader->addShaderStorageBlock(getAffectedLightIndexListBlockName().c_str(), getAffectedLightIndexListBindingPnt());
1750 shader->addShaderStorageBlock(getLightIndexListBlockName().c_str(), getLightIndexListBindingPnt());
1751 shader->addUniformBlock (getClusteringDataBlockName().c_str(), getClusterDataBindingPnt());
1753 progChunk->addShader(shader);
1755 setShaderProgChunk(progChunk);
1758 /*------------------ Update Dispatch Configurations -----------------------*/
1760 void ClusterShadingStage::update_dispatch_config_frustums(
1761 ClusterShadingStageData* pData,
1762 const Vec4u& viewport,
1763 const Matrix& matInvProjection)
1765 // Real32 x_v = static_cast<Real32>(viewport[0]);
1766 // Real32 y_v = static_cast<Real32>(viewport[1]);
1767 Real32 w = static_cast<Real32>(viewport[2]);
1768 Real32 h = static_cast<Real32>(viewport[3]);
1770 const Vec3i work_group_size = Vec3i(getBlockSize(), getBlockSize(), 1);
1771 const UInt32 tile_size = getTileSize();
1773 Real32 numHorizontalTiles = osgCeil(w / tile_size);
1774 Real32 numVerticalTiles = osgCeil(h / tile_size);
1776 DispatchData data;
1777 data.matTransf = matInvProjection;
1778 data.viewport = viewport;
1779 data.numTiles = Vec2i(Int32(numHorizontalTiles), Int32(numVerticalTiles));
1781 VecFrustumsT vFrustums(data.numTiles.x() * data.numTiles.y());
1782 update_frustum_state(pData->getFrustumsSSBOChunk(), vFrustums);
1783 update_dispatch_data(pData->getFrustDispDatUBOChunk(), data);
1785 UInt32 szX = UInt32(osgCeil(numHorizontalTiles / work_group_size.x()));
1786 UInt32 szY = UInt32(osgCeil( numVerticalTiles / work_group_size.y()));
1787 UInt32 szZ = 1;
1789 Vec3i work_group_count = Vec3i(szX, szY, szZ);
1790 getFrustCompShaderAlgo()->setDispatchConfig(work_group_count);
1793 void ClusterShadingStage::update_dispatch_config_cull_lights(
1794 ClusterShadingStageData* pData,
1795 const Vec4u& viewport,
1796 const Matrix& matViewing)
1798 // Real32 x_v = static_cast<Real32>(viewport[0]);
1799 // Real32 y_v = static_cast<Real32>(viewport[1]);
1800 Real32 w = static_cast<Real32>(viewport[2]);
1801 Real32 h = static_cast<Real32>(viewport[3]);
1803 const Vec3i work_group_size = Vec3i(getBlockSize(), getBlockSize(), 1);
1804 const UInt32 tile_size = getTileSize();
1806 Real32 numHorizontalTiles = osgCeil(w / tile_size);
1807 Real32 numVerticalTiles = osgCeil(h / tile_size);
1809 DispatchData data;
1810 data.matTransf = matViewing;
1811 data.viewport = viewport;
1812 data.numTiles = Vec2i(Int32(numHorizontalTiles), Int32(numVerticalTiles));
1814 update_dispatch_data(pData->getCullDispDatUBOChunk(), data);
1816 Vec3i work_group_count = Vec3i(numHorizontalTiles, numVerticalTiles, getNumClusterZ());
1817 getCullCompShaderAlgo()->setDispatchConfig(work_group_count);
1820 /*------------------- Calculate Affected Lights --------------------------*/
1822 void ClusterShadingStage::calc_affected_lights(
1823 const Matrix& matEyeFromWorld,
1824 FrustumVolume volViewFrustum,
1825 VecLightIndexT& vecAffectedLights,
1826 VecLightEyeSpaceDataT& vecEyeSpaceData)
1829 // We test the light against the view frustum to get only affected lights.
1830 // Since all of out tests happens in eye space we must transform the world
1831 // space view frustum to eye space first.
1833 volViewFrustum.transform(matEyeFromWorld);
1835 Frustum frustum;
1836 frustum.planes[0] = volViewFrustum.getPlane(FrustumVolume::PLANE_LEFT);
1837 frustum.planes[1] = volViewFrustum.getPlane(FrustumVolume::PLANE_RIGHT);
1838 frustum.planes[2] = volViewFrustum.getPlane(FrustumVolume::PLANE_TOP);
1839 frustum.planes[3] = volViewFrustum.getPlane(FrustumVolume::PLANE_BOTTOM);
1841 Pnt3f nlt = volViewFrustum.getCorner(FrustumVolume::NEAR_LEFT_TOP);
1842 Pnt3f flt = volViewFrustum.getCorner(FrustumVolume:: FAR_LEFT_TOP);
1844 Real32 n = nlt.z();
1845 Real32 f = flt.z();
1847 UInt32 numLights = getMultiLightChunk()->numLights();
1850 // We do need the eye space positions and directions of the lights
1851 // for our test view frustum and cluster frustum tests. In order to
1852 // calculate them only once we remember them in a vector.
1854 if (vecEyeSpaceData.size() != numLights)
1855 vecEyeSpaceData.resize(numLights);
1857 for (UInt32 i = 0; i < numLights; ++i)
1859 bool affected = false;
1861 if (getMultiLightChunk()->getEnabled(i))
1863 switch (getMultiLightChunk()->getType(i))
1865 case MultiLight::DIRECTIONAL_LIGHT:
1867 affected = true;
1869 break;
1870 case MultiLight::POINT_LIGHT:
1871 case MultiLight::CINEMA_LIGHT:
1873 transform_to_eye_space(
1874 matEyeFromWorld,
1875 getMultiLightChunk()->getBeacon(i),
1876 getMultiLightChunk()->getPosition(i),
1877 getMultiLightChunk()->getDirection(i),
1878 vecEyeSpaceData[i].position,
1879 vecEyeSpaceData[i].direction);
1881 if (point_light_inside_frustum(
1882 vecEyeSpaceData[i].position,
1883 getMultiLightChunk()->getRangeCutOff(i),
1884 frustum, n, f))
1886 affected = true;
1889 break;
1890 case MultiLight::SPOT_LIGHT:
1892 transform_to_eye_space(
1893 matEyeFromWorld,
1894 getMultiLightChunk()->getBeacon(i),
1895 getMultiLightChunk()->getPosition(i),
1896 getMultiLightChunk()->getDirection(i),
1897 vecEyeSpaceData[i].position,
1898 vecEyeSpaceData[i].direction);
1900 if (spot_light_inside_frustum(
1901 vecEyeSpaceData[i].position,
1902 vecEyeSpaceData[i].direction,
1903 getMultiLightChunk()->getRangeCutOff(i),
1904 getMultiLightChunk()->getSpotlightAngle(i),
1905 frustum, n, f))
1907 affected = true;
1910 break;
1914 if (affected)
1915 vecAffectedLights.push_back(UInt32(i));
1919 void ClusterShadingStage::transform_to_eye_space(
1920 Matrix matEyeFromWorld,
1921 Node* beacon,
1922 const Pnt3f& position_bs,
1923 const Vec3f& direction_bs,
1924 Pnt3f& position_es,
1925 Vec3f& direction_es)
1927 Matrix matWsFromBS;
1929 if(beacon != NULL)
1931 beacon->getToWorld(matWsFromBS);
1933 else
1935 matWsFromBS.setIdentity();
1938 matEyeFromWorld.mult(matWsFromBS); // matEsFromBs
1939 matEyeFromWorld.multFull( position_bs, position_es);
1940 matEyeFromWorld.multFull(direction_bs, direction_es);
1943 bool ClusterShadingStage::point_light_inside_frustum(
1944 const Pnt3f& position_es,
1945 Real32 range,
1946 const Frustum& frustum,
1947 Real32 n,
1948 Real32 f)
1950 Sphere sphere;
1951 sphere.c = position_es;
1952 sphere.r = range;
1954 return SphereInsideFrustum(sphere, frustum, n, f);
1957 bool ClusterShadingStage::spot_light_inside_frustum(
1958 const Pnt3f& position_es,
1959 const Vec3f& direction_es,
1960 Real32 range,
1961 Real32 spotlightAngle,
1962 const Frustum& frustum,
1963 Real32 n,
1964 Real32 f)
1966 Cone cone;
1967 cone.T = position_es;
1968 cone.d = direction_es;
1969 cone.h = range;
1970 cone.r = osgTan( osgDegree2Rad(spotlightAngle)) * cone.h;
1972 return ConeInsideFrustum(cone, frustum, n, f);
1975 /*----------------------- CPU Light Culling ------------------------------*/
1977 inline std::size_t ClusterShadingStage::frustum_accessor(
1978 const UInt32 i,
1979 const UInt32 j,
1980 const Vec3u& dimensions)
1982 return j * dimensions.x() + i;
1985 void ClusterShadingStage::cull_lights(
1986 ClusterShadingStageData* pData, // in contains the CPU calculated tile frustum planes
1987 const VecLightIndexT& vecAffectedLights, // in the lights that are actually contributing to the view frustum shading
1988 const VecLightEyeSpaceDataT& vecEyeSpaceData, // in the precalculated light eye space positions and directions
1989 const Vec3u& dimensions, // in the cluster dimentsions
1990 const Matrix& matEyeFromWorld, // in transform from world to eye space
1991 Real32 zNear, // in distance of near plane from eye point
1992 Real32 zFar, // in distance of far plane from eye point
1993 Real32 D, // in distance offset for near plane
1994 VecImageDataT& gridData, // out the raw grid data
1995 VecLightIndexT& lightIndexList) // out the index list
1997 gridData.clear();
1998 lightIndexList.clear();
2000 gridData.resize(dimensions.x() * dimensions.y() * dimensions.z());
2002 UInt32 c_1 = dimensions.z()-1;
2003 Real32 zNearD = zNear + D;
2004 Real32 b = log2(zFar/zNearD)/(c_1);
2006 for (UInt32 k = 0; k < dimensions.z(); ++k)
2008 Real32 n = cluster_z(k, zNear, zFar, zNearD, b, c_1);
2009 Real32 f = cluster_z(k+1, zNear, zFar, zNearD, b, c_1);
2011 for (UInt32 j = 0; j < dimensions.y(); ++j)
2013 for (UInt32 i = 0; i < dimensions.x(); ++i)
2015 std::size_t tile = frustum_accessor(i, j, dimensions);
2016 Frustum frustum = getFrustum(pData, static_cast<UInt32>(tile));
2018 UInt32 light_count = 0;
2019 UInt32 light_start_offset = UInt32(lightIndexList.size());
2021 std::size_t num_affected_lights = vecAffectedLights.size();
2023 for (std::size_t l = 0; l < num_affected_lights; ++l)
2025 UInt32 light_index = vecAffectedLights[l];
2027 if (getMultiLightChunk()->getEnabled(light_index))
2029 switch (getMultiLightChunk()->getType(light_index))
2031 case MultiLight::DIRECTIONAL_LIGHT:
2033 light_count += 1;
2034 lightIndexList.push_back(light_index);
2036 break;
2037 case MultiLight::POINT_LIGHT:
2038 case MultiLight::CINEMA_LIGHT:
2040 if (
2041 point_light_inside_frustum(
2042 vecEyeSpaceData[light_index].position,
2043 getMultiLightChunk()->getRangeCutOff(light_index),
2044 frustum, n, f)
2047 light_count += 1;
2048 lightIndexList.push_back(light_index);
2051 break;
2052 case MultiLight::SPOT_LIGHT:
2054 if (
2055 spot_light_inside_frustum(
2056 vecEyeSpaceData[light_index].position,
2057 vecEyeSpaceData[light_index].direction,
2058 getMultiLightChunk()->getRangeCutOff(light_index),
2059 getMultiLightChunk()->getSpotlightAngle(light_index),
2060 frustum, n, f)
2063 light_count += 1;
2064 lightIndexList.push_back(light_index);
2067 break;
2072 write_image_data(i, j, k, dimensions, std::make_pair(light_start_offset, light_count), gridData);
2078 /*-------------------- Compute Shader Programs ---------------------------*/
2080 std::string ClusterShadingStage::get_persp_frustum_cp_program()
2082 using namespace std;
2084 const Vec3i work_group_size = Vec3i(getBlockSize(), getBlockSize(), 1);
2085 const UInt32 tile_size = getTileSize();
2087 stringstream ost;
2089 ost << "#version 430 compatibility"
2090 << endl << ""
2091 << endl << "layout (local_size_x = " << work_group_size.x()
2092 << ", local_size_y = " << work_group_size.y()
2093 << ", local_size_z = " << work_group_size.z() << ") in;"
2094 << endl << ""
2095 << endl << "const int tile_size = " << tile_size << ";"
2096 << endl << ""
2097 << endl << "//"
2098 << endl << "// matTransf is the inverse projection matrix"
2099 << endl << "//"
2100 << getDispatchProgSnippet()
2101 << endl << ""
2102 << getFrustumProgSnippet()
2103 << endl << ""
2104 << endl << "const vec3 eyePos = vec3(0, 0, 0);"
2105 << endl << ""
2106 << endl << "Plane computePlane(in const vec3 p0, in const vec3 p1, in const vec3 p2)"
2107 << endl << "{"
2108 << endl << " Plane plane;"
2109 << endl << ""
2110 << endl << " vec3 v1 = p1 - p0;"
2111 << endl << " vec3 v2 = p2 - p0;"
2112 << endl << ""
2113 << endl << " plane.N = normalize(cross(v1, v2));"
2114 << endl << " plane.d = dot(plane.N, p0);"
2115 << endl << ""
2116 << endl << " return plane;"
2117 << endl << "}"
2118 << endl << ""
2119 << endl << "vec4 ndcFromScreen(in const vec3 p_w)"
2120 << endl << "{"
2121 << endl << " return vec4("
2122 << endl << " 2.0 * (p_w.x - dispatchData.viewport.x) / dispatchData.viewport[2] - 1.0,"
2123 << endl << " 2.0 * (p_w.y - dispatchData.viewport.y) / dispatchData.viewport[3] - 1.0,"
2124 << endl << " p_w.z, // assumed to be already in ndc-space!"
2125 << endl << " 1.0);"
2126 << endl << "}"
2127 << endl << ""
2128 << endl << "vec3 eyeFromNdc(in vec4 p_n)"
2129 << endl << "{"
2130 << endl << " vec4 p_e = dispatchData.matTransf * p_n; // inverse projection matrix"
2131 << endl << " p_e /= p_e.w;"
2132 << endl << " return p_e.xyz;"
2133 << endl << "}"
2134 << endl << ""
2135 << endl << "void main()"
2136 << endl << "{"
2137 << endl << " vec3 pnts_w[4];"
2138 << endl << " vec4 pnts_n[4];"
2139 << endl << " vec3 pnts_e[4];"
2140 << endl << ""
2141 << endl << " float x_v = dispatchData.viewport.x;"
2142 << endl << " float y_v = dispatchData.viewport.y;"
2143 << endl << " float w_v = dispatchData.viewport.z;"
2144 << endl << " float h_v = dispatchData.viewport.w;"
2145 << endl << ""
2146 << endl << " float x0 = x_v + gl_GlobalInvocationID.x * tile_size;"
2147 << endl << " float x1 = min(x_v + (gl_GlobalInvocationID.x+1) * tile_size, x_v + w_v);"
2148 << endl << " float y0 = y_v + gl_GlobalInvocationID.y * tile_size;"
2149 << endl << " float y1 = min(y_v + (gl_GlobalInvocationID.y+1) * tile_size, y_v + h_v);"
2150 << endl << ""
2151 << endl << " pnts_w[0] = vec3(x0, y0, -1.0);"
2152 << endl << " pnts_w[1] = vec3(x1, y0, -1.0);"
2153 << endl << " pnts_w[2] = vec3(x0, y1, -1.0);"
2154 << endl << " pnts_w[3] = vec3(x1, y1, -1.0);"
2155 << endl << ""
2156 << endl << " for (int i = 0; i < 4; ++i)"
2157 << endl << " {"
2158 << endl << " pnts_n[i] = ndcFromScreen(pnts_w[i]);"
2159 << endl << " pnts_e[i] = eyeFromNdc (pnts_n[i]);"
2160 << endl << " }"
2161 << endl << ""
2162 << endl << " Frustum frustum;"
2163 << endl << ""
2164 << endl << " frustum.planes[0] = computePlane(eyePos, pnts_e[0], pnts_e[2]);"
2165 << endl << " frustum.planes[1] = computePlane(eyePos, pnts_e[3], pnts_e[1]);"
2166 << endl << " frustum.planes[2] = computePlane(eyePos, pnts_e[2], pnts_e[3]);"
2167 << endl << " frustum.planes[3] = computePlane(eyePos, pnts_e[1], pnts_e[0]);"
2168 << endl << ""
2169 << endl << " if (gl_GlobalInvocationID.x < dispatchData.numTiles.x && gl_GlobalInvocationID.y < dispatchData.numTiles.y)"
2170 << endl << " {"
2171 << endl << " uint idx = gl_GlobalInvocationID.y * dispatchData.numTiles.x + gl_GlobalInvocationID.x;"
2172 << endl << " frustums.frustum[idx] = frustum;"
2173 << endl << " }"
2174 << endl << "}"
2175 << endl << ""
2176 << endl;
2178 return ost.str();
2181 std::string ClusterShadingStage::get_ortho_frustum_cp_program()
2183 using namespace std;
2185 const Vec3i work_group_size = Vec3i(getBlockSize(), getBlockSize(), 1);
2186 const UInt32 tile_size = getTileSize();
2188 stringstream ost;
2190 ost << "#version 430 compatibility"
2191 << endl << ""
2192 << endl << "layout (local_size_x = " << work_group_size.x()
2193 << ", local_size_y = " << work_group_size.y()
2194 << ", local_size_z = " << work_group_size.z() << ") in;"
2195 << endl << ""
2196 << endl << "const int tile_size = " << tile_size << ";"
2197 << endl << ""
2198 << endl << "//"
2199 << endl << "// matTransf is the inverse projection matrix"
2200 << endl << "//"
2201 << getDispatchProgSnippet()
2202 << endl << ""
2203 << getFrustumProgSnippet()
2204 << endl << ""
2205 << endl << "Plane computePlane(in const vec3 p0, in const vec3 p1, in const vec3 p2)"
2206 << endl << "{"
2207 << endl << " Plane plane;"
2208 << endl << ""
2209 << endl << " vec3 v1 = p1 - p0;"
2210 << endl << " vec3 v2 = p2 - p0;"
2211 << endl << ""
2212 << endl << " plane.N = normalize(cross(v1, v2));"
2213 << endl << " plane.d = dot(plane.N, p0);"
2214 << endl << ""
2215 << endl << " return plane;"
2216 << endl << "}"
2217 << endl << ""
2218 << endl << "vec4 ndcFromScreen(in const vec3 p_w)"
2219 << endl << "{"
2220 << endl << " return vec4("
2221 << endl << " 2.0 * (p_w.x - dispatchData.viewport.x) / dispatchData.viewport[2] - 1.0,"
2222 << endl << " 2.0 * (p_w.y - dispatchData.viewport.y) / dispatchData.viewport[3] - 1.0,"
2223 << endl << " p_w.z, // assumed to be already in ndc-space!"
2224 << endl << " 1.0);"
2225 << endl << "}"
2226 << endl << ""
2227 << endl << "vec3 eyeFromNdc(in vec4 p_n)"
2228 << endl << "{"
2229 << endl << " vec4 p_e = dispatchData.matTransf * p_n; // inverse projection matrix"
2230 << endl << " p_e /= p_e.w;"
2231 << endl << " return p_e.xyz;"
2232 << endl << "}"
2233 << endl << ""
2234 << endl << "void main()"
2235 << endl << "{"
2236 << endl << " vec3 pnts_w[8];"
2237 << endl << " vec4 pnts_n[8];"
2238 << endl << " vec3 pnts_e[8];"
2239 << endl << ""
2240 << endl << " float x_v = dispatchData.viewport.x;"
2241 << endl << " float y_v = dispatchData.viewport.y;"
2242 << endl << " float w_v = dispatchData.viewport.z;"
2243 << endl << " float h_v = dispatchData.viewport.w;"
2244 << endl << ""
2245 << endl << " float x0 = x_v + gl_GlobalInvocationID.x * tile_size;"
2246 << endl << " float x1 = min(x_v + (gl_GlobalInvocationID.x+1) * tile_size, x_v + w_v);"
2247 << endl << " float y0 = y_v + gl_GlobalInvocationID.y * tile_size;"
2248 << endl << " float y1 = min(y_v + (gl_GlobalInvocationID.y+1) * tile_size, y_v + h_v);"
2249 << endl << ""
2250 << endl << " pnts_w[0] = vec3(x0, y0, 1.0);"
2251 << endl << " pnts_w[1] = vec3(x1, y0, 1.0);"
2252 << endl << " pnts_w[2] = vec3(x0, y1, 1.0);"
2253 << endl << " pnts_w[3] = vec3(x1, y1, 1.0);"
2254 << endl << ""
2255 << endl << " pnts_w[4] = vec3(x0, y0,-1.0);"
2256 << endl << " pnts_w[5] = vec3(x1, y0,-1.0);"
2257 << endl << " pnts_w[6] = vec3(x0, y1,-1.0);"
2258 << endl << " pnts_w[7] = vec3(x1, y1,-1.0);"
2259 << endl << ""
2260 << endl << " for (int i = 0; i < 8; ++i)"
2261 << endl << " {"
2262 << endl << " pnts_n[i] = ndcFromScreen(pnts_w[i]);"
2263 << endl << " pnts_e[i] = eyeFromNdc (pnts_n[i]);"
2264 << endl << " }"
2265 << endl << ""
2266 << endl << " Frustum frustum;"
2267 << endl << ""
2268 << endl << " frustum.planes[0] = computePlane(pnts_e[6], pnts_e[0], pnts_e[2]);"
2269 << endl << " frustum.planes[1] = computePlane(pnts_e[7], pnts_e[3], pnts_e[1]);"
2270 << endl << " frustum.planes[2] = computePlane(pnts_e[6], pnts_e[2], pnts_e[3]);"
2271 << endl << " frustum.planes[3] = computePlane(pnts_e[4], pnts_e[1], pnts_e[0]);"
2272 << endl << ""
2273 << endl << " if (gl_GlobalInvocationID.x < dispatchData.numTiles.x && gl_GlobalInvocationID.y < dispatchData.numTiles.y)"
2274 << endl << " {"
2275 << endl << " uint idx = gl_GlobalInvocationID.y * dispatchData.numTiles.x + gl_GlobalInvocationID.x;"
2276 << endl << " frustums.frustum[idx] = frustum;"
2277 << endl << " }"
2278 << endl << "}"
2279 << endl << ""
2280 << endl;
2282 return ost.str();
2285 std::string ClusterShadingStage::get_light_culling_cp_program()
2287 using namespace std;
2289 const Vec3i work_group_size = Vec3i(getBlockSize(), getBlockSize(), 1);
2290 const UInt32 tile_size = getTileSize();
2292 stringstream ost;
2294 ost << "#version 430 compatibility"
2295 << endl << ""
2296 << endl << "#extension GL_ARB_shader_image_load_store: enable"
2297 << endl << ""
2298 << endl << "layout (local_size_x = " << work_group_size.x()
2299 << ", local_size_y = " << work_group_size.y()
2300 << ", local_size_z = " << work_group_size.z() << ") in;"
2301 << endl << ""
2302 << endl << "const int tile_size = " << tile_size << ";"
2303 << endl << ""
2304 << getMultiLightChunk()->getLightProgSnippet()
2305 << endl << ""
2306 << endl << "//"
2307 << endl << "// matTransf is the world to view matrix"
2308 << endl << "//"
2309 << getDispatchProgSnippet()
2310 << endl << ""
2311 << getClusteringProgSnippet(false)
2312 << endl << ""
2313 << endl << "//"
2314 << endl << "// We get the pre calculated list of frustums for each tile."
2315 << endl << "// The cluster frustum is accessed with the help of the"
2316 << endl << "// gl_WorkGroupID variable:"
2317 << endl << "//"
2318 << endl << "// idx = j * numHorizontalTiles + i"
2319 << endl << "// idx = gl_WorkGroupID.y * dispatchData.numTiles.x + gl_WorkGroupID.x"
2320 << endl << "//"
2321 << getFrustumProgSnippet()
2322 << endl << ""
2323 << endl << "//"
2324 << endl << "// A simple Sphere abstraction"
2325 << endl << "//"
2326 << endl << "struct Sphere"
2327 << endl << "{"
2328 << endl << " vec3 c; // Center point."
2329 << endl << " float r; // Radius."
2330 << endl << "};"
2331 << endl << ""
2332 << endl << "//"
2333 << endl << "// A simple Cone abstraction"
2334 << endl << "//"
2335 << endl << "struct Cone"
2336 << endl << "{"
2337 << endl << " vec3 T; // Cone tip."
2338 << endl << " float h; // Height of the cone."
2339 << endl << " vec3 d; // Direction of the cone."
2340 << endl << " float r; // bottom radius of the cone."
2341 << endl << "};"
2342 << endl << ""
2343 << getLightIndexProgSnippet()
2344 << endl << ""
2345 << endl << "//"
2346 << endl << "// The global light index list that is to be written by this shader"
2347 << endl << "//"
2348 << endl << "layout (std430) buffer LightIndexCounter"
2349 << endl << "{"
2350 << endl << " uint lightIndexCounter;"
2351 << endl << "};"
2352 << endl << ""
2353 << endl << "//"
2354 << endl << "// Per work group shared state"
2355 << endl << "//"
2356 << endl << "shared Frustum sharedFrustum;"
2357 << endl << "shared vec2 sharedFrustumZ;"
2358 << endl << "shared uint sharedLightCount;"
2359 << endl << "shared uint sharedLightStartOffset;"
2360 << endl << "shared uint sharedLightIndexList[1024];"
2361 << endl << ""
2362 << endl << "//"
2363 << endl << "// Check to see if a sphere is fully behind (inside the negative halfspace of) a plane."
2364 << endl << "//"
2365 << endl << "bool sphereInsidePlane("
2366 << endl << " in const Sphere sphere,"
2367 << endl << " in const Plane plane)"
2368 << endl << "{"
2369 << endl << " float val = dot(plane.N, sphere.c) - plane.d;"
2370 << endl << " return val < -sphere.r;"
2371 << endl << "}"
2372 << endl << ""
2373 << endl << "//"
2374 << endl << "// Check to see of a light is partially contained within the frustum."
2375 << endl << "//"
2376 << endl << "bool sphereInsideFrustum("
2377 << endl << " in const Sphere sphere,"
2378 << endl << " in const Frustum frustum,"
2379 << endl << " in const float zNear,"
2380 << endl << " in const float zFar)"
2381 << endl << "{"
2382 << endl << " bool result = true;"
2383 << endl << ""
2384 << endl << " if (sphere.c.z - sphere.r > zNear || zFar > sphere.c.z + sphere.r)"
2385 << endl << " {"
2386 << endl << " result = false;"
2387 << endl << " }"
2388 << endl << ""
2389 << endl << " for (int i = 0; i < 4 && result; i++)"
2390 << endl << " {"
2391 << endl << " if (sphereInsidePlane(sphere, frustum.planes[i]))"
2392 << endl << " {"
2393 << endl << " result = false;"
2394 << endl << " }"
2395 << endl << " }"
2396 << endl << ""
2397 << endl << " return result;"
2398 << endl << "}"
2399 << endl << ""
2400 << endl << "//"
2401 << endl << "// Check to see if a point is fully behind (inside the negative halfspace of) a plane."
2402 << endl << "//"
2403 << endl << "bool pointInsidePlane("
2404 << endl << " in const vec3 p,"
2405 << endl << " in const Plane plane)"
2406 << endl << "{"
2407 << endl << " float val = dot(plane.N, p) - plane.d;"
2408 << endl << " return val < 0;"
2409 << endl << "}"
2410 << endl << ""
2411 << endl << "//"
2412 << endl << "// Check to see if a cone if fully behind (inside the negative halfspace of) a plane."
2413 << endl << "//"
2414 << endl << "bool coneInsidePlane("
2415 << endl << " in const Cone cone,"
2416 << endl << " in const Plane plane)"
2417 << endl << "{"
2418 << endl << " // Compute the farthest point on the end of the cone to the positive space of the plane."
2419 << endl << " vec3 m = cross(cross(plane.N, cone.d), cone.d);"
2420 << endl << " vec3 Q = cone.T + cone.d * cone.h - m * cone.r;"
2421 << endl << ""
2422 << endl << " // The cone is in the negative halfspace of the plane if both"
2423 << endl << " // the tip of the cone and the farthest point on the end of the cone to the "
2424 << endl << " // positive halfspace of the plane are both inside the negative halfspace "
2425 << endl << " // of the plane."
2426 << endl << " return pointInsidePlane(cone.T, plane) && pointInsidePlane(Q, plane);"
2427 << endl << "}"
2428 << endl << ""
2429 << endl << "bool coneInsideFrustum("
2430 << endl << " in const Cone cone,"
2431 << endl << " in const Frustum frustum,"
2432 << endl << " in const float zNear,"
2433 << endl << " in const float zFar)"
2434 << endl << "{"
2435 << endl << " bool result = true;"
2436 << endl << ""
2437 << endl << " Plane nearPlane; nearPlane.N = vec3(0, 0,-1); nearPlane.d = -zNear;"
2438 << endl << " Plane farPlane; farPlane.N = vec3(0, 0, 1); farPlane.d = zFar;"
2439 << endl << ""
2440 << endl << " if (coneInsidePlane(cone, nearPlane) || coneInsidePlane(cone, farPlane))"
2441 << endl << " {"
2442 << endl << " result = false;"
2443 << endl << " }"
2444 << endl << ""
2445 << endl << " for (int i = 0; i < 4 && result; i++)"
2446 << endl << " {"
2447 << endl << " if (coneInsidePlane(cone, frustum.planes[i]))"
2448 << endl << " {"
2449 << endl << " result = false;"
2450 << endl << " }"
2451 << endl << " }"
2452 << endl << ""
2453 << endl << " return result;"
2454 << endl << "}"
2455 << endl << ""
2456 << endl << "//"
2457 << endl << "// Helper function that provides the cluster index (ivec3) of the current work group invocation"
2458 << endl << "//"
2459 << endl << "ivec3 clusterAccessor()"
2460 << endl << "{"
2461 << endl << " return ivec3(gl_WorkGroupID);"
2462 << endl << "}"
2463 << endl << ""
2464 << endl << "//"
2465 << endl << "// Helper function that provides the tile frustum index of the current cluster"
2466 << endl << "//"
2467 << endl << "int frustumAccessor()"
2468 << endl << "{"
2469 << endl << " return int(gl_WorkGroupID.y) * dispatchData.numTiles.x + int(gl_WorkGroupID.x);"
2470 << endl << "}"
2471 << endl << ""
2472 << endl << "//"
2473 << endl << "// Append the light light_idx to the list of lights to be rendered for "
2474 << endl << "// this cluster. That is we have to increment the sharedLightCount and"
2475 << endl << "// to append the light_idx to the sharedLightIndexList."
2476 << endl << "// "
2477 << endl << "void appendLight(in const uint light_idx)"
2478 << endl << "{"
2479 << endl << " uint idx = atomicAdd(sharedLightCount, 1);"
2480 << endl << " if (idx < 1024)"
2481 << endl << " {"
2482 << endl << " sharedLightIndexList[idx] = light_idx;"
2483 << endl << " }"
2484 << endl << "}"
2485 << endl << ""
2486 << endl << "void main()"
2487 << endl << "{"
2488 << endl << " //"
2489 << endl << " // Initialize the work group shared state: Only the first thread is needed for that"
2490 << endl << " //"
2491 << endl << " if (gl_LocalInvocationIndex == 0)"
2492 << endl << " {"
2493 << endl << " sharedFrustum = frustums.frustum[frustumAccessor()];"
2494 << endl << " sharedFrustumZ = getClusterDepth();"
2495 << endl << " sharedLightCount = 0;"
2496 << endl << " }"
2497 << endl << ""
2498 << endl << " memoryBarrierShared(); // Ensure change to sharedXXX is visible in other invocations"
2499 << endl << " barrier(); // Stall until every thread reaches this point"
2500 << endl << ""
2501 << endl << " //"
2502 << endl << " // We iterate over all affected lights whereby splitting the lights into subsets for each thread."
2503 << endl << " // For each light we test it against the current frustum and if it is inside of the frustum, we add"
2504 << endl << " // the light to the group shared list of lights contributing to the current cluster corresponding to"
2505 << endl << " // the work group."
2506 << endl << " //"
2507 << endl << " for (uint i = gl_LocalInvocationIndex; i < " << getAffectedLightIndexListVariableName() << ".idx.length(); i += gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z)"
2508 << endl << " {"
2509 << endl << " uint light_index = " << getAffectedLightIndexListVariableName() << ".idx[i];"
2510 << endl << ""
2511 << endl << " if (" << getMultiLightChunk()->getLightVariableName() << ".light[light_index].enabled)"
2512 << endl << " {"
2513 << endl << " Light light = " << getMultiLightChunk()->getLightVariableName() << ".light[light_index];"
2514 << endl << ""
2515 << endl << " switch (light.type)"
2516 << endl << " {"
2517 << endl << " case DIRECTIONAL_LIGHT:"
2518 << endl << " {"
2519 << endl << " appendLight(light_index);"
2520 << endl << " }"
2521 << endl << " break;"
2522 << endl << ""
2523 << endl << " case POINT_LIGHT:"
2524 << endl << " case CINEMA_LIGHT:"
2525 << endl << " {"
2526 << endl << " vec4 position = dispatchData.matTransf * vec4(light.position, 1.0);"
2527 << endl << ""
2528 << endl << " Sphere sphere = { position.xyz, light.rangeCutOff };"
2529 << endl << ""
2530 << endl << " if (sphereInsideFrustum(sphere, sharedFrustum, sharedFrustumZ.x, sharedFrustumZ.y))"
2531 << endl << " {"
2532 << endl << " appendLight(light_index);"
2533 << endl << " }"
2534 << endl << " }"
2535 << endl << " break;"
2536 << endl << ""
2537 << endl << " case SPOT_LIGHT:"
2538 << endl << " {"
2539 << endl << " vec4 position = dispatchData.matTransf * vec4(light.position, 1.0);"
2540 << endl << " vec4 direction = dispatchData.matTransf * vec4(light.direction, 0.0);"
2541 << endl << ""
2542 << endl << " float radius = tan(light.spotlightAngle) * light.rangeCutOff;"
2543 << endl << " Cone cone = { position.xyz, light.rangeCutOff, direction.xyz, radius };"
2544 << endl << ""
2545 << endl << " if (coneInsideFrustum(cone, sharedFrustum, sharedFrustumZ.x, sharedFrustumZ.y))"
2546 << endl << " {"
2547 << endl << " appendLight(light_index);"
2548 << endl << " }"
2549 << endl << " }"
2550 << endl << " break;"
2551 << endl << " }"
2552 << endl << " }"
2553 << endl << " }"
2554 << endl << ""
2555 << endl << " //"
2556 << endl << " // Wait till all threads in group have caught up."
2557 << endl << " //"
2558 << endl << " memoryBarrierShared(); // Ensure change to sharedXXX is visible in other invocations"
2559 << endl << " barrier(); // Stall until every thread reaches this point"
2560 << endl << ""
2561 << endl << " //"
2562 << endl << " // Now we have the sharedLightIndexList filled and know by sharedLightCount the number"
2563 << endl << " // of lights that are contributing for the current cluster. What we have to do now is"
2564 << endl << " // to get space in the global light index list (lightIndexList). For that, we use the"
2565 << endl << " // global light index counter (lightIndexCounter) by atomically incrementing it"
2566 << endl << " // with the number of lights contributing to the current cluster and getting back the"
2567 << endl << " // offset from the start of the global light index list (sharedLightStartOffset)."
2568 << endl << " // So now we have requested space on the global light index list and we have the data"
2569 << endl << " // that need to be written to the global light grid data image, i.e. the offset from"
2570 << endl << " // the beginning of the global light index list and the number of lights to use for"
2571 << endl << " // shading the fragments falling into the current cluster."
2572 << endl << " //"
2573 << endl << " // Only thread 0 is needed for:"
2574 << endl << " // - atomically increment lightIndexCounter by sharedLightCount to get"
2575 << endl << " // sharedLightStartOffset"
2576 << endl << " // - write (sharedLightStartOffset, sharedLightCount) to light grid"
2577 << endl << " //"
2578 << endl << " if (gl_LocalInvocationIndex == 0)"
2579 << endl << " {"
2580 << endl << " sharedLightStartOffset = atomicAdd(lightIndexCounter, sharedLightCount);"
2581 << endl << ""
2582 << endl << " uvec4 data = uvec4(sharedLightStartOffset, sharedLightCount, 0, 0);"
2583 << endl << ""
2584 << endl << " //"
2585 << endl << " // We must protect from overflow"
2586 << endl << " //"
2587 << endl << " uint num_indices = " << getLightIndexListVariableName() << ".idx.length();"
2588 << endl << " if (sharedLightStartOffset + sharedLightCount >= num_indices)"
2589 << endl << " data = uvec4(0,0,0,0);"
2590 << endl << ""
2591 << endl << " imageStore(" << getLightGridVariableName() << ", clusterAccessor(), data);"
2592 << endl << " }"
2593 << endl << ""
2594 << endl << " memoryBarrierShared(); // Ensure change to sharedXXX is visible in other invocations"
2595 << endl << " barrier(); // Stall until every thread reaches this point"
2596 << endl << ""
2597 << endl << " //"
2598 << endl << " // The last task is to write the actual lights affecting the current cluster into the"
2599 << endl << " // the global light index list. We have already requestet the appropriate space on the"
2600 << endl << " // this list so we can just iterate over the local light list (sharedLightIndexList) and"
2601 << endl << " // write the carry the content to the global list. That can also be done in parrallel, so"
2602 << endl << " // we use all the thread we have at hand."
2603 << endl << " //"
2604 << endl << ""
2605 << endl << " uint num_indices = " << getLightIndexListVariableName() << ".idx.length();"
2606 << endl << ""
2607 << endl << " for (uint i = gl_LocalInvocationIndex; i < sharedLightCount; i += gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z)"
2608 << endl << " {"
2609 << endl << " if (sharedLightStartOffset + i < num_indices)"
2610 << endl << " " << getLightIndexListVariableName() << ".idx[sharedLightStartOffset + i] = sharedLightIndexList[i];"
2611 << endl << " }"
2612 << endl << "}"
2613 << endl << ""
2614 << endl;
2616 return ost.str();
2619 std::string ClusterShadingStage::get_fragment_cp_program()
2621 using namespace std;
2623 stringstream ost;
2626 << endl << ""
2627 //<< getMultiLightChunk()->getLightProgSnippet()
2628 //<< endl << ""
2629 //<< getClusteringProgSnippet()
2630 //<< endl << ""
2631 //<< getLightIndexProgSnippet()
2632 //<< endl << ""
2633 << endl;
2635 return ost.str();
2638 /*-------------------- Shader Program Snippets ---------------------------*/
2640 std::string ClusterShadingStage::getDispatchProgSnippet() const
2642 using namespace std;
2644 stringstream ost;
2647 << endl << "layout (std140) uniform DispatchData"
2648 << endl << "{"
2649 << endl << " mat4 matTransf;"
2650 << endl << " uvec4 viewport;"
2651 << endl << " ivec2 numTiles;"
2652 << endl << "} dispatchData;"
2653 << endl
2656 return ost.str();
2659 std::string ClusterShadingStage::getClusteringProgSnippet(bool is_frag_shader) const
2661 using namespace std;
2663 const UInt32 tile_size = getTileSize();
2665 stringstream ost;
2667 if (is_frag_shader)
2670 << endl << "const int tile_size = " << tile_size << ";"
2671 << endl << ""
2675 << endl << "//"
2676 << endl << "// We write our results into the global light grid with:"
2677 << endl << "// imageStore(" << getLightGridVariableName() << ", ivec3(i,j,k), uvec4(o,c,0,0))"
2678 << endl << "// and read it back with:"
2679 << endl << "// imageLoad(" << getLightGridVariableName() << ", ivec3(i,j,k)).xy"
2680 << endl << "// were:"
2681 << endl << "// i,j,k define the cluster index"
2682 << endl << "// o is the offset from the beginning of the global light index list for the cluster"
2683 << endl << "// c is the number if lights that are to be used for the cluster"
2684 << endl << "//"
2685 << endl << "layout(binding = " << getLightGridBindingPnt() << ", rg32ui) uniform uimage2DArray " << getLightGridVariableName() << ";"
2686 << endl << ""
2687 << endl << "//"
2688 << endl << "// The data we need to calc the cluster depth values from the cluster key k"
2689 << endl << "//"
2690 << endl << "layout (std140) uniform " << getClusteringDataBlockName()
2691 << endl << "{"
2692 << endl << " float zNear; // positive near plane distance from eye zNear > 0"
2693 << endl << " float zFar; // positive far plane distance from eye zFar > zNear > 0"
2694 //<< endl << " float D; // positive near plane offset D >= 0 // for testing"
2695 << endl << " float nD; // zNear + D : shader optimization"
2696 << endl << " float lg_nD; // log2(nD) : shader optimization"
2697 << endl << " float a; // precalculated factor (c-1)/log2(f/(n+D))"
2698 << endl << " float b; // precalculated factor log2(f/(n+D))/(c-1)"
2699 //<< endl << " int c; // number of cluster planes // for testing"
2700 << endl << " int c_1; // number of cluster planes minus one : shader optimization"
2701 << endl << " ivec2 p_v; // viewport corner points"
2702 //<< endl << " ivec3 n_c; // number of clusters // for testing"
2703 << endl << " bool enabled;// cluster shadingd enabled"
2704 << endl << "} " << getClusteringDataVariableName() << ";"
2705 << endl << ""
2706 << endl << "//"
2707 << endl << "// cluster_z calculates the cluster eye space z from the cluster key k value"
2708 << endl << "// z_e = cluster_z(k, ...) with z_e in [-n, -f]"
2709 << endl << "//"
2710 << endl << "// Formular:"
2711 << endl << "// z_e = -n if k == 0"
2712 << endl << "// z_e = -(n+D) * exp2( (k-1)*(log2(f/(n+D))/(c-1)) ) else"
2713 << endl << "// z_e = -f if k > c-1"
2714 << endl << "//"
2716 if (!is_frag_shader)
2719 //<< endl << "float cluster_z("
2720 //<< endl << " in const uint k, // cluster coordinate, 0 <= k <= c"
2721 //<< endl << " in const float n, // near plane distance from viewer n > 0"
2722 //<< endl << " in const float f, // far plane distance from viewer f > n > 0"
2723 //<< endl << " in const float D, // near plane offset"
2724 //<< endl << " in const int c) // number of cluster planes"
2725 //<< endl << "{"
2726 //<< endl << " if (k == 0) return -n;"
2727 //<< endl << " if (k > c-1) return -f;"
2728 //<< endl << ""
2729 //<< endl << " float z_e = -(n+D) * exp2(float(k-1) * (log2(f/(n+D))/float(c-1)));"
2730 //<< endl << " return z_e;"
2731 //<< endl << "}"
2732 //<< endl << ""
2733 << endl << "float cluster_z("
2734 << endl << " in const uint k, // cluster coordinate, 0 <= k <= c"
2735 << endl << " in const float n, // near plane distance from viewer n > 0"
2736 << endl << " in const float f, // far plane distance from viewer f > n > 0"
2737 << endl << " in const float nD, // near plane distance from viewer plus offset, nD = n+D with n > 0, D > 0"
2738 << endl << " in const float b, // factor log2(f/(n+D))/(c-1)"
2739 << endl << " in const int c_1) // number of cluster planes"
2740 << endl << "{"
2741 << endl << " if (k == 0) return -n;"
2742 << endl << " if (k > c_1) return -f;"
2743 << endl << ""
2744 << endl << " float z_e = -nD * exp2(float(k-1) * b);"
2745 << endl << " return z_e;"
2746 << endl << "}"
2747 << endl << ""
2748 << endl << "//"
2749 << endl << "// Helper function that provides the tile frustum index of the current cluster"
2750 << endl << "//"
2751 << endl << "vec2 getClusterDepth()"
2752 << endl << "{"
2753 << endl << " return vec2(cluster_z(gl_WorkGroupID.z, "
2754 << getClusteringDataVariableName() << ".zNear, "
2755 << getClusteringDataVariableName() << ".zFar, "
2756 << getClusteringDataVariableName() << ".nD, "
2757 << getClusteringDataVariableName() << ".b, "
2758 << getClusteringDataVariableName() << ".c_1),"
2759 << endl << " cluster_z(gl_WorkGroupID.z+1, "
2760 << getClusteringDataVariableName() << ".zNear, "
2761 << getClusteringDataVariableName() << ".zFar, "
2762 << getClusteringDataVariableName() << ".nD, "
2763 << getClusteringDataVariableName() << ".b, "
2764 << getClusteringDataVariableName() << ".c_1));"
2765 << endl << "}"
2768 else
2771 << endl << "//"
2772 << endl << "// cluster_k and cluster_k_verbose calculates the cluster key from the eye space z value"
2773 << endl << "// k = cluster_k(z_e, ...) with k in [0, c["
2774 << endl << "//"
2775 << endl << "// Formular:"
2776 << endl << "// k = 0 if z_e >= -(n+D)"
2777 << endl << "// k = 1 + (c-1) * log2(z_e/-(n+D)) / log2(f/(n+D)) else"
2778 << endl << "// k = c-1 if z_e <= -f"
2779 << endl << "//"
2780 //<< endl << "int cluster_k("
2781 //<< endl << " in const float z_e, // eye space z-position, z_e < 0"
2782 //<< endl << " in const float n, // near plane distance from viewer n > 0"
2783 //<< endl << " in const float f, // far plane distance from viewer f > n > 0"
2784 //<< endl << " in const float D, // near plane offset"
2785 //<< endl << " in const int c) // number of cluster planes"
2786 //<< endl << "{"
2787 //<< endl << " if (z_e >= -(n+D)) return 0;"
2788 //<< endl << " if (z_e <= -f) return c-1;"
2789 //<< endl << ""
2790 //<< endl << " float s = 1.0+(float(c-1)/log2(f/(n+D)))*log2(z_e/-(n+D));"
2791 //<< endl << " return clamp(int(s), 0, c-1);"
2792 //<< endl << "}"
2793 //<< endl << ""
2794 << endl << "int cluster_k("
2795 << endl << " in const float z_e, // eye space z-position, z_e < 0"
2796 << endl << " in const float nD, // near plane distance plus the offset D from viewer n > 0, D > 0"
2797 << endl << " in const float lg_nD,// log2(nD)"
2798 << endl << " in const float f, // far plane distance from viewer f > n > 0"
2799 << endl << " in const float a, // (c-1)/log2(f/(n+D))"
2800 << endl << " in const int c_1) // number of cluster planes minus 1"
2801 << endl << "{"
2802 << endl << " if (z_e >= -nD) return 0;"
2803 << endl << " if (z_e <= -f) return c_1;"
2804 << endl << ""
2805 << endl << " float s = 1.0 + a * (log2(-z_e) - lg_nD);"
2806 << endl << " return clamp(int(s), 0, c_1);"
2807 << endl << "}"
2808 << endl << ""
2809 << endl << "//"
2810 << endl << "// OpenGL window space is defined such that pixel centers are on half-integer boundaries."
2811 << endl << "// So the center of the lower-left pixel is (0.5,0.5). Using pixel_center_integer? adjust"
2812 << endl << "// gl_FragCoord such that whole integer values represent pixel centers."
2813 << endl << "// This feature exist to be compatible with D3D's window space. Unless you need your shaders"
2814 << endl << "// to have this compatibility, you are advised not to use these features."
2815 << endl << "// => We do not use it!"
2816 << endl << "//"
2817 << endl << "// Provide a accessor key to probe the light grid."
2818 << endl << "// in p_w : xy-screen position provided by gl_FragCoord.xy: lower-left is (0.5, 0.5)"
2819 << endl << "// in z_e : fragment eye space z from vPositionES"
2820 << endl << "// out : 3D image coordinate"
2821 << endl << "//"
2822 << endl << "ivec3 gridAccessor("
2823 << endl << " in const vec2 p_w,"
2824 << endl << " in const float z_e)"
2825 << endl << "{"
2826 << endl << " ivec2 q_w = ivec2(p_w - vec2(0.5, 0.5));"
2827 << endl << " int k = cluster_k(z_e, "
2828 << getClusteringDataVariableName() << ".nD, "
2829 << getClusteringDataVariableName() << ".lg_nD, "
2830 << getClusteringDataVariableName() << ".zFar, "
2831 << getClusteringDataVariableName() << ".a, "
2832 << getClusteringDataVariableName() << ".c_1);"
2833 << endl << " ivec2 p = (q_w - " << getClusteringDataVariableName() << ".p_v) / tile_size;"
2834 << endl << " return ivec3(p.xy, k);"
2835 << endl << "}"
2836 << endl << ""
2837 << endl << "//"
2838 << endl << "// Retrieve the cluster light grid data."
2839 << endl << "// in p_w : xy-screen position provided by gl_FragCoord.xy: lower-left is (0.5, 0.5)"
2840 << endl << "// in z_e : fragment eye space z from vPositionES"
2841 << endl << "// out : (light index list start position, number of lights)"
2842 << endl << "//"
2843 << endl << "uvec2 getGridData("
2844 << endl << " in const vec2 p_w,"
2845 << endl << " in const float z_e)"
2846 << endl << "{"
2847 << endl << " ivec3 accessor = gridAccessor(p_w, z_e);"
2848 << endl << " return imageLoad(" << getLightGridVariableName() << ", accessor).xy;"
2849 << endl << "}"
2853 << endl
2856 return ost.str();
2859 std::string ClusterShadingStage::getFrustumProgSnippet() const
2861 using namespace std;
2863 stringstream ost;
2866 << endl << "//"
2867 << endl << "// A simple Plane abstraction"
2868 << endl << "//"
2869 << endl << "struct Plane"
2870 << endl << "{"
2871 << endl << " vec3 N;"
2872 << endl << " float d;"
2873 << endl << "};"
2874 << endl << ""
2875 << endl << "//"
2876 << endl << "// The left, right, top and bottom planes of the tile frustum"
2877 << endl << "//"
2878 << endl << "struct Frustum"
2879 << endl << "{"
2880 << endl << " Plane planes[4];"
2881 << endl << "};"
2882 << endl << ""
2883 << endl << "layout (std430) buffer Frustums"
2884 << endl << "{"
2885 << endl << " Frustum frustum[];"
2886 << endl << "} frustums;"
2887 << endl
2890 return ost.str();
2893 std::string ClusterShadingStage::getLightIndexProgSnippet() const
2895 using namespace std;
2897 stringstream ost;
2900 << endl << "//"
2901 << endl << "// Index list into the global light list that contain only"
2902 << endl << "// those lights that affect shading in the view frustum."
2903 << endl << "//"
2904 << endl << "layout (std430) buffer " << getAffectedLightIndexListBlockName()
2905 << endl << "{"
2906 << endl << " uint idx[];"
2907 << endl << "} " << getAffectedLightIndexListVariableName() << ";"
2908 << endl << ""
2909 << endl << "//"
2910 << endl << "// Index list into the global light list. All sequences of lights"
2911 << endl << "// contributing to shading of the individal clusters are assembled"
2912 << endl << "// in this list. The start position and number of lights for each"
2913 << endl << "// cluster are provided by the light grid image."
2914 << endl << "//"
2915 << endl << "layout (std430) buffer " << getLightIndexListBlockName()
2916 << endl << "{"
2917 << endl << " uint idx[];"
2918 << endl << "} " << getLightIndexListVariableName() << ";"
2919 << endl
2922 return ost.str();
2925 std::string ClusterShadingStage::getFragmentProgramSnippet() const
2927 using namespace std;
2929 stringstream ost;
2932 << endl << "//"
2933 << endl << "//-- ClusterShadingStage::getFragmentProgramSnippet() --"
2934 << endl << "//"
2935 << getMultiLightChunk()->getLightProgSnippet()
2936 << endl << ""
2937 << getClusteringProgSnippet(true)
2938 << endl << ""
2939 << getLightIndexProgSnippet()
2940 << endl << ""
2941 << endl << "//------------------------------------------------------"
2942 << endl << ""
2943 << endl
2946 return ost.str();
2949 OSG_END_NAMESPACE