nel/src/3d/mesh_mrm_skinned_template.cpp

   1 /**
   2  * File not compiled. Included from mesh_mrm_skinned.cpp. It is an "old school" template.
   3  */
   4
   5 // NeL - MMORPG Framework <http://dev.ryzom.com/projects/nel/>
   6 // Copyright (C) 2010  Winch Gate Property Limited
   7 //
   8 // This program is free software: you can redistribute it and/or modify
   9 // it under the terms of the GNU Affero General Public License as
  10 // published by the Free Software Foundation, either version 3 of the
  11 // License, or (at your option) any later version.
  12 //
  13 // This program is distributed in the hope that it will be useful,
  14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 // GNU Affero General Public License for more details.
  17 //
  18 // You should have received a copy of the GNU Affero General Public License
  19 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
  20
  21 #include "std3d.h"
  22
  23 #ifdef DEBUG_NEW
  24 #define new DEBUG_NEW
  25 #endif
  26
  27 // ***************************************************************************
  28 // ***************************************************************************
  29 // Raw "Vertex/Normal only" ApplySkin methods.
  30 // ***************************************************************************
  31 // ***************************************************************************
  32
  33 // ace: #define ADD_MESH_MRM_SKINNED_TEMPLATE before including this file to enable its content.
  34 //      The goal is to be able to compile every .cpp file with no
  35 //      special case (GNU/Linux needs).
  36 #ifdef ADD_MESH_MRM_SKINNED_TEMPLATE
  37
  38
  39 #define NL3D_RAWSKIN_NORMAL_OFF         12 // byte offset added to destVertexPtr to write the skinned normal
  40 #define NL3D_RAWSKIN_UV_OFF                     24 // byte offset added to destVertexPtr to write the copied UV
  41 #define NL3D_RAWSKIN_VERTEX_SIZE        32 // byte step applied to destVertexPtr after each skinned vertex
  42
  43
  44 /* Speed feature test.
  45         Don't use precaching for now, because it seems to be slower on some configs (P4-2.4Ghz),
  46         but may be faster on others (P3-800).
  47         On a P4-2.4Ghz, for 40000 vertices skinned, using both "no precaching" and asm
  48         saves 27% of execution time in the applyRawSkinNormal*() loop (ie 1 ms).
  49 */
  50 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM) // the __asm blocks below are only compiled under this condition
  51 //#define       NL3D_RAWSKIN_PRECACHE -- left disabled, see the speed test note above
  52 #define NL3D_RAWSKIN_ASM
  53 #endif
  54
  55
  56 // ***************************************************************************
  57 void            CMeshMRMSkinnedGeom::applyArrayRawSkinNormal1(CRawVertexNormalSkinned1 *src, uint8 *destVertexPtr,
  58         CMatrix3x4 *boneMat3x4, uint nInf)
  59 {
  60         // must write contigously in AGP, and ASM is hardcoded...
  61         nlctassert(NL3D_RAWSKIN_NORMAL_OFF==12);
  62         nlctassert(NL3D_RAWSKIN_UV_OFF==24);
  63
  64         /*extern        uint TESTYOYO_NumRawSkinVertices1;
  65         TESTYOYO_NumRawSkinVertices1+= nInf;
  66         H_AUTO( TestYoyo_RawSkin1 );*/
  67
  68 #ifdef  NL3D_RAWSKIN_PRECACHE
  69         for(;nInf>0;)
  70         {
  71                 // number of vertices to process for this block.
  72                 uint    nBlockInf= min(NumCacheVertexNormal1, nInf);
  73                 // next block.
  74                 nInf-= nBlockInf;
  75
  76                 // cache the data in L1 cache.
  77                 CFastMem::precache(src, nBlockInf * sizeof(CRawVertexNormalSkinned1));
  78 #else
  79         {
  80                 uint    nBlockInf= nInf;
  81 #endif
  82
  83
  84 #ifndef NL3D_RAWSKIN_ASM
  85                 //  for all InfluencedVertices only.
  86                 for(;nBlockInf>0;nBlockInf--, src++, destVertexPtr+=NL3D_RAWSKIN_VERTEX_SIZE)
  87                 {
  88                         CVector                         *dstVertex= (CVector*)(destVertexPtr);
  89                         CVector                         *dstNormal= (CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF);
  90
  91                         // For 1 matrix, can write directly to AGP (if destVertexPtr is AGP...)
  92                         // Vertex.
  93                         boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex, *(CVector*)(destVertexPtr) );
  94                         // Normal.
  95                         boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) );
  96                         // UV copy.
  97                         *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->UV;
  98                 }
  99 #else
 100                 // ASM hard coded for 36
 101                 nlctassert(sizeof(CRawVertexNormalSkinned1)==36);
 102
 103                 /*  116 cycles / loop typical
 104                         58 cycles / loop in theory (no memory problem)
 105                 */
 106                 __asm
 107                 {
 108                         mov             ecx, nBlockInf
 109                         mov             esi, src
 110                         mov             edi, destVertexPtr
 111                         mov             edx, boneMat3x4
 112                 theLoop:
 113                         // Vertex.
 114                         // **** boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex, *(CVector*)(destVertexPtr) );
 115
 116                         // eax= matrix
 117                         mov             eax, [esi]src.MatrixId                          // uop: 0/1
 118                         lea             eax, [eax*2+eax]
 119                         shl             eax, 4
 120                         add             eax, edx                                                        // uop: 1/0
 121
 122                         // load x y z
 123                         fld             [esi]src.Vertex.x                                       // uop: 0/1
 124                         fld             [esi]src.Vertex.y                                       // uop: 0/1
 125                         fld             [esi]src.Vertex.z                                       // uop: 0/1
 126                         // vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
 127                         fld             [eax]CMatrix3x4.a11                             // uop: 0/1
 128                         fmul    st, st(3)                                                       // uop: 1/0 (5)
 129                         fld             [eax]CMatrix3x4.a12                             // uop: 0/1
 130                         fmul    st, st(3)                                                       // uop: 1/0 (5)
 131                         faddp   st(1), st                                                       // uop: 1/0 (3)
 132                         fld             [eax]CMatrix3x4.a13                             // uop: 0/1
 133                         fmul    st, st(2)                                                       // uop: 1/0 (5)
 134                         faddp   st(1), st                                                       // uop: 1/0 (3)
 135                         fld             [eax]CMatrix3x4.a14                             // uop: 0/1
 136                         faddp   st(1), st                                                       // uop: 1/0 (3)
 137                         fstp    dword ptr[edi]                                          // uop: 0/0/1/1
 138                         // vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
 139                         fld             [eax]CMatrix3x4.a21
 140                         fmul    st, st(3)
 141                         fld             [eax]CMatrix3x4.a22
 142                         fmul    st, st(3)
 143                         faddp   st(1), st
 144                         fld             [eax]CMatrix3x4.a23
 145                         fmul    st, st(2)
 146                         faddp   st(1), st
 147                         fld             [eax]CMatrix3x4.a24
 148                         faddp   st(1), st
 149                         fstp    dword ptr[edi+4]
 150                         // vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
 151                         fld             [eax]CMatrix3x4.a31
 152                         fmul    st, st(3)
 153                         fld             [eax]CMatrix3x4.a32
 154                         fmul    st, st(3)
 155                         faddp   st(1), st
 156                         fld             [eax]CMatrix3x4.a33
 157                         fmul    st, st(2)
 158                         faddp   st(1), st
 159                         fld             [eax]CMatrix3x4.a34
 160                         faddp   st(1), st
 161                         fstp    dword ptr[edi+8]
 162                         // free x y z
 163                         fstp    st                                                                      // uop: 1/0
 164                         fstp    st                                                                      // uop: 1/0
 165                         fstp    st                                                                      // uop: 1/0
 166
 167
 168                         // Normal
 169                         // **** boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) );
 170
 171                         // load x y z
 172                         fld             [esi]src.Normal.x
 173                         fld             [esi]src.Normal.y
 174                         fld             [esi]src.Normal.z
 175                         // vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
 176                         fld             [eax]CMatrix3x4.a11                             // uop: 0/1
 177                         fmul    st, st(3)                                                       // uop: 1/0 (5)
 178                         fld             [eax]CMatrix3x4.a12                             // uop: 0/1
 179                         fmul    st, st(3)                                                       // uop: 1/0 (5)
 180                         faddp   st(1), st                                                       // uop: 1/0 (3)
 181                         fld             [eax]CMatrix3x4.a13                             // uop: 0/1
 182                         fmul    st, st(2)                                                       // uop: 1/0 (5)
 183                         faddp   st(1), st                                                       // uop: 1/0 (3)
 184                         fstp    dword ptr[edi+12]                                       // uop: 0/0/1/1
 185                         // vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
 186                         fld             [eax]CMatrix3x4.a21
 187                         fmul    st, st(3)
 188                         fld             [eax]CMatrix3x4.a22
 189                         fmul    st, st(3)
 190                         faddp   st(1), st
 191                         fld             [eax]CMatrix3x4.a23
 192                         fmul    st, st(2)
 193                         faddp   st(1), st
 194                         fstp    dword ptr[edi+16]
 195                         // vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
 196                         fld             [eax]CMatrix3x4.a31
 197                         fmul    st, st(3)
 198                         fld             [eax]CMatrix3x4.a32
 199                         fmul    st, st(3)
 200                         faddp   st(1), st
 201                         fld             [eax]CMatrix3x4.a33
 202                         fmul    st, st(2)
 203                         faddp   st(1), st
 204                         fstp    dword ptr[edi+20]
 205                         // free x y z
 206                         fstp    st
 207                         fstp    st
 208                         fstp    st
 209
 210
 211                         // UV copy.
 212                         // **** *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->UV;
 213                         mov             eax, [esi]src.UV.U                                      // uop: 0/1
 214                         mov             dword ptr[edi+24], eax                          // uop: 0/0/1/1
 215                         mov             eax, [esi]src.UV.V                                      // uop: 0/1
 216                         mov             dword ptr[edi+28], eax                          // uop: 0/0/1/1
 217
 218
 219                         // **** next
 220                         add             esi, 36                                                         // uop: 1/0
 221                         add             edi, NL3D_RAWSKIN_VERTEX_SIZE           // uop: 1/0
 222                         dec             ecx                                                                     // uop: 1/0
 223                         jnz             theLoop                                                         // uop: 1/1 (p1)
 224
 225                         mov             nBlockInf, ecx
 226                         mov             src, esi
 227                         mov             destVertexPtr, edi
 228                 }
 229 #endif
 230         }
 231
 232
 233 }
 234
 235 // ***************************************************************************
 236 void            CMeshMRMSkinnedGeom::applyArrayRawSkinNormal2(CRawVertexNormalSkinned2 *src, uint8 *destVertexPtr,
 237         CMatrix3x4 *boneMat3x4, uint nInf)
 238 {
 239         // must write contigously in AGP, and ASM is hardcoded...
 240         nlctassert(NL3D_RAWSKIN_NORMAL_OFF==12);
 241         nlctassert(NL3D_RAWSKIN_UV_OFF==24);
 242
 243         /*extern        uint TESTYOYO_NumRawSkinVertices2;
 244         TESTYOYO_NumRawSkinVertices2+= nInf;
 245         H_AUTO( TestYoyo_RawSkin2 );*/
 246
 247         // Since VertexPtr may be a AGP Ram, MUST NOT read into it! (mulAdd*() do it!)
 248         CVector tmpVert;
 249
 250 #ifdef  NL3D_RAWSKIN_PRECACHE
 251         for(;nInf>0;)
 252         {
 253                 // number of vertices to process for this block.
 254                 uint    nBlockInf= min(NumCacheVertexNormal2, nInf);
 255                 // next block.
 256                 nInf-= nBlockInf;
 257
 258                 // cache the data in L1 cache.
 259                 CFastMem::precache(src, nBlockInf * sizeof(CRawVertexNormalSkinned2));
 260 #else
 261         {
 262                 uint    nBlockInf= nInf;
 263 #endif
 264
 265
 266 #ifndef NL3D_RAWSKIN_ASM
 267                 //  for all InfluencedVertices only.
 268                 for(;nBlockInf>0;nBlockInf--, src++, destVertexPtr+=NL3D_RAWSKIN_VERTEX_SIZE)
 269                 {
 270                         // Vertex.
 271                         boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex, src->Weights[0], tmpVert);
 272                         boneMat3x4[ src->MatrixId[1] ].mulAddPoint( src->Vertex, src->Weights[1], tmpVert);
 273                         *(CVector*)(destVertexPtr)= tmpVert;
 274                         // Normal.
 275                         boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, src->Weights[0], tmpVert);
 276                         boneMat3x4[ src->MatrixId[1] ].mulAddVector( src->Normal, src->Weights[1], tmpVert);
 277                         *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert;
 278                         // UV copy.
 279                         *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->UV;
 280                 }
 281 #else
 282                 // ASM harcoded for 48
 283                 nlctassert(sizeof(CRawVertexNormalSkinned2)==48);
 284
 285                 /*  154 cycles / loop typical
 286                         124 cycles / loop in theory (no memory problem)
 287                 */
 288                 __asm
 289                 {
 290                         mov             ecx, nBlockInf
 291                         mov             esi, src
 292                         mov             edi, destVertexPtr
 293                         mov             edx, boneMat3x4
 294                 theLoop:
 295                         // Vertex.
 296                         // **** boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex, *(CVector*)(destVertexPtr) );
 297
 298                         // eax= matrix0
 299                         mov             eax, [esi+0]src.MatrixId                        // uop: 0/1
 300                         lea             eax, [eax*2+eax]
 301                         shl             eax, 4
 302                         add             eax, edx                                                        // uop: 1/0
 303                         // ebx= matrix1
 304                         mov             ebx, [esi+4]src.MatrixId                        // uop: 0/1
 305                         lea             ebx, [ebx*2+ebx]
 306                         shl             ebx, 4
 307                         add             ebx, edx                                                        // uop: 1/0
 308
 309                         // load x y z
 310                         fld             [esi]src.Vertex.x                                       // uop: 0/1
 311                         fld             [esi]src.Vertex.y                                       // uop: 0/1
 312                         fld             [esi]src.Vertex.z                                       // uop: 0/1
 313
 314                         // **** vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
 315                         // 1st Matrix
 316                         fld             [eax]CMatrix3x4.a11                             // uop: 0/1
 317                         fmul    st, st(3)                                                       // uop: 1/0 (5)
 318                         fld             [eax]CMatrix3x4.a12                             // uop: 0/1
 319                         fmul    st, st(3)                                                       // uop: 1/0 (5)
 320                         faddp   st(1), st                                                       // uop: 1/0 (3)
 321                         fld             [eax]CMatrix3x4.a13                             // uop: 0/1
 322                         fmul    st, st(2)                                                       // uop: 1/0 (5)
 323                         faddp   st(1), st                                                       // uop: 1/0 (3)
 324                         fld             [eax]CMatrix3x4.a14                             // uop: 0/1
 325                         faddp   st(1), st                                                       // uop: 1/0 (3)
 326                         // mul by scale
 327                         fmul    [esi+0]src.Weights
 328
 329                         // 2nd matrix
 330                         fld             [ebx]CMatrix3x4.a11
 331                         fmul    st, st(4)
 332                         fld             [ebx]CMatrix3x4.a12
 333                         fmul    st, st(4)
 334                         faddp   st(1), st
 335                         fld             [ebx]CMatrix3x4.a13
 336                         fmul    st, st(3)
 337                         faddp   st(1), st
 338                         fld             [ebx]CMatrix3x4.a14
 339                         faddp   st(1), st
 340                         // mul by scale, and append
 341                         fmul    [esi+4]src.Weights
 342                         faddp   st(1), st
 343
 344                         // store
 345                         fstp    dword ptr[edi]                                          // uop: 0/0/1/1
 346
 347                         // **** vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
 348                         fld             [eax]CMatrix3x4.a21
 349                         fmul    st, st(3)
 350                         fld             [eax]CMatrix3x4.a22
 351                         fmul    st, st(3)
 352                         faddp   st(1), st
 353                         fld             [eax]CMatrix3x4.a23
 354                         fmul    st, st(2)
 355                         faddp   st(1), st
 356                         fld             [eax]CMatrix3x4.a24
 357                         faddp   st(1), st
 358                         // mul by scale
 359                         fmul    [esi+0]src.Weights
 360
 361                         // 2nd matrix
 362                         fld             [ebx]CMatrix3x4.a21
 363                         fmul    st, st(4)
 364                         fld             [ebx]CMatrix3x4.a22
 365                         fmul    st, st(4)
 366                         faddp   st(1), st
 367                         fld             [ebx]CMatrix3x4.a23
 368                         fmul    st, st(3)
 369                         faddp   st(1), st
 370                         fld             [ebx]CMatrix3x4.a24
 371                         faddp   st(1), st
 372                         // mul by scale, and append
 373                         fmul    [esi+4]src.Weights
 374                         faddp   st(1), st
 375
 376                         // store
 377                         fstp    dword ptr[edi+4]
 378
 379                         // **** vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
 380                         fld             [eax]CMatrix3x4.a31
 381                         fmul    st, st(3)
 382                         fld             [eax]CMatrix3x4.a32
 383                         fmul    st, st(3)
 384                         faddp   st(1), st
 385                         fld             [eax]CMatrix3x4.a33
 386                         fmul    st, st(2)
 387                         faddp   st(1), st
 388                         fld             [eax]CMatrix3x4.a34
 389                         faddp   st(1), st
 390                         // mul by scale
 391                         fmul    [esi+0]src.Weights
 392
 393                         // 2nd matrix
 394                         fld             [ebx]CMatrix3x4.a31
 395                         fmul    st, st(4)
 396                         fld             [ebx]CMatrix3x4.a32
 397                         fmul    st, st(4)
 398                         faddp   st(1), st
 399                         fld             [ebx]CMatrix3x4.a33
 400                         fmul    st, st(3)
 401                         faddp   st(1), st
 402                         fld             [ebx]CMatrix3x4.a34
 403                         faddp   st(1), st
 404                         // mul by scale, and append
 405                         fmul    [esi+4]src.Weights
 406                         faddp   st(1), st
 407
 408                         // store
 409                         fstp    dword ptr[edi+8]
 410
 411                         // free x y z
 412                         fstp    st                                                                      // uop: 1/0
 413                         fstp    st                                                                      // uop: 1/0
 414                         fstp    st                                                                      // uop: 1/0
 415
 416
 417                         // Normal
 418                         // **** boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) );
 419
 420                         // load x y z
 421                         fld             [esi]src.Normal.x
 422                         fld             [esi]src.Normal.y
 423                         fld             [esi]src.Normal.z
 424
 425                         // **** vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
 426                         fld             [eax]CMatrix3x4.a11                             // uop: 0/1
 427                         fmul    st, st(3)                                                       // uop: 1/0 (5)
 428                         fld             [eax]CMatrix3x4.a12                             // uop: 0/1
 429                         fmul    st, st(3)                                                       // uop: 1/0 (5)
 430                         faddp   st(1), st                                                       // uop: 1/0 (3)
 431                         fld             [eax]CMatrix3x4.a13                             // uop: 0/1
 432                         fmul    st, st(2)                                                       // uop: 1/0 (5)
 433                         faddp   st(1), st                                                       // uop: 1/0 (3)
 434                         // mul by scale
 435                         fmul    [esi+0]src.Weights
 436
 437                         // 2nd matrix
 438                         fld             [ebx]CMatrix3x4.a11
 439                         fmul    st, st(4)
 440                         fld             [ebx]CMatrix3x4.a12
 441                         fmul    st, st(4)
 442                         faddp   st(1), st
 443                         fld             [ebx]CMatrix3x4.a13
 444                         fmul    st, st(3)
 445                         faddp   st(1), st
 446                         // mul by scale, and append
 447                         fmul    [esi+4]src.Weights
 448                         faddp   st(1), st
 449
 450                         // store
 451                         fstp    dword ptr[edi+12]                                       // uop: 0/0/1/1
 452
 453                         // **** vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
 454                         fld             [eax]CMatrix3x4.a21
 455                         fmul    st, st(3)
 456                         fld             [eax]CMatrix3x4.a22
 457                         fmul    st, st(3)
 458                         faddp   st(1), st
 459                         fld             [eax]CMatrix3x4.a23
 460                         fmul    st, st(2)
 461                         faddp   st(1), st
 462                         // mul by scale
 463                         fmul    [esi+0]src.Weights
 464
 465                         // 2nd matrix
 466                         fld             [ebx]CMatrix3x4.a21
 467                         fmul    st, st(4)
 468                         fld             [ebx]CMatrix3x4.a22
 469                         fmul    st, st(4)
 470                         faddp   st(1), st
 471                         fld             [ebx]CMatrix3x4.a23
 472                         fmul    st, st(3)
 473                         faddp   st(1), st
 474                         // mul by scale, and append
 475                         fmul    [esi+4]src.Weights
 476                         faddp   st(1), st
 477
 478                         // store
 479                         fstp    dword ptr[edi+16]
 480
 481                         // **** vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
 482                         fld             [eax]CMatrix3x4.a31
 483                         fmul    st, st(3)
 484                         fld             [eax]CMatrix3x4.a32
 485                         fmul    st, st(3)
 486                         faddp   st(1), st
 487                         fld             [eax]CMatrix3x4.a33
 488                         fmul    st, st(2)
 489                         faddp   st(1), st
 490                         // mul by scale
 491                         fmul    [esi+0]src.Weights
 492
 493                         // 2nd matrix
 494                         fld             [ebx]CMatrix3x4.a31
 495                         fmul    st, st(4)
 496                         fld             [ebx]CMatrix3x4.a32
 497                         fmul    st, st(4)
 498                         faddp   st(1), st
 499                         fld             [ebx]CMatrix3x4.a33
 500                         fmul    st, st(3)
 501                         faddp   st(1), st
 502                         // mul by scale, and append
 503                         fmul    [esi+4]src.Weights
 504                         faddp   st(1), st
 505
 506                         // store
 507                         fstp    dword ptr[edi+20]
 508
 509                         // free x y z
 510                         fstp    st
 511                         fstp    st
 512                         fstp    st
 513
 514
 515                         // UV copy.
 516                         // **** *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->UV;
 517                         mov             eax, [esi]src.UV.U                                      // uop: 0/1
 518                         mov             dword ptr[edi+24], eax                          // uop: 0/0/1/1
 519                         mov             eax, [esi]src.UV.V                                      // uop: 0/1
 520                         mov             dword ptr[edi+28], eax                          // uop: 0/0/1/1
 521
 522
 523                         // **** next
 524                         add             esi, 48                                                         // uop: 1/0
 525                         add             edi, NL3D_RAWSKIN_VERTEX_SIZE           // uop: 1/0
 526                         dec             ecx                                                                     // uop: 1/0
 527                         jnz             theLoop                                                         // uop: 1/1 (p1)
 528
 529                         mov             nBlockInf, ecx
 530                         mov             src, esi
 531                         mov             destVertexPtr, edi
 532                 }
 533 #endif
 534         }
 535
 536 }
 537
 538 // ***************************************************************************
 539 void            CMeshMRMSkinnedGeom::applyArrayRawSkinNormal3(CRawVertexNormalSkinned3 *src, uint8 *destVertexPtr,
 540         CMatrix3x4 *boneMat3x4, uint nInf)
 541 {
             // Skins nInf vertices, using three bone matrices per vertex.
             // For every source vertex, position and normal are transformed by
             // boneMat3x4[ src->MatrixId[0..2] ], blended with src->Weights[0..2], and stored in
             // the destination vertex at offset 0 (position) and NL3D_RAWSKIN_NORMAL_OFF (normal).
             // src->UV is copied unchanged to offset NL3D_RAWSKIN_UV_OFF.
             //
             // Parameters:
             //   src           - first source vertex; advanced by one element per processed vertex.
             //   destVertexPtr - first destination vertex; advanced by NL3D_RAWSKIN_VERTEX_SIZE bytes
             //                   per processed vertex. It is only written, never read.
             //   boneMat3x4    - matrix array indexed by src->MatrixId[].
             //   nInf          - number of vertices to process.
             //
             // Build switches:
             //   NL3D_RAWSKIN_PRECACHE - work is split in blocks of at most NumCacheVertexNormal3
             //                           vertices, each passed to CFastMem::precache() first.
             //   NL3D_RAWSKIN_ASM      - the C++ loop is replaced by the inline x87 assembly below.
             //
             // NOTE: the assembly loop tests its counter only after the body (dec/jnz), so that path
             // requires a non-zero vertex count on entry.

 542         // must write contiguously in AGP, and ASM is hardcoded...
 543         nlctassert(NL3D_RAWSKIN_NORMAL_OFF==12);
 544         nlctassert(NL3D_RAWSKIN_UV_OFF==24);
 545
 546         /*extern        uint TESTYOYO_NumRawSkinVertices3;
 547         TESTYOYO_NumRawSkinVertices3+= nInf;
 548         H_AUTO( TestYoyo_RawSkin3 );*/
 549
 550         // Since VertexPtr may be a AGP Ram, MUST NOT read into it! (mulAdd*() do it!)
             // => the weighted sum is accumulated in this local, then stored with a single write.
 551         CVector tmpVert;
 552
             // NB: the opening brace of the block below comes from one of the two preprocessor arms;
             // it is closed by the single '}' just before the end of the function.
 553 #ifdef  NL3D_RAWSKIN_PRECACHE
 554         for(;nInf>0;)
 555         {
 556                 // number of vertices to process for this block.
 557                 uint    nBlockInf= min(NumCacheVertexNormal3, nInf);
 558                 // next block.
 559                 nInf-= nBlockInf;
 560
 561                 // cache the data in L1 cache.
 562                 CFastMem::precache(src, nBlockInf * sizeof(CRawVertexNormalSkinned3));
 563 #else
 564         {
                     // no precaching: a single block covers all the vertices.
 565                 uint    nBlockInf= nInf;
 566 #endif
 567
 568
 569 #ifndef NL3D_RAWSKIN_ASM
 570                 //  for all InfluencedVertices only.
 571                 for(;nBlockInf>0;nBlockInf--, src++, destVertexPtr+=NL3D_RAWSKIN_VERTEX_SIZE)
 572                 {
 573                         // Vertex.
                             // mulSetPoint starts the sum with the 1st matrix, mulAddPoint adds the others.
 574                         boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex, src->Weights[0], tmpVert);
 575                         boneMat3x4[ src->MatrixId[1] ].mulAddPoint( src->Vertex, src->Weights[1], tmpVert);
 576                         boneMat3x4[ src->MatrixId[2] ].mulAddPoint( src->Vertex, src->Weights[2], tmpVert);
 577                         *(CVector*)(destVertexPtr)= tmpVert;
 578                         // Normal.
 579                         boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, src->Weights[0], tmpVert);
 580                         boneMat3x4[ src->MatrixId[1] ].mulAddVector( src->Normal, src->Weights[1], tmpVert);
 581                         boneMat3x4[ src->MatrixId[2] ].mulAddVector( src->Normal, src->Weights[2], tmpVert);
 582                         *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert;
 583                         // UV copy.
 584                         *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->UV;
 585                 }
 586 #else
 587                 // ASM hard coded for a 56-byte source vertex (see "add esi, 56" at the end of the loop)
 588                 nlctassert(sizeof(CRawVertexNormalSkinned3)==56);
 589
 590
 591                 /*  226 cycles / loop typical
 592                         192 cycles / loop in theory (no memory problem)
 593                         148 optimal
 594                 */
                     // Register usage in the loop:
                     //   ecx = remaining vertex count, esi = current source vertex, edi = current dest vertex,
                     //   eax / ebx / edx = addresses of the 1st / 2nd / 3rd bone matrix of the current vertex.
 595                 __asm
 596                 {
 597                         mov             ecx, nBlockInf
 598                         mov             esi, src
 599                         mov             edi, destVertexPtr
 600                 theLoop:
 601                         // Vertex.
 602                         // **** boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex, *(CVector*)(destVertexPtr) );
 603
 604                         // eax= matrix0: boneMat3x4 + MatrixId[0]*48 (lea gives *3, shl 4 gives *16).
                             // NOTE(review): 48 is presumably sizeof(CMatrix3x4) -- confirm against its declaration.
 605                         mov             eax, [esi+0]src.MatrixId                        // uop: 0/1
 606                         lea             eax, [eax*2+eax]
 607                         shl             eax, 4
 608                         add             eax, boneMat3x4                                         // uop: 1/0
 609                         // ebx= matrix1 (same address computation with MatrixId[1])
 610                         mov             ebx, [esi+4]src.MatrixId                        // uop: 0/1
 611                         lea             ebx, [ebx*2+ebx]
 612                         shl             ebx, 4
 613                         add             ebx, boneMat3x4                                         // uop: 1/0
 614                         // edx= matrix2 (same address computation with MatrixId[2])
 615                         mov             edx, [esi+8]src.MatrixId                        // uop: 0/1
 616                         lea             edx, [edx*2+edx]
 617                         shl             edx, 4
 618                         add             edx, boneMat3x4                                         // uop: 1/0
 619
 620                         // load x y z
                             // FPU stack after these loads: st(0)=z, st(1)=y, st(2)=x. They stay on the stack
                             // for the three output components and are popped by the "free x y z" fstp's.
 621                         fld             [esi]src.Vertex.x                                       // uop: 0/1
 622                         fld             [esi]src.Vertex.y                                       // uop: 0/1
 623                         fld             [esi]src.Vertex.z                                       // uop: 0/1
 624
 625                         // **** vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
 626                         // 1st Matrix
                             // (the st(n) indices grow as temporaries are pushed: st(3)=x, then st(3)=y, then st(2)=z)
 627                         fld             [eax]CMatrix3x4.a11                             // uop: 0/1
 628                         fmul    st, st(3)                                                       // uop: 1/0 (5)
 629                         fld             [eax]CMatrix3x4.a12                             // uop: 0/1
 630                         fmul    st, st(3)                                                       // uop: 1/0 (5)
 631                         faddp   st(1), st                                                       // uop: 1/0 (3)
 632                         fld             [eax]CMatrix3x4.a13                             // uop: 0/1
 633                         fmul    st, st(2)                                                       // uop: 1/0 (5)
 634                         faddp   st(1), st                                                       // uop: 1/0 (3)
 635                         fld             [eax]CMatrix3x4.a14                             // uop: 0/1
 636                         faddp   st(1), st                                                       // uop: 1/0 (3)
 637                         // mul by scale
 638                         fmul    [esi+0]src.Weights
 639
 640                         // 2nd matrix
                             // (the partial sum of the 1st matrix sits on the stack, hence indices one deeper)
 641                         fld             [ebx]CMatrix3x4.a11
 642                         fmul    st, st(4)
 643                         fld             [ebx]CMatrix3x4.a12
 644                         fmul    st, st(4)
 645                         faddp   st(1), st
 646                         fld             [ebx]CMatrix3x4.a13
 647                         fmul    st, st(3)
 648                         faddp   st(1), st
 649                         fld             [ebx]CMatrix3x4.a14
 650                         faddp   st(1), st
 651                         // mul by scale, and append
 652                         fmul    [esi+4]src.Weights
 653                         faddp   st(1), st
 654
 655                         // 3rd matrix
 656                         fld             [edx]CMatrix3x4.a11
 657                         fmul    st, st(4)
 658                         fld             [edx]CMatrix3x4.a12
 659                         fmul    st, st(4)
 660                         faddp   st(1), st
 661                         fld             [edx]CMatrix3x4.a13
 662                         fmul    st, st(3)
 663                         faddp   st(1), st
 664                         fld             [edx]CMatrix3x4.a14
 665                         faddp   st(1), st
 666                         // mul by scale, and append
 667                         fmul    [esi+8]src.Weights
 668                         faddp   st(1), st
 669
 670                         // store
 671                         fstp    dword ptr[edi]                                          // uop: 0/0/1/1
 672
 673                         // **** vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
 674                         fld             [eax]CMatrix3x4.a21
 675                         fmul    st, st(3)
 676                         fld             [eax]CMatrix3x4.a22
 677                         fmul    st, st(3)
 678                         faddp   st(1), st
 679                         fld             [eax]CMatrix3x4.a23
 680                         fmul    st, st(2)
 681                         faddp   st(1), st
 682                         fld             [eax]CMatrix3x4.a24
 683                         faddp   st(1), st
 684                         // mul by scale
 685                         fmul    [esi+0]src.Weights
 686
 687                         // 2nd matrix
 688                         fld             [ebx]CMatrix3x4.a21
 689                         fmul    st, st(4)
 690                         fld             [ebx]CMatrix3x4.a22
 691                         fmul    st, st(4)
 692                         faddp   st(1), st
 693                         fld             [ebx]CMatrix3x4.a23
 694                         fmul    st, st(3)
 695                         faddp   st(1), st
 696                         fld             [ebx]CMatrix3x4.a24
 697                         faddp   st(1), st
 698                         // mul by scale, and append
 699                         fmul    [esi+4]src.Weights
 700                         faddp   st(1), st
 701
 702                         // 3rd matrix
 703                         fld             [edx]CMatrix3x4.a21
 704                         fmul    st, st(4)
 705                         fld             [edx]CMatrix3x4.a22
 706                         fmul    st, st(4)
 707                         faddp   st(1), st
 708                         fld             [edx]CMatrix3x4.a23
 709                         fmul    st, st(3)
 710                         faddp   st(1), st
 711                         fld             [edx]CMatrix3x4.a24
 712                         faddp   st(1), st
 713                         // mul by scale, and append
 714                         fmul    [esi+8]src.Weights
 715                         faddp   st(1), st
 716
 717                         // store
 718                         fstp    dword ptr[edi+4]
 719
 720                         // **** vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
 721                         fld             [eax]CMatrix3x4.a31
 722                         fmul    st, st(3)
 723                         fld             [eax]CMatrix3x4.a32
 724                         fmul    st, st(3)
 725                         faddp   st(1), st
 726                         fld             [eax]CMatrix3x4.a33
 727                         fmul    st, st(2)
 728                         faddp   st(1), st
 729                         fld             [eax]CMatrix3x4.a34
 730                         faddp   st(1), st
 731                         // mul by scale
 732                         fmul    [esi+0]src.Weights
 733
 734                         // 2nd matrix
 735                         fld             [ebx]CMatrix3x4.a31
 736                         fmul    st, st(4)
 737                         fld             [ebx]CMatrix3x4.a32
 738                         fmul    st, st(4)
 739                         faddp   st(1), st
 740                         fld             [ebx]CMatrix3x4.a33
 741                         fmul    st, st(3)
 742                         faddp   st(1), st
 743                         fld             [ebx]CMatrix3x4.a34
 744                         faddp   st(1), st
 745                         // mul by scale, and append
 746                         fmul    [esi+4]src.Weights
 747                         faddp   st(1), st
 748
 749                         // 3rd matrix
 750                         fld             [edx]CMatrix3x4.a31
 751                         fmul    st, st(4)
 752                         fld             [edx]CMatrix3x4.a32
 753                         fmul    st, st(4)
 754                         faddp   st(1), st
 755                         fld             [edx]CMatrix3x4.a33
 756                         fmul    st, st(3)
 757                         faddp   st(1), st
 758                         fld             [edx]CMatrix3x4.a34
 759                         faddp   st(1), st
 760                         // mul by scale, and append
 761                         fmul    [esi+8]src.Weights
 762                         faddp   st(1), st
 763
 764                         // store
 765                         fstp    dword ptr[edi+8]
 766
 767                         // free x y z
 768                         fstp    st                                                                      // uop: 1/0
 769                         fstp    st                                                                      // uop: 1/0
 770                         fstp    st                                                                      // uop: 1/0
 771
 772
 773                         // Normal
 774                         // **** boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) );
                             // Same scheme as the position above, reusing eax/ebx/edx, but without adding
                             // the a14/a24/a34 terms. Results go to [edi+12], [edi+16], [edi+20].
 775
 776                         // load x y z
 777                         fld             [esi]src.Normal.x
 778                         fld             [esi]src.Normal.y
 779                         fld             [esi]src.Normal.z
 780                         // **** vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
 781                         fld             [eax]CMatrix3x4.a11                             // uop: 0/1
 782                         fmul    st, st(3)                                                       // uop: 1/0 (5)
 783                         fld             [eax]CMatrix3x4.a12                             // uop: 0/1
 784                         fmul    st, st(3)                                                       // uop: 1/0 (5)
 785                         faddp   st(1), st                                                       // uop: 1/0 (3)
 786                         fld             [eax]CMatrix3x4.a13                             // uop: 0/1
 787                         fmul    st, st(2)                                                       // uop: 1/0 (5)
 788                         faddp   st(1), st                                                       // uop: 1/0 (3)
 789                         // mul by scale
 790                         fmul    [esi+0]src.Weights
 791
 792                         // 2nd matrix
 793                         fld             [ebx]CMatrix3x4.a11
 794                         fmul    st, st(4)
 795                         fld             [ebx]CMatrix3x4.a12
 796                         fmul    st, st(4)
 797                         faddp   st(1), st
 798                         fld             [ebx]CMatrix3x4.a13
 799                         fmul    st, st(3)
 800                         faddp   st(1), st
 801                         // mul by scale, and append
 802                         fmul    [esi+4]src.Weights
 803                         faddp   st(1), st
 804
 805                         // 3rd matrix
 806                         fld             [edx]CMatrix3x4.a11
 807                         fmul    st, st(4)
 808                         fld             [edx]CMatrix3x4.a12
 809                         fmul    st, st(4)
 810                         faddp   st(1), st
 811                         fld             [edx]CMatrix3x4.a13
 812                         fmul    st, st(3)
 813                         faddp   st(1), st
 814                         // mul by scale, and append
 815                         fmul    [esi+8]src.Weights
 816                         faddp   st(1), st
 817
 818                         // store
 819                         fstp    dword ptr[edi+12]                                       // uop: 0/0/1/1
 820
 821                         // **** vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
 822                         fld             [eax]CMatrix3x4.a21
 823                         fmul    st, st(3)
 824                         fld             [eax]CMatrix3x4.a22
 825                         fmul    st, st(3)
 826                         faddp   st(1), st
 827                         fld             [eax]CMatrix3x4.a23
 828                         fmul    st, st(2)
 829                         faddp   st(1), st
 830                         // mul by scale
 831                         fmul    [esi+0]src.Weights
 832
 833                         // 2nd matrix
 834                         fld             [ebx]CMatrix3x4.a21
 835                         fmul    st, st(4)
 836                         fld             [ebx]CMatrix3x4.a22
 837                         fmul    st, st(4)
 838                         faddp   st(1), st
 839                         fld             [ebx]CMatrix3x4.a23
 840                         fmul    st, st(3)
 841                         faddp   st(1), st
 842                         // mul by scale, and append
 843                         fmul    [esi+4]src.Weights
 844                         faddp   st(1), st
 845
 846                         // 3rd matrix
 847                         fld             [edx]CMatrix3x4.a21
 848                         fmul    st, st(4)
 849                         fld             [edx]CMatrix3x4.a22
 850                         fmul    st, st(4)
 851                         faddp   st(1), st
 852                         fld             [edx]CMatrix3x4.a23
 853                         fmul    st, st(3)
 854                         faddp   st(1), st
 855                         // mul by scale, and append
 856                         fmul    [esi+8]src.Weights
 857                         faddp   st(1), st
 858
 859                         // store
 860                         fstp    dword ptr[edi+16]
 861
 862                         // **** vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
 863                         fld             [eax]CMatrix3x4.a31
 864                         fmul    st, st(3)
 865                         fld             [eax]CMatrix3x4.a32
 866                         fmul    st, st(3)
 867                         faddp   st(1), st
 868                         fld             [eax]CMatrix3x4.a33
 869                         fmul    st, st(2)
 870                         faddp   st(1), st
 871                         // mul by scale
 872                         fmul    [esi+0]src.Weights
 873
 874                         // 2nd matrix
 875                         fld             [ebx]CMatrix3x4.a31
 876                         fmul    st, st(4)
 877                         fld             [ebx]CMatrix3x4.a32
 878                         fmul    st, st(4)
 879                         faddp   st(1), st
 880                         fld             [ebx]CMatrix3x4.a33
 881                         fmul    st, st(3)
 882                         faddp   st(1), st
 883                         // mul by scale, and append
 884                         fmul    [esi+4]src.Weights
 885                         faddp   st(1), st
 886
 887                         // 3rd matrix
 888                         fld             [edx]CMatrix3x4.a31
 889                         fmul    st, st(4)
 890                         fld             [edx]CMatrix3x4.a32
 891                         fmul    st, st(4)
 892                         faddp   st(1), st
 893                         fld             [edx]CMatrix3x4.a33
 894                         fmul    st, st(3)
 895                         faddp   st(1), st
 896                         // mul by scale, and append
 897                         fmul    [esi+8]src.Weights
 898                         faddp   st(1), st
 899
 900                         // store
 901                         fstp    dword ptr[edi+20]
 902
 903                         // free x y z
 904                         fstp    st
 905                         fstp    st
 906                         fstp    st
 907
 908
 909                         // UV copy.
 910                         // **** *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->UV;
                             // copied as two raw 32-bit words through eax (eax is reloaded at the top of the loop).
 911                         mov             eax, [esi]src.UV.U                                      // uop: 0/1
 912                         mov             dword ptr[edi+24], eax                          // uop: 0/0/1/1
 913                         mov             eax, [esi]src.UV.V                                      // uop: 0/1
 914                         mov             dword ptr[edi+28], eax                          // uop: 0/0/1/1
 915
 916
 917                         // **** next
                             // The counter is tested only here, after the body: with nBlockInf == 0 on entry
                             // "dec ecx" would wrap around instead of ending the loop.
 918                         add             esi, 56                                                         // uop: 1/0
 919                         add             edi, NL3D_RAWSKIN_VERTEX_SIZE           // uop: 1/0
 920                         dec             ecx                                                                     // uop: 1/0
 921                         jnz             theLoop                                                         // uop: 1/1 (p1)
 922
                             // write the registers back to the C++ variables, so that src / destVertexPtr
                             // are up to date for the next block when NL3D_RAWSKIN_PRECACHE is defined.
 923                         mov             nBlockInf, ecx
 924                         mov             src, esi
 925                         mov             destVertexPtr, edi
 926                 }
 927 #endif
 928
 929         }
 930 }
 931
 932 // ***************************************************************************
 933 void            CMeshMRMSkinnedGeom::applyArrayRawSkinNormal4(CRawVertexNormalSkinned4 *src, uint8 *destVertexPtr,
 934         CMatrix3x4 *boneMat3x4, uint nInf)
 935 {
             // Skins nInf vertices, using four bone matrices per vertex.
             // For every source vertex, position and normal are transformed by
             // boneMat3x4[ src->MatrixId[0..3] ], blended with src->Weights[0..3], and stored in
             // the destination vertex at offset 0 (position) and NL3D_RAWSKIN_NORMAL_OFF (normal).
             // src->UV is copied unchanged to offset NL3D_RAWSKIN_UV_OFF.
             //
             // Parameters:
             //   src           - first source vertex; advanced by one element per processed vertex.
             //   destVertexPtr - first destination vertex; advanced by NL3D_RAWSKIN_VERTEX_SIZE bytes
             //                   per processed vertex. It is only written, never read.
             //   boneMat3x4    - matrix array indexed by src->MatrixId[].
             //   nInf          - number of vertices to process (0 is a no-op).
             //
             // With NL3D_RAWSKIN_PRECACHE defined, the work is split in blocks of at most
             // NumCacheVertexNormal4 vertices, each passed to CFastMem::precache() first.
             // There is no assembly variant of this function.

 936         // must write contiguously in AGP, and ASM is hardcoded...
 937         nlctassert(NL3D_RAWSKIN_NORMAL_OFF==12);
 938         nlctassert(NL3D_RAWSKIN_UV_OFF==24);
 939
 940         /*extern        uint TESTYOYO_NumRawSkinVertices4;
 941         TESTYOYO_NumRawSkinVertices4+= nInf;
 942         H_AUTO( TestYoyo_RawSkin4 );*/
 943
 944         // Since VertexPtr may be a AGP Ram, MUST NOT read into it! (mulAdd*() do it!)
             // => the weighted sum is accumulated in this local, then stored with a single write.
 945         CVector tmpVert;
 946
             // NB: the opening brace of the block below comes from one of the two preprocessor arms;
             // it is closed by the single '}' just before the end of the function.
 947 #ifdef  NL3D_RAWSKIN_PRECACHE
 948         for(;nInf>0;)
 949         {
 950                 // number of vertices to process for this block.
 951                 uint    nBlockInf= min(NumCacheVertexNormal4, nInf);
 952                 // next block.
 953                 nInf-= nBlockInf;
 954
 955                 // cache the data in L1 cache.
 956                 CFastMem::precache(src, nBlockInf * sizeof(CRawVertexNormalSkinned4));
 957 #else
 958         {
                     // no precaching: a single block covers all the vertices.
 959                 uint    nBlockInf= nInf;
 960 #endif
 961
 962                 //  for all InfluencedVertices only.
 963                 for(;nBlockInf>0;nBlockInf--, src++, destVertexPtr+=NL3D_RAWSKIN_VERTEX_SIZE)
 964                 {
 965                         // Vertex.
                             // mulSetPoint starts the sum with the 1st matrix, mulAddPoint adds the others.
 966                         boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex, src->Weights[0], tmpVert);
 967                         boneMat3x4[ src->MatrixId[1] ].mulAddPoint( src->Vertex, src->Weights[1], tmpVert);
 968                         boneMat3x4[ src->MatrixId[2] ].mulAddPoint( src->Vertex, src->Weights[2], tmpVert);
 969                         boneMat3x4[ src->MatrixId[3] ].mulAddPoint( src->Vertex, src->Weights[3], tmpVert);
 970                         *(CVector*)(destVertexPtr)= tmpVert;
 971                         // Normal.
 972                         boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, src->Weights[0], tmpVert);
 973                         boneMat3x4[ src->MatrixId[1] ].mulAddVector( src->Normal, src->Weights[1], tmpVert);
 974                         boneMat3x4[ src->MatrixId[2] ].mulAddVector( src->Normal, src->Weights[2], tmpVert);
 975                         boneMat3x4[ src->MatrixId[3] ].mulAddVector( src->Normal, src->Weights[3], tmpVert);
 976                         *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert;
 977                         // UV copy.
 978                         *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->UV;
 979                 }
 980
 981                 // NB: ASM not done for 4 vertices, because very rare and negligible ...
 982         }
 983 }
 984
 985
 986 // ***************************************************************************
 987 void    CMeshMRMSkinnedGeom::applyRawSkinWithNormal(CLod &lod, CRawSkinnedNormalCache &rawSkinLod, const CSkeletonModel *skeleton, uint8 *vbHard, float alphaLod)
 988 {
             // Skins one LOD (position, normal, UV per vertex) into the vertex buffer vbHard.
             //
             // Layout written into vbHard, in vertices of NL3D_RAWSKIN_VERTEX_SIZE bytes:
             //   1) rawSkinLod.Geomorphs.size() vertices : output of applyGeomorphPosNormalUV0(),
             //   2) rawSkinLod.TotalSoftVertices vertices: skinned in a temporary buffer, then copied,
             //   3) the "hard" vertices                  : skinned directly in place.
             //
             // Parameters:
             //   lod        - supplies lod.MatrixInfluences to computeBoneMatrixes3x4().
             //   rawSkinLod - source vertices grouped by matrix count (Vertices1..Vertices4), the
             //                per-group SoftVertices[] / HardVertices[] counts and the geomorph list.
             //   skeleton   - passed to computeBoneMatrixes3x4().
             //   vbHard     - destination vertex buffer.
             //   alphaLod   - geomorph blend factor; passed through clamp(alphaLod, 0.f, 1.f) before use.
             //
             // NOTE(review): boneMat3x4 and tempSkin are function-local statics shared by every call,
             // so this looks non-reentrant -- confirm it is only used from one thread.

 989         // Some assert
 990         //===========================
 991
 992         // assert, code below is written especially for 4 per vertex.
 993         nlassert( NL3D_MESH_MRM_SKINNED_MAX_MATRIX==4 );
 994
 995
 996         // Compute useful Matrix for this lod.
 997         //===========================
 998         // Those arrays map the array of bones in skeleton.
 999         static  vector<CMatrix3x4>                      boneMat3x4;
1000         computeBoneMatrixes3x4(boneMat3x4, lod.MatrixInfluences, skeleton);
1001
1002
1003         // TestYoyo
1004         /*extern        uint TESTYOYO_NumRawSkinVertices;
1005         TESTYOYO_NumRawSkinVertices+= rawSkinLod.Vertices1.size();
1006         TESTYOYO_NumRawSkinVertices+= rawSkinLod.Vertices2.size();
1007         TESTYOYO_NumRawSkinVertices+= rawSkinLod.Vertices3.size();
1008         TESTYOYO_NumRawSkinVertices+= rawSkinLod.Vertices4.size();*/
1009
1010
             // number of vertices of the group (1, 2, 3 or 4 matrices) currently processed.
1011         uint    nInf;
1012
1013         // Manage "SoftVertices"
1014         if(rawSkinLod.TotalSoftVertices)
1015         {
1016                 // apply skinning into Temp RAM for vertices that are Src of Geomorph
1017                 //===========================
                     // static: the buffer is kept between calls and is only ever grown here.
1018                 static  vector<uint8>   tempSkin;
1019                 uint    tempVbSize= rawSkinLod.TotalSoftVertices*NL3D_RAWSKIN_VERTEX_SIZE;
1020                 if(tempSkin.size() < tempVbSize)
1021                         tempSkin.resize(tempVbSize);
1022                 uint8           *destVertexPtr= &tempSkin[0];
1023
                     // The four groups are written one after the other in tempSkin; the soft vertices
                     // of each group are the first SoftVertices[i] entries of its array.
1024                 // 1 Matrix
1025                 nInf= rawSkinLod.SoftVertices[0];
1026                 if(nInf>0)
1027                 {
1028                         applyArrayRawSkinNormal1(&rawSkinLod.Vertices1[0], destVertexPtr, &boneMat3x4[0], nInf);
1029                         destVertexPtr+= nInf * NL3D_RAWSKIN_VERTEX_SIZE;
1030                 }
1031                 // 2 Matrix
1032                 nInf= rawSkinLod.SoftVertices[1];
1033                 if(nInf>0)
1034                 {
1035                         applyArrayRawSkinNormal2(&rawSkinLod.Vertices2[0], destVertexPtr, &boneMat3x4[0], nInf);
1036                         destVertexPtr+= nInf * NL3D_RAWSKIN_VERTEX_SIZE;
1037                 }
1038                 // 3 Matrix
1039                 nInf= rawSkinLod.SoftVertices[2];
1040                 if(nInf>0)
1041                 {
1042                         applyArrayRawSkinNormal3(&rawSkinLod.Vertices3[0], destVertexPtr, &boneMat3x4[0], nInf);
1043                         destVertexPtr+= nInf * NL3D_RAWSKIN_VERTEX_SIZE;
1044                 }
1045                 // 4 Matrix
1046                 nInf= rawSkinLod.SoftVertices[3];
1047                 if(nInf>0)
1048                 {
1049                         applyArrayRawSkinNormal4(&rawSkinLod.Vertices4[0], destVertexPtr, &boneMat3x4[0], nInf);
1050                         destVertexPtr+= nInf * NL3D_RAWSKIN_VERTEX_SIZE;
1051                 }
1052
1053                 // Fast Copy this into AGP Ram. NB: done before Geomorphs, because ensure some precaching this way!!
1054                 //===========================
1055                 // Skin geomorphs.
                     // the soft vertices are stored right after the Geomorphs.size() geomorphed vertices.
1056                 uint8   *vbHardStart= vbHard + rawSkinLod.Geomorphs.size()*NL3D_RAWSKIN_VERTEX_SIZE;
1057
1058                 // fast copy
1059                 CFastMem::memcpy(vbHardStart, &tempSkin[0], tempVbSize);
1060
1061                 // Geomorphs directly into AGP Ram
1062                 //===========================
                     // NOTE(review): the return value of clamp() is ignored, so it presumably clamps
                     // alphaLod in place -- confirm against its declaration.
1063                 clamp(alphaLod, 0.f, 1.f);
1064                 float           a= alphaLod;
1065                 float           a1= 1 - alphaLod;
1066
1067                 // Fast Geomorph
                     // reads the skinned vertices from tempSkin (not from vbHard) and writes at the start of vbHard.
1068                 applyGeomorphPosNormalUV0(rawSkinLod.Geomorphs, &tempSkin[0], vbHard, NL3D_RAWSKIN_VERTEX_SIZE, a, a1);
1069         }
1070
1071         // Manage HardVertices
1072         if(rawSkinLod.TotalHardVertices)
1073         {
1074                 // apply skinning directly into AGP RAM for vertices that are not Src of Geomorph
1075                 //===========================
                     // index of the first hard vertex in the current group's array (== its soft vertex count).
1076                 uint    startId;
1077
1078                 // Skip Geomorphs and SoftVertices.
1079                 uint8           *destVertexPtr= vbHard + (rawSkinLod.Geomorphs.size()+rawSkinLod.TotalSoftVertices)*NL3D_RAWSKIN_VERTEX_SIZE;
1080
1081                 // 1 Matrix
1082                 nInf= rawSkinLod.HardVertices[0];
1083                 startId= rawSkinLod.SoftVertices[0];
1084                 if(nInf>0)
1085                 {
1086                         applyArrayRawSkinNormal1(&rawSkinLod.Vertices1[startId], destVertexPtr, &boneMat3x4[0], nInf);
1087                         destVertexPtr+= nInf * NL3D_RAWSKIN_VERTEX_SIZE;
1088                 }
1089                 // 2 Matrix
1090                 nInf= rawSkinLod.HardVertices[1];
1091                 startId= rawSkinLod.SoftVertices[1];
1092                 if(nInf>0)
1093                 {
1094                         applyArrayRawSkinNormal2(&rawSkinLod.Vertices2[startId], destVertexPtr, &boneMat3x4[0], nInf);
1095                         destVertexPtr+= nInf * NL3D_RAWSKIN_VERTEX_SIZE;
1096                 }
1097                 // 3 Matrix
1098                 nInf= rawSkinLod.HardVertices[2];
1099                 startId= rawSkinLod.SoftVertices[2];
1100                 if(nInf>0)
1101                 {
1102                         applyArrayRawSkinNormal3(&rawSkinLod.Vertices3[startId], destVertexPtr, &boneMat3x4[0], nInf);
1103                         destVertexPtr+= nInf * NL3D_RAWSKIN_VERTEX_SIZE;
1104                 }
1105                 // 4 Matrix
1106                 nInf= rawSkinLod.HardVertices[3];
1107                 startId= rawSkinLod.SoftVertices[3];
1108                 if(nInf>0)
1109                 {
1110                         applyArrayRawSkinNormal4(&rawSkinLod.Vertices4[startId], destVertexPtr, &boneMat3x4[0], nInf);
1111                         destVertexPtr+= nInf * NL3D_RAWSKIN_VERTEX_SIZE;
1112                 }
1113         }
1114
1115 }
1116
1117 #endif // ADD_MESH_MRM_SKINNED_TEMPLATE