/*
Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
(HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
the University of Michigan All rights reserved.

HOOMD-blue may contain modifications ("Contributions") provided, and to which
copyright is held, by various Contributors who have granted The Regents of the
University of Michigan the right to modify and/or distribute such Contributions.

You may redistribute, use, and create derivate works of HOOMD-blue, in source
and binary forms, provided you abide by the following conditions:

* Redistributions of source code must retain the above copyright notice, this
list of conditions, and the following disclaimer both in the code and
prominently in any materials provided with the distribution.

* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions, and the following disclaimer in the documentation and/or
other materials provided with the distribution.

* All publications and presentations based on HOOMD-blue, including any reports
or published results obtained, in whole or in part, with HOOMD-blue, will
acknowledge its use according to the terms posted at the time of submission on:
http://codeblue.umich.edu/hoomd-blue/citations.html

* Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
http://codeblue.umich.edu/hoomd-blue/

* Apart from the above required attributions, neither the name of the copyright
holder nor the names of HOOMD-blue's contributors may be used to endorse or
promote products derived from this software without specific prior written
permission.

Disclaimer

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.

IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// Maintainer: jglaser
#include "TwoStepNPTMTKGPU.cuh"

#ifdef WIN32
#include <cassert>
#else
#include <assert.h>
#endif

/*! \file TwoStepNPTMTKGPU.cu
    \brief Defines GPU kernel code for NPT integration on the GPU using the Martyna-Tobias-Klein update equations. Used by TwoStepNPTMTKGPU.
*/

//! Shared memory used in reducing the sum of the squared velocities
extern __shared__ Scalar npt_mtk_sdata[];
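// npt_mtk_sdata is dynamically sized shared memory: any kernel that uses it must
// be launched with at least blockDim.x * sizeof(Scalar) bytes of dynamic shared
// memory (see the shared_bytes arguments in gpu_npt_mtk_temperature() below).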
//! Kernel to propagate the positions and velocities, first half of NPT update
__global__ void gpu_npt_mtk_step_one_kernel(Scalar4 *d_pos,
                             Scalar4 *d_vel,
                             const Scalar3 *d_accel,
                             unsigned int *d_group_members,
                             unsigned int group_size,
                             Scalar exp_thermo_fac,
                             Scalar mat_exp_v_xx,
                             Scalar mat_exp_v_xy,
                             Scalar mat_exp_v_xz,
                             Scalar mat_exp_v_yy,
                             Scalar mat_exp_v_yz,
                             Scalar mat_exp_v_zz,
                             Scalar mat_exp_v_int_xx,
                             Scalar mat_exp_v_int_xy,
                             Scalar mat_exp_v_int_xz,
                             Scalar mat_exp_v_int_yy,
                             Scalar mat_exp_v_int_yz,
                             Scalar mat_exp_v_int_zz,
                             Scalar mat_exp_r_xx,
                             Scalar mat_exp_r_xy,
                             Scalar mat_exp_r_xz,
                             Scalar mat_exp_r_yy,
                             Scalar mat_exp_r_yz,
                             Scalar mat_exp_r_zz,
                             Scalar mat_exp_r_int_xx,
                             Scalar mat_exp_r_int_xy,
                             Scalar mat_exp_r_int_xz,
                             Scalar mat_exp_r_int_yy,
                             Scalar mat_exp_r_int_yz,
                             Scalar mat_exp_r_int_zz,
                             Scalar deltaT,
                             bool rescale_all)
    {
    // determine which particle this thread works on
    int group_idx = blockIdx.x * blockDim.x + threadIdx.x;

    // only threads that map to a member of the group do any work
    if (group_idx < group_size)
        {
        unsigned int idx = d_group_members[group_idx];

        // fetch particle position
        Scalar4 pos = d_pos[idx];

        // fetch particle velocity and acceleration
        Scalar4 vel = d_vel[idx];
        Scalar3 v = make_scalar3(vel.x, vel.y, vel.z);
        Scalar3 accel = d_accel[idx];
        Scalar3 r = make_scalar3(pos.x, pos.y, pos.z);

        // apply thermostat update of velocity
        v *= exp_thermo_fac;

        // propagate velocity by half a time step and position by the full time step
        // by multiplying with upper triangular matrix
        v.x = mat_exp_v_xx * v.x + mat_exp_v_xy * v.y + mat_exp_v_xz * v.z;
        v.y = mat_exp_v_yy * v.y + mat_exp_v_yz * v.z;
        v.z = mat_exp_v_zz * v.z;

        v.x += mat_exp_v_int_xx * accel.x + mat_exp_v_int_xy * accel.y + mat_exp_v_int_xz * accel.z;
        v.y += mat_exp_v_int_yy * accel.y + mat_exp_v_int_yz * accel.z;
        v.z += mat_exp_v_int_zz * accel.z;

        if (!rescale_all)
            {
            // rescale this group of particles
            r.x = mat_exp_r_xx * r.x + mat_exp_r_xy * r.y + mat_exp_r_xz * r.z;
            r.y = mat_exp_r_yy * r.y + mat_exp_r_yz * r.z;
            r.z = mat_exp_r_zz * r.z;
            }

        r.x += mat_exp_r_int_xx * v.x + mat_exp_r_int_xy * v.y + mat_exp_r_int_xz * v.z;
        r.y += mat_exp_r_int_yy * v.y + mat_exp_r_int_yz * v.z;
        r.z += mat_exp_r_int_zz * v.z;

        // write out the results
        d_pos[idx] = make_scalar4(r.x,r.y,r.z,pos.w);
        d_vel[idx] = make_scalar4(v.x,v.y,v.z,vel.w);
        }
    }
/*! \param d_pos array of particle positions
    \param d_vel array of particle velocities
    \param d_accel array of particle accelerations
    \param d_group_members Device array listing the indices of the members of the group to integrate
    \param group_size Number of members in the group
    \param exp_thermo_fac Update factor for thermostat
    \param mat_exp_v Matrix exponential for velocity update
    \param mat_exp_v_int Integrated matrix exp for velocity update
    \param mat_exp_r Matrix exponential for position update
    \param mat_exp_r_int Integrated matrix exp for position update
    \param deltaT Time to move forward in one whole step
    \param rescale_all True if all particles in the system should be rescaled at once

    This is just a kernel driver for gpu_npt_mtk_step_one_kernel(). See it for more details.
*/
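// Note: each six-element mat_exp_* array stores an upper-triangular matrix in the
// order (xx, xy, xz, yy, yz, zz); the driver below unpacks the elements in exactly
// that order when launching the kernel.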
cudaError_t gpu_npt_mtk_step_one(Scalar4 *d_pos,
                             Scalar4 *d_vel,
                             const Scalar3 *d_accel,
                             unsigned int *d_group_members,
                             unsigned int group_size,
                             Scalar exp_thermo_fac,
                             Scalar *mat_exp_v,
                             Scalar *mat_exp_v_int,
                             Scalar *mat_exp_r,
                             Scalar *mat_exp_r_int,
                             Scalar deltaT,
                             bool rescale_all)
    {
    // setup the grid to run the kernel
    unsigned int block_size = 256;
    dim3 grid( (group_size / block_size) + 1, 1, 1);
    dim3 threads(block_size, 1, 1);

    // run the kernel
    gpu_npt_mtk_step_one_kernel<<< grid, threads >>>(d_pos,
                                                 d_vel,
                                                 d_accel,
                                                 d_group_members,
                                                 group_size,
                                                 exp_thermo_fac,
                                                 mat_exp_v[0],
                                                 mat_exp_v[1],
                                                 mat_exp_v[2],
                                                 mat_exp_v[3],
                                                 mat_exp_v[4],
                                                 mat_exp_v[5],
                                                 mat_exp_v_int[0],
                                                 mat_exp_v_int[1],
                                                 mat_exp_v_int[2],
                                                 mat_exp_v_int[3],
                                                 mat_exp_v_int[4],
                                                 mat_exp_v_int[5],
                                                 mat_exp_r[0],
                                                 mat_exp_r[1],
                                                 mat_exp_r[2],
                                                 mat_exp_r[3],
                                                 mat_exp_r[4],
                                                 mat_exp_r[5],
                                                 mat_exp_r_int[0],
                                                 mat_exp_r_int[1],
                                                 mat_exp_r_int[2],
                                                 mat_exp_r_int[3],
                                                 mat_exp_r_int[4],
                                                 mat_exp_r_int[5],
                                                 deltaT,
                                                 rescale_all);

    return cudaSuccess;
    }
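// Example (illustrative sketch only; variable names are hypothetical and the actual
// values are computed by the host-side TwoStepNPTMTKGPU class): the caller flattens
// each upper-triangular matrix exponential into a six-element host array before
// invoking the driver.
//
//     Scalar h_mat_exp_v[6];      // filled as xx, xy, xz, yy, yz, zz
//     Scalar h_mat_exp_v_int[6];
//     Scalar h_mat_exp_r[6];
//     Scalar h_mat_exp_r_int[6];
//
//     gpu_npt_mtk_step_one(d_pos, d_vel, d_accel,
//                          d_group_members, group_size,
//                          exp_thermo_fac,
//                          h_mat_exp_v, h_mat_exp_v_int,
//                          h_mat_exp_r, h_mat_exp_r_int,
//                          deltaT, rescale_all);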
/*! \param N number of particles in the system
    \param d_pos array of particle positions
    \param d_image array of particle images
    \param box The new box in which the particles now reside

    Wrap particle positions for all particles in the box
*/
extern "C" __global__
void gpu_npt_mtk_wrap_kernel(const unsigned int N,
                             Scalar4 *d_pos,
                             int3 *d_image,
                             BoxDim box)
    {
    // determine which particle this thread works on
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // wrap ALL particles in the box
    if (idx < N)
        {
        // fetch particle position
        Scalar4 postype = d_pos[idx];
        Scalar3 pos = make_scalar3(postype.x, postype.y, postype.z);

        // read in the image flags
        int3 image = d_image[idx];

        // fix periodic boundary conditions
        box.wrap(pos, image);

        // write out the results
        d_pos[idx] = make_scalar4(pos.x, pos.y, pos.z, postype.w);
        d_image[idx] = image;
        }
    }
/*! \param N number of particles in the system
    \param d_pos array of particle positions
    \param d_image array of particle images
    \param box The new box in which the particles now reside

    This is just a kernel driver for gpu_npt_mtk_wrap_kernel(). See it for more details.
*/
cudaError_t gpu_npt_mtk_wrap(const unsigned int N,
                             Scalar4 *d_pos,
                             int3 *d_image,
                             const BoxDim& box)
    {
    // setup the grid to run the kernel
    unsigned int block_size=256;
    dim3 grid( (N / block_size) + 1, 1, 1);
    dim3 threads(block_size, 1, 1);

    // run the kernel
    gpu_npt_mtk_wrap_kernel<<< grid, threads >>>(N, d_pos, d_image, box);

    return cudaSuccess;
    }
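// Note: the driver takes the box by const reference, but the kernel receives its
// BoxDim argument by value, since CUDA kernel arguments cannot refer to host memory.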
//! Kernel to propagate the positions and velocities, second half of NPT update
__global__ void gpu_npt_mtk_step_two_kernel(Scalar4 *d_vel,
                             Scalar3 *d_accel,
                             const Scalar4 *d_net_force,
                             unsigned int *d_group_members,
                             unsigned int group_size,
                             Scalar mat_exp_v_xx,
                             Scalar mat_exp_v_xy,
                             Scalar mat_exp_v_xz,
                             Scalar mat_exp_v_yy,
                             Scalar mat_exp_v_yz,
                             Scalar mat_exp_v_zz,
                             Scalar mat_exp_v_int_xx,
                             Scalar mat_exp_v_int_xy,
                             Scalar mat_exp_v_int_xz,
                             Scalar mat_exp_v_int_yy,
                             Scalar mat_exp_v_int_yz,
                             Scalar mat_exp_v_int_zz,
                             Scalar deltaT)
    {
    // determine which particle this thread works on
    int group_idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (group_idx < group_size)
        {
        unsigned int idx = d_group_members[group_idx];

        // fetch particle velocity and acceleration
        Scalar4 vel = d_vel[idx];

        // compute acceleration
        Scalar minv = Scalar(1.0)/vel.w;
        Scalar4 net_force = d_net_force[idx];
        Scalar3 accel = make_scalar3(net_force.x, net_force.y, net_force.z);
        accel *= minv;

        Scalar3 v = make_scalar3(vel.x, vel.y, vel.z);

        // propagate velocity by half a time step by multiplying with an upper triangular matrix
        v.x = mat_exp_v_xx * v.x + mat_exp_v_xy * v.y + mat_exp_v_xz * v.z;
        v.y = mat_exp_v_yy * v.y + mat_exp_v_yz * v.z;
        v.z = mat_exp_v_zz * v.z;

        v.x += mat_exp_v_int_xx * accel.x + mat_exp_v_int_xy * accel.y + mat_exp_v_int_xz * accel.z;
        v.y += mat_exp_v_int_yy * accel.y + mat_exp_v_int_yz * accel.z;
        v.z += mat_exp_v_int_zz * accel.z;

        // write out velocity
        d_vel[idx] = make_scalar4(v.x, v.y, v.z, vel.w);

        // since we calculate the acceleration, we need to write it for the next step
        d_accel[idx] = accel;
        }
    }
/*! \param d_vel array of particle velocities
    \param d_accel array of particle accelerations
    \param d_group_members Device array listing the indices of the members of the group to integrate
    \param group_size Number of members in the group
    \param d_net_force Net force on each particle
    \param mat_exp_v Matrix exponential for velocity update
    \param mat_exp_v_int Integrated matrix exp for velocity update
    \param deltaT Time to move forward in one whole step

    This is just a kernel driver for gpu_npt_mtk_step_two_kernel(). See it for more details.
*/
cudaError_t gpu_npt_mtk_step_two(Scalar4 *d_vel,
                             Scalar3 *d_accel,
                             unsigned int *d_group_members,
                             unsigned int group_size,
                             Scalar4 *d_net_force,
                             Scalar* mat_exp_v,
                             Scalar* mat_exp_v_int,
                             Scalar deltaT)
    {
    // setup the grid to run the kernel
    unsigned int block_size=256;
    dim3 grid( (group_size / block_size) + 1, 1, 1);
    dim3 threads(block_size, 1, 1);

    // run the kernel
    gpu_npt_mtk_step_two_kernel<<< grid, threads >>>(d_vel,
                                                     d_accel,
                                                     d_net_force,
                                                     d_group_members,
                                                     group_size,
                                                     mat_exp_v[0],
                                                     mat_exp_v[1],
                                                     mat_exp_v[2],
                                                     mat_exp_v[3],
                                                     mat_exp_v[4],
                                                     mat_exp_v[5],
                                                     mat_exp_v_int[0],
                                                     mat_exp_v_int[1],
                                                     mat_exp_v_int[2],
                                                     mat_exp_v_int[3],
                                                     mat_exp_v_int[4],
                                                     mat_exp_v_int[5],
                                                     deltaT);

    return cudaSuccess;
    }
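// Taken together, gpu_npt_mtk_step_one() performs the thermostat rescaling, the first
// half-step velocity update and the full-step position update, while
// gpu_npt_mtk_step_two() applies the second half-step velocity update from the freshly
// computed net forces and stores the resulting acceleration for the next iteration.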
//! GPU kernel to perform partial reduction of temperature
__global__ void gpu_npt_mtk_temperature_partial(unsigned int *d_group_members,
                                                unsigned int group_size,
                                                Scalar *d_scratch,
                                                Scalar4 *d_velocity)
    {
    // determine which particle this thread works on
    int group_idx = blockIdx.x * blockDim.x + threadIdx.x;

    Scalar mv2_element; // element of scratch space read in
    if (group_idx < group_size)
        {
        unsigned int idx = d_group_members[group_idx];

        Scalar4 vel = d_velocity[idx];
        Scalar mass = vel.w;

        mv2_element = mass * (vel.x*vel.x + vel.y*vel.y + vel.z*vel.z);
        }
    else
        {
        // non-participating thread: contribute 0 to the sum
        mv2_element = Scalar(0.0);
        }

    npt_mtk_sdata[threadIdx.x] = mv2_element;
    __syncthreads();

    int offs = blockDim.x >> 1;
    while (offs > 0)
        {
        if (threadIdx.x < offs)
            npt_mtk_sdata[threadIdx.x] += npt_mtk_sdata[threadIdx.x + offs];

        offs >>= 1;
        __syncthreads();
        }

    // write out partial sum
    if (threadIdx.x == 0)
        d_scratch[blockIdx.x] = npt_mtk_sdata[0];
    }
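// The in-block tree reduction above (and the one in the final-sum kernel below)
// halves the active stride on each iteration, so it only sums every element when
// the block size is a power of two; callers should launch these kernels with
// power-of-two block sizes.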
//! GPU kernel to perform final reduction of temperature
__global__ void gpu_npt_mtk_temperature_final_sum(Scalar *d_scratch,
                                                  Scalar *d_temperature,
                                                  unsigned int ndof,
                                                  unsigned int num_partial_sums)
    {
    Scalar final_sum(0.0);

    for (int start = 0; start < num_partial_sums; start += blockDim.x)
        {
        __syncthreads();
        if (start + threadIdx.x < num_partial_sums)
            {
            npt_mtk_sdata[threadIdx.x] = d_scratch[start + threadIdx.x];
            }
        else
            npt_mtk_sdata[threadIdx.x] = Scalar(0.0);

        __syncthreads();

        // reduce the sum in parallel
        int offs = blockDim.x >> 1;
        while (offs > 0)
            {
            if (threadIdx.x < offs)
                npt_mtk_sdata[threadIdx.x] += npt_mtk_sdata[threadIdx.x + offs];

            offs >>= 1;
            __syncthreads();
            }

        if (threadIdx.x == 0)
            final_sum += npt_mtk_sdata[0];
        }

    if (threadIdx.x == 0)
        *d_temperature = final_sum/Scalar(ndof);
    }
/*!\param d_temperature Device variable to store the temperature value (output)
   \param d_vel Array of particle velocities and masses
   \param d_scratch Temporary scratch space for reduction
   \param num_blocks Number of CUDA blocks used in reduction
   \param block_size Size of blocks used in reduction
   \param d_group_members Members of group for which the reduction is performed
   \param group_size Size of group
   \param ndof Number of degrees of freedom of group

   This function performs the reduction of the temperature on the GPU. It is just
   a driver function that calls the appropriate GPU kernels.
   */
cudaError_t gpu_npt_mtk_temperature(Scalar *d_temperature,
                                    Scalar4 *d_vel,
                                    Scalar *d_scratch,
                                    unsigned int num_blocks,
                                    unsigned int block_size,
                                    unsigned int *d_group_members,
                                    unsigned int group_size,
                                    unsigned int ndof)
    {
    assert(d_temperature);
    assert(d_vel);
    assert(d_scratch);

    dim3 grid(num_blocks,1,1);
    dim3 threads(block_size,1,1);

    unsigned int shared_bytes = sizeof(Scalar)*block_size;

    // reduce squared velocity norm times mass, first pass
    gpu_npt_mtk_temperature_partial<<<grid, threads, shared_bytes>>>(
                                                d_group_members,
                                                group_size,
                                                d_scratch,
                                                d_vel);

    unsigned int final_block_size = 512;
    grid = dim3(1,1,1);
    threads = dim3(final_block_size, 1, 1);
    shared_bytes = sizeof(Scalar)*final_block_size;

    // reduction, second pass
    gpu_npt_mtk_temperature_final_sum<<<grid, threads, shared_bytes>>>(
                                                d_scratch,
                                                d_temperature,
                                                ndof,
                                                num_blocks);

    return cudaSuccess;
    }
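// Example (illustrative only; variable names are hypothetical): the first pass writes
// one partial sum per block into d_scratch, so the scratch array must hold at least
// num_blocks Scalars and the grid must cover the whole group.
//
//     unsigned int block_size = 256;                          // power of two
//     unsigned int num_blocks = group_size / block_size + 1;  // one partial sum each
//     // d_scratch is assumed to be a device allocation of num_blocks Scalars
//     gpu_npt_mtk_temperature(d_temperature, d_vel, d_scratch,
//                             num_blocks, block_size,
//                             d_group_members, group_size, ndof);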
/*! \param d_vel array of particle velocities and masses
    \param d_group_members Device array listing the indices of the members of the group to integrate
    \param group_size Number of members in the group
    \param exp_v_fac_thermo scaling factor (per direction) for velocity update generated by thermostat

    GPU kernel to thermostat velocities
*/
__global__ void gpu_npt_mtk_thermostat_kernel(Scalar4 *d_vel,
                             unsigned int *d_group_members,
                             unsigned int group_size,
                             Scalar exp_v_fac_thermo)
    {
    // determine which particle this thread works on
    int group_idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (group_idx < group_size)
        {
        unsigned int idx = d_group_members[group_idx];

        // fetch particle velocity
        Scalar4 vel = d_vel[idx];
        Scalar3 v = make_scalar3(vel.x, vel.y, vel.z);

        // rescale
        v *= exp_v_fac_thermo;

        // write out the results
        d_vel[idx] = make_scalar4(v.x,v.y,v.z,vel.w);
        }
    }
/*! \param d_vel array of particle velocities
    \param d_group_members Device array listing the indices of the members of the group to integrate
    \param group_size Number of members in the group
    \param exp_v_fac_thermo Rescaling factor
    \param block_size Kernel block size

    This is just a kernel driver for gpu_npt_mtk_thermostat_kernel(). See it for more details.
*/
cudaError_t gpu_npt_mtk_thermostat(Scalar4 *d_vel,
                             unsigned int *d_group_members,
                             unsigned int group_size,
                             Scalar exp_v_fac_thermo,
                             unsigned int block_size)
    {
    // setup the grid to run the kernel
    dim3 grid( (group_size / block_size) + 1, 1, 1);
    dim3 threads(block_size, 1, 1);

    // run the kernel
    gpu_npt_mtk_thermostat_kernel<<< grid, threads >>>(d_vel,
                                                     d_group_members,
                                                     group_size,
                                                     exp_v_fac_thermo);

    return cudaSuccess;
    }
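//! Kernel to rescale the positions of all particles with an upper triangular matrix
/*! \param N Number of particles in the system
    \param d_postype Array of particle positions and types
    \param mat_exp_r_xx Element (xx) of the position rescaling matrix
    \param mat_exp_r_xy Element (xy) of the position rescaling matrix
    \param mat_exp_r_xz Element (xz) of the position rescaling matrix
    \param mat_exp_r_yy Element (yy) of the position rescaling matrix
    \param mat_exp_r_yz Element (yz) of the position rescaling matrix
    \param mat_exp_r_zz Element (zz) of the position rescaling matrix

    This kernel applies the same upper-triangular position update as the per-group
    rescaling in gpu_npt_mtk_step_one_kernel(), but to every particle in the system;
    it is presumably the path taken when rescale_all is set (the per-group rescale is
    skipped in that case).
*/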
__global__ void gpu_npt_mtk_rescale_kernel(unsigned int N,
                                           Scalar4 *d_postype,
                                           Scalar mat_exp_r_xx,
                                           Scalar mat_exp_r_xy,
                                           Scalar mat_exp_r_xz,
                                           Scalar mat_exp_r_yy,
                                           Scalar mat_exp_r_yz,
                                           Scalar mat_exp_r_zz)
    {
    unsigned int idx = blockIdx.x*blockDim.x + threadIdx.x;

    if (idx >= N) return;

    // rescale position
    Scalar4 postype = d_postype[idx];
    Scalar3 r = make_scalar3(postype.x,postype.y,postype.z);

    r.x = mat_exp_r_xx * r.x + mat_exp_r_xy * r.y + mat_exp_r_xz * r.z;
    r.y = mat_exp_r_yy * r.y + mat_exp_r_yz * r.z;
    r.z = mat_exp_r_zz * r.z;

    d_postype[idx] = make_scalar4(r.x, r.y, r.z, postype.w);
    }
void gpu_npt_mtk_rescale(unsigned int N,
                       Scalar4 *d_postype,
                       Scalar mat_exp_r_xx,
                       Scalar mat_exp_r_xy,
                       Scalar mat_exp_r_xz,
                       Scalar mat_exp_r_yy,
                       Scalar mat_exp_r_yz,
                       Scalar mat_exp_r_zz)
    {
    unsigned int block_size = 256;

    dim3 grid( (N / block_size) + 1, 1, 1);
    dim3 threads(block_size, 1, 1);

    gpu_npt_mtk_rescale_kernel<<<grid, threads>>> (N,
        d_postype,
        mat_exp_r_xx,
        mat_exp_r_xy,
        mat_exp_r_xz,
        mat_exp_r_yy,
        mat_exp_r_yz,
        mat_exp_r_zz);
    }