2 #include "../gmx_blas.h"
3 #include "../gmx_lapack.h"
6 F77_FUNC(slasd3
,SLASD3
)(int *nl
,
27 int q_dim1
, q_offset
, u_dim1
, u_offset
, u2_dim1
, u2_offset
, vt_dim1
,
28 vt_offset
, vt2_dim1
, vt2_offset
, i__1
, i__2
;
44 q_offset
= 1 + q_dim1
;
48 u_offset
= 1 + u_dim1
;
51 u2_offset
= 1 + u2_dim1
;
54 vt_offset
= 1 + vt_dim1
;
57 vt2_offset
= 1 + vt2_dim1
;
70 } else if (*sqre
!= 1 && *sqre
!= 0) {
80 d__
[1] = std::abs(z__
[1]);
81 F77_FUNC(scopy
,SCOPY
)(&m
, &vt2
[vt2_dim1
+ 1], ldvt2
, &vt
[vt_dim1
+ 1], ldvt
);
83 F77_FUNC(scopy
,SCOPY
)(&n
, &u2
[u2_dim1
+ 1], &c__1
, &u
[u_dim1
+ 1], &c__1
);
86 for (i__
= 1; i__
<= i__1
; ++i__
) {
87 u
[i__
+ u_dim1
] = -u2
[i__
+ u2_dim1
];
93 F77_FUNC(scopy
,SCOPY
)(k
, &z__
[1], &c__1
, &q
[q_offset
], &c__1
);
95 rho
= F77_FUNC(snrm2
,SNRM2
)(k
, &z__
[1], &c__1
);
96 F77_FUNC(slascl
,SLASCL
)("G", &c__0
, &c__0
, &rho
, &one
, k
, &c__1
, &z__
[1], k
, info
);
101 for (j
= 1; j
<= i__1
; ++j
) {
102 F77_FUNC(slasd4
,SLASD4
)(k
, &j
, &dsigma
[1], &z__
[1], &u
[j
* u_dim1
+ 1], &rho
, &d__
[j
],
103 &vt
[j
* vt_dim1
+ 1], info
);
111 for (i__
= 1; i__
<= i__1
; ++i__
) {
112 z__
[i__
] = u
[i__
+ *k
* u_dim1
] * vt
[i__
+ *k
* vt_dim1
];
114 for (j
= 1; j
<= i__2
; ++j
) {
115 z__
[i__
] *= u
[i__
+ j
* u_dim1
] * vt
[i__
+ j
* vt_dim1
] / (dsigma
[
116 i__
] - dsigma
[j
]) / (dsigma
[i__
] + dsigma
[j
]);
119 for (j
= i__
; j
<= i__2
; ++j
) {
120 z__
[i__
] *= u
[i__
+ j
* u_dim1
] * vt
[i__
+ j
* vt_dim1
] / (dsigma
[
121 i__
] - dsigma
[j
+ 1]) / (dsigma
[i__
] + dsigma
[j
+ 1]);
123 d__2
= std::sqrt(std::abs(z__
[i__
]));
124 z__
[i__
] = (q
[i__
+ q_dim1
] > 0) ? d__2
: -d__2
;
128 for (i__
= 1; i__
<= i__1
; ++i__
) {
129 vt
[i__
* vt_dim1
+ 1] = z__
[1] / u
[i__
* u_dim1
+ 1] / vt
[i__
*
131 u
[i__
* u_dim1
+ 1] = -1.;
133 for (j
= 2; j
<= i__2
; ++j
) {
134 vt
[j
+ i__
* vt_dim1
] = z__
[j
] / u
[j
+ i__
* u_dim1
] / vt
[j
+ i__
136 u
[j
+ i__
* u_dim1
] = dsigma
[j
] * vt
[j
+ i__
* vt_dim1
];
138 temp
= F77_FUNC(snrm2
,SNRM2
)(k
, &u
[i__
* u_dim1
+ 1], &c__1
);
139 q
[i__
* q_dim1
+ 1] = u
[i__
* u_dim1
+ 1] / temp
;
141 for (j
= 2; j
<= i__2
; ++j
) {
143 q
[j
+ i__
* q_dim1
] = u
[jc
+ i__
* u_dim1
] / temp
;
148 F77_FUNC(sgemm
,SGEMM
)("N", "N", &n
, k
, k
, &one
, &u2
[u2_offset
], ldu2
, &q
[q_offset
],
149 ldq
, &zero
, &u
[u_offset
], ldu
);
153 F77_FUNC(sgemm
,SGEMM
)("N", "N", nl
, k
, &ctot
[1], &one
, &u2
[(u2_dim1
<< 1) + 1],
154 ldu2
, &q
[q_dim1
+ 2], ldq
, &zero
, &u
[u_dim1
+ 1], ldu
);
156 ktemp
= ctot
[1] + 2 + ctot
[2];
157 F77_FUNC(sgemm
,SGEMM
)("N", "N", nl
, k
, &ctot
[3], &one
, &u2
[ktemp
* u2_dim1
+ 1]
158 , ldu2
, &q
[ktemp
+ q_dim1
], ldq
, &one
, &u
[u_dim1
+ 1],
161 } else if (ctot
[3] > 0) {
162 ktemp
= ctot
[1] + 2 + ctot
[2];
163 F77_FUNC(sgemm
,SGEMM
)("N", "N", nl
, k
, &ctot
[3], &one
, &u2
[ktemp
* u2_dim1
+ 1],
164 ldu2
, &q
[ktemp
+ q_dim1
], ldq
, &zero
, &u
[u_dim1
+ 1], ldu
);
166 F77_FUNC(slacpy
,SLACPY
)("F", nl
, k
, &u2
[u2_offset
], ldu2
, &u
[u_offset
], ldu
);
168 F77_FUNC(scopy
,SCOPY
)(k
, &q
[q_dim1
+ 1], ldq
, &u
[nlp1
+ u_dim1
], ldu
);
170 ctemp
= ctot
[2] + ctot
[3];
171 F77_FUNC(sgemm
,SGEMM
)("N", "N", nr
, k
, &ctemp
, &one
, &u2
[nlp2
+ ktemp
* u2_dim1
], ldu2
,
172 &q
[ktemp
+ q_dim1
], ldq
, &zero
, &u
[nlp2
+ u_dim1
], ldu
);
176 for (i__
= 1; i__
<= i__1
; ++i__
) {
177 temp
= F77_FUNC(snrm2
,SNRM2
)(k
, &vt
[i__
* vt_dim1
+ 1], &c__1
);
178 q
[i__
+ q_dim1
] = vt
[i__
* vt_dim1
+ 1] / temp
;
180 for (j
= 2; j
<= i__2
; ++j
) {
182 q
[i__
+ j
* q_dim1
] = vt
[jc
+ i__
* vt_dim1
] / temp
;
187 F77_FUNC(sgemm
,SGEMM
)("N", "N", k
, &m
, k
, &one
, &q
[q_offset
], ldq
, &vt2
[vt2_offset
]
188 , ldvt2
, &zero
, &vt
[vt_offset
], ldvt
);
192 F77_FUNC(sgemm
,SGEMM
)("N", "N", k
, &nlp1
, &ktemp
, &one
, &q
[q_dim1
+ 1], ldq
, &vt2
[
193 vt2_dim1
+ 1], ldvt2
, &zero
, &vt
[vt_dim1
+ 1], ldvt
);
194 ktemp
= ctot
[1] + 2 + ctot
[2];
195 if (ktemp
<= *ldvt2
) {
196 F77_FUNC(sgemm
,SGEMM
)("N", "N", k
, &nlp1
, &ctot
[3], &one
, &q
[ktemp
* q_dim1
+ 1],
197 ldq
, &vt2
[ktemp
+ vt2_dim1
], ldvt2
, &one
, &vt
[vt_dim1
+ 1],
205 for (i__
= 1; i__
<= i__1
; ++i__
) {
206 q
[i__
+ ktemp
* q_dim1
] = q
[i__
+ q_dim1
];
209 for (i__
= nlp2
; i__
<= i__1
; ++i__
) {
210 vt2
[ktemp
+ i__
* vt2_dim1
] = vt2
[i__
* vt2_dim1
+ 1];
213 ctemp
= ctot
[2] + 1 + ctot
[3];
214 F77_FUNC(sgemm
,SGEMM
)("N", "N", k
, &nrp1
, &ctemp
, &one
, &q
[ktemp
* q_dim1
+ 1], ldq
, &
215 vt2
[ktemp
+ nlp2
* vt2_dim1
], ldvt2
, &zero
, &vt
[nlp2
* vt_dim1
+