Revisão | a151270c4cbebcc9ddea54b99445edfa16260f85 (tree) |
---|---|
Hora | 2012-12-29 14:23:50 |
Autor | Katsuhiko Nishimra <ktns.87@gmai...> |
Commiter | Katsuhiko Nishimra |
Integrate Blas/Lapack_GNU.cpp into Blas/Lapack.cpp. #30409
git-svn-id: https://svn.sourceforge.jp/svnroot/molds/trunk@1220 1136aad2-a195-0410-b898-f5ea1d11b9d8
@@ -28,9 +28,9 @@ OPENBLAS_LIBS = -lopenblas | ||
28 | 28 | EXENAME = MolDS.out |
29 | 29 | DEPFILE = obj/objfile.dep |
30 | 30 | |
31 | -ALL_CPP_FILES = Main.cpp base/MolDSException.cpp wrappers/Blas_GNU.cpp wrappers/Lapack_GNU.cpp base/Utilities.cpp base/Enums.cpp base/MathUtilities.cpp base/MallocerFreer.cpp base/EularAngle.cpp base/Parameters.cpp base/atoms/Atom.cpp base/atoms/Hatom.cpp base/atoms/Liatom.cpp base/atoms/Catom.cpp base/atoms/Natom.cpp base/atoms/Oatom.cpp base/atoms/Satom.cpp base/factories/AtomFactory.cpp base/Molecule.cpp base/InputParser.cpp base/GTOExpansionSTO.cpp base/RealSphericalHarmonicsIndex.cpp base/loggers/MOLogger.cpp base/loggers/DensityLogger.cpp base/loggers/HoleDensityLogger.cpp base/loggers/ParticleDensityLogger.cpp cndo/Cndo2.cpp indo/Indo.cpp zindo/ZindoS.cpp mndo/Mndo.cpp am1/Am1.cpp am1/Am1D.cpp pm3/Pm3.cpp pm3/Pm3D.cpp pm3/Pm3Pddg.cpp base/factories/ElectronicStructureFactory.cpp md/MD.cpp mc/MC.cpp rpmd/RPMD.cpp nasco/NASCO.cpp optimization/Optimizer.cpp optimization/ConjugateGradient.cpp optimization/SteepestDescent.cpp optimization/BFGS.cpp base/factories/OptimizerFactory.cpp base/MolDS.cpp | |
31 | +ALL_CPP_FILES = Main.cpp base/MolDSException.cpp wrappers/Blas.cpp wrappers/Lapack.cpp base/Utilities.cpp base/Enums.cpp base/MathUtilities.cpp base/MallocerFreer.cpp base/EularAngle.cpp base/Parameters.cpp base/atoms/Atom.cpp base/atoms/Hatom.cpp base/atoms/Liatom.cpp base/atoms/Catom.cpp base/atoms/Natom.cpp base/atoms/Oatom.cpp base/atoms/Satom.cpp base/factories/AtomFactory.cpp base/Molecule.cpp base/InputParser.cpp base/GTOExpansionSTO.cpp base/RealSphericalHarmonicsIndex.cpp base/loggers/MOLogger.cpp base/loggers/DensityLogger.cpp base/loggers/HoleDensityLogger.cpp base/loggers/ParticleDensityLogger.cpp cndo/Cndo2.cpp indo/Indo.cpp zindo/ZindoS.cpp mndo/Mndo.cpp am1/Am1.cpp am1/Am1D.cpp pm3/Pm3.cpp pm3/Pm3D.cpp pm3/Pm3Pddg.cpp base/factories/ElectronicStructureFactory.cpp md/MD.cpp mc/MC.cpp rpmd/RPMD.cpp nasco/NASCO.cpp optimization/Optimizer.cpp optimization/ConjugateGradient.cpp optimization/SteepestDescent.cpp optimization/BFGS.cpp base/factories/OptimizerFactory.cpp base/MolDS.cpp | |
32 | 32 | ALL_HEAD_FILES = base/PrintController.h base/MolDSException.h base/Uncopyable.h wrappers/Blas.h wrappers/Lapack.h base/Utilities.h base/Enums.h base/MathUtilities.h base/MallocerFreer.h base/EularAngle.h base/Parameters.h base/atoms/Atom.h base/atoms/Hatom.h base/atoms/Liatom.h base/atoms/Catom.h base/atoms/Natom.h base/atoms/Oatom.h base/atoms/Satom.h base/factories/AtomFactory.h base/Molecule.h base/InputParser.h base/GTOExpansionSTO.h base/RealSphericalHarmonicsIndex.h base/loggers/MOLogger.h base/loggers/DensityLogger.h base/loggers/HoleDensityLogger.h base/loggers/ParticleDensityLogger.h base/ElectronicStructure.h cndo/Cndo2.h cndo/ReducedOverlapAOsParameters.h indo/Indo.h zindo/ZindoS.h mndo/Mndo.h am1/Am1.h am1/Am1D.h pm3/Pm3.h pm3/Pm3D.h pm3/Pm3Pddg.h base/factories/ElectronicStructureFactory.h md/MD.h mc/MC.h rpmd/RPMD.h nasco/NASCO.h optimization/Optimizer.h optimization/ConjugateGradient.h optimization/SteepestDescent.h optimization/BFGS.h base/factories/OptimizerFactory.h base/MolDS.h |
33 | -ALL_OBJ_FILES = obj/Main.o obj/MolDSException.o obj/Blas_GNU.o obj/Lapack_GNU.o obj/Utilities.o obj/Enums.o obj/MathUtilities.o obj/MallocerFreer.o obj/EularAngle.o obj/Parameters.o obj/Atom.o obj/Hatom.o obj/Liatom.o obj/Catom.o obj/Natom.o obj/Oatom.o obj/Satom.o obj/AtomFactory.o obj/Molecule.o obj/InputParser.o obj/GTOExpansionSTO.o obj/RealSphericalHarmonicsIndex.o obj/MOLogger.o obj/DensityLogger.o obj/HoleDensityLogger.o obj/ParticleDensityLogger.o obj/Cndo2.o obj/Indo.o obj/ZindoS.o obj/Mndo.o obj/Am1.o obj/Am1D.o obj/Pm3.o obj/Pm3D.o obj/Pm3Pddg.o obj/ElectronicStructureFactory.o obj/MD.o obj/MC.o obj/RPMD.o obj/NASCO.o obj/Optimizer.o obj/ConjugateGradient.o obj/SteepestDescent.o obj/BFGS.o obj/OptimizerFactory.o obj/MolDS.o | |
33 | +ALL_OBJ_FILES = obj/Main.o obj/MolDSException.o obj/Blas.o obj/Lapack.o obj/Utilities.o obj/Enums.o obj/MathUtilities.o obj/MallocerFreer.o obj/EularAngle.o obj/Parameters.o obj/Atom.o obj/Hatom.o obj/Liatom.o obj/Catom.o obj/Natom.o obj/Oatom.o obj/Satom.o obj/AtomFactory.o obj/Molecule.o obj/InputParser.o obj/GTOExpansionSTO.o obj/RealSphericalHarmonicsIndex.o obj/MOLogger.o obj/DensityLogger.o obj/HoleDensityLogger.o obj/ParticleDensityLogger.o obj/Cndo2.o obj/Indo.o obj/ZindoS.o obj/Mndo.o obj/Am1.o obj/Am1D.o obj/Pm3.o obj/Pm3D.o obj/Pm3Pddg.o obj/ElectronicStructureFactory.o obj/MD.o obj/MC.o obj/RPMD.o obj/NASCO.o obj/Optimizer.o obj/ConjugateGradient.o obj/SteepestDescent.o obj/BFGS.o obj/OptimizerFactory.o obj/MolDS.o | |
34 | 34 | |
35 | 35 | $(EXENAME): $(DEPFILE) $(ALL_OBJ_FILES) |
36 | 36 | $(CC) -o $@ -Wl,-rpath=$(OPENBLAS_LIB_DIR) $(ALL_OBJ_FILES) -L$(OPENBLAS_LIB_DIR) $(LIBS) $(OPENBLAS_LIBS) |
@@ -25,7 +25,11 @@ | ||
25 | 25 | #include<string> |
26 | 26 | #include<stdexcept> |
27 | 27 | #include<boost/format.hpp> |
28 | +#ifdef __INTEL_COMPILER | |
28 | 29 | #include"mkl.h" |
30 | +#else | |
31 | +#include"cblas.h" | |
32 | +#endif | |
29 | 33 | #include"../base/PrintController.h" |
30 | 34 | #include"../base/MolDSException.h" |
31 | 35 | #include"../base/Uncopyable.h" |
@@ -75,7 +79,12 @@ void Blas::Dcopy(molds_blas_int n, | ||
75 | 79 | void Blas::Dcopy(molds_blas_int n, |
76 | 80 | double const* vectorX, molds_blas_int incrementX, |
77 | 81 | double* vectorY, molds_blas_int incrementY) const{ |
82 | +#ifdef __INTEL_COMPILER | |
78 | 83 | dcopy(&n, vectorX, &incrementX, vectorY, &incrementY); |
84 | +#else | |
85 | + double* x = const_cast<double*>(&vectorX[0]); | |
86 | + cblas_dcopy(n, x, incrementX, vectorY, incrementY); | |
87 | +#endif | |
79 | 88 | } |
80 | 89 | |
81 | 90 | // vectorY = alpha*vectorX + vectorY |
@@ -95,7 +104,12 @@ void Blas::Daxpy(molds_blas_int n, double alpha, | ||
95 | 104 | void Blas::Daxpy(molds_blas_int n, double alpha, |
96 | 105 | double const* vectorX, molds_blas_int incrementX, |
97 | 106 | double* vectorY, molds_blas_int incrementY) const{ |
107 | +#ifdef __INTEL_COMPILER | |
98 | 108 | daxpy(&n, &alpha, vectorX, &incrementX, vectorY, &incrementY); |
109 | +#else | |
110 | + double* x = const_cast<double*>(&vectorX[0]); | |
111 | + cblas_daxpy(n, alpha, x, incrementX, vectorY, incrementY); | |
112 | +#endif | |
99 | 113 | } |
100 | 114 | |
101 | 115 | // returns vectorX^T*vectorY |
@@ -115,7 +129,13 @@ double Blas::Ddot(molds_blas_int n, | ||
115 | 129 | double Blas::Ddot(molds_blas_int n, |
116 | 130 | double const* vectorX, molds_blas_int incrementX, |
117 | 131 | double const* vectorY, molds_blas_int incrementY)const{ |
132 | +#ifdef __INTEL_COMPILER | |
118 | 133 | return ddot(&n, vectorX, &incrementX, vectorY, &incrementY); |
134 | +#else | |
135 | + double* x=const_cast<double*>(vectorX), | |
136 | + * y=const_cast<double*>(vectorY); | |
137 | + return cblas_ddot(n, x, incrementX, y, incrementY); | |
138 | +#endif | |
119 | 139 | } |
120 | 140 | |
121 | 141 | // vectorY = matrixA*vectorX |
@@ -147,6 +167,7 @@ void Blas::Dgemv(bool isColumnMajorMatrixA, | ||
147 | 167 | double beta, |
148 | 168 | double* vectorY, |
149 | 169 | molds_blas_int incrementY) const{ |
170 | +#ifdef __INTEL_COMPILER | |
150 | 171 | double const* a = &matrixA[0][0]; |
151 | 172 | char transA; |
152 | 173 | if(isColumnMajorMatrixA){ |
@@ -158,6 +179,20 @@ void Blas::Dgemv(bool isColumnMajorMatrixA, | ||
158 | 179 | } |
159 | 180 | molds_blas_int lda = m; |
160 | 181 | dgemv(&transA, &m, &n, &alpha, a, &lda, vectorX, &incrementX, &beta, vectorY, &incrementY); |
182 | +#else | |
183 | + double* a = const_cast<double*>(&matrixA[0][0]); | |
184 | + double* x = const_cast<double*>(&vectorX[0]); | |
185 | + CBLAS_TRANSPOSE transA; | |
186 | + if(isColumnMajorMatrixA){ | |
187 | + transA = CblasNoTrans; | |
188 | + } | |
189 | + else{ | |
190 | + transA = CblasTrans; | |
191 | + swap(m,n); | |
192 | + } | |
193 | + int lda = m; | |
194 | + cblas_dgemv(CblasColMajor, transA, m, n, alpha, a, lda, x, incrementX, beta, vectorY, incrementY); | |
195 | +#endif | |
161 | 196 | } |
162 | 197 | |
163 | 198 | // vectorY = matrixA*vectorX |
@@ -183,10 +218,18 @@ void Blas::Dsymv(molds_blas_int n, double alpha, | ||
183 | 218 | double const* vectorX, molds_blas_int incrementX, |
184 | 219 | double beta, |
185 | 220 | double* vectorY, molds_blas_int incrementY) const{ |
221 | +#ifdef __INTEL_COMPILER | |
186 | 222 | double const* a = &matrixA[0][0]; |
187 | 223 | char uploA='L'; |
188 | 224 | molds_blas_int lda = n; |
189 | 225 | dsymv(&uploA, &n, &alpha, a, &lda, vectorX, &incrementX, &beta, vectorY, &incrementY); |
226 | +#else | |
227 | + double* a = const_cast<double*>(&matrixA[0][0]); | |
228 | + double* x = const_cast<double*>(&vectorX[0]); | |
229 | + CBLAS_UPLO uploA=CblasUpper; | |
230 | + int lda = n; | |
231 | + cblas_dsymv(CblasRowMajor, uploA, n, alpha, a, lda, x, incrementX, beta, vectorY, incrementY); | |
232 | +#endif | |
190 | 233 | } |
191 | 234 | |
192 | 235 | // matrixA = alpha*vectorX*vectorX^T + matrixA |
@@ -203,9 +246,16 @@ void Blas::Dsyr(molds_blas_int n, double alpha, | ||
203 | 246 | double const* vectorX, molds_blas_int incrementX, |
204 | 247 | double ** matrixA)const{ |
205 | 248 | double* a = &matrixA[0][0]; |
249 | +#ifdef __INTEL_COMPILER | |
206 | 250 | char uploA='L'; |
207 | 251 | molds_blas_int lda = n; |
208 | 252 | dsyr(&uploA, &n, &alpha, vectorX, &incrementX, a, &lda); |
253 | +#else | |
254 | + double* x = const_cast<double*>(&vectorX[0]); | |
255 | + CBLAS_UPLO uploA=CblasUpper; | |
256 | + int lda = n; | |
257 | + cblas_dsyr(CblasRowMajor, uploA, n, alpha, x, incrementX, a, lda); | |
258 | +#endif | |
209 | 259 | #pragma omp parallel for schedule(auto) |
210 | 260 | for(molds_blas_int i=0;i<n;i++){ |
211 | 261 | for(molds_blas_int j=i+1;j<n;j++){ |
@@ -241,6 +291,7 @@ void Blas::Dgemm(bool isColumnMajorMatrixA, | ||
241 | 291 | double const* const* matrixB, |
242 | 292 | double beta, |
243 | 293 | double** matrixC) const{ |
294 | +#ifdef __INTEL_COMPILER | |
244 | 295 | double const* a = &matrixA[0][0]; |
245 | 296 | double const* b = &matrixB[0][0]; |
246 | 297 | double* c = &matrixC[0][0]; |
@@ -266,9 +317,40 @@ void Blas::Dgemm(bool isColumnMajorMatrixA, | ||
266 | 317 | transB = 'T'; //kb=k |
267 | 318 | ldb = n; |
268 | 319 | } |
320 | +#else | |
321 | + double* a = const_cast<double*>(&matrixA[0][0]); | |
322 | + double* b = const_cast<double*>(&matrixB[0][0]); | |
323 | + double* c = &matrixC[0][0]; | |
324 | + | |
325 | + int lda; | |
326 | + CBLAS_TRANSPOSE transA; | |
327 | + if(isColumnMajorMatrixA){ | |
328 | + transA = CblasNoTrans; | |
329 | + lda = m; | |
330 | + } | |
331 | + else{ | |
332 | + transA = CblasTrans; | |
333 | + lda = k; | |
334 | + } | |
335 | + | |
336 | + int ldb; | |
337 | + CBLAS_TRANSPOSE transB; | |
338 | + if(isColumnMajorMatrixB){ | |
339 | + transB = CblasNoTrans; | |
340 | + ldb = k; | |
341 | + } | |
342 | + else{ | |
343 | + transB = CblasTrans; | |
344 | + ldb = n; | |
345 | + } | |
346 | +#endif | |
269 | 347 | |
270 | 348 | double* tmpC; |
349 | +#ifdef __INTEL_COMPILER | |
271 | 350 | tmpC = (double*)mkl_malloc( sizeof(double)*m*n, 16 ); |
351 | +#else | |
352 | + tmpC = (double*)malloc( sizeof(double)*m*n); | |
353 | +#endif | |
272 | 354 | for(molds_blas_int i=0; i<m; i++){ |
273 | 355 | for(molds_blas_int j=0; j<n; j++){ |
274 | 356 | tmpC[i+j*m] = matrixC[i][j]; |
@@ -276,13 +358,21 @@ void Blas::Dgemm(bool isColumnMajorMatrixA, | ||
276 | 358 | } |
277 | 359 | molds_blas_int ldc = m; |
278 | 360 | //call blas |
361 | +#ifdef __INTEL_COMPILER | |
279 | 362 | dgemm(&transA, &transB, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, tmpC, &ldc); |
363 | +#else | |
364 | + cblas_dgemm(CblasColMajor, transA, transB, m, n, k, alpha, a, lda, b, ldb, beta, tmpC, ldc); | |
365 | +#endif | |
280 | 366 | for(molds_blas_int i=0; i<m; i++){ |
281 | 367 | for(molds_blas_int j=0; j<n; j++){ |
282 | 368 | matrixC[i][j] = tmpC[i+j*m]; |
283 | 369 | } |
284 | 370 | } |
371 | +#ifdef __INTEL_COMPILER | |
285 | 372 | mkl_free(tmpC); |
373 | +#else | |
374 | + free(tmpC); | |
375 | +#endif | |
286 | 376 | } |
287 | 377 | |
288 | 378 | } |
@@ -24,11 +24,30 @@ | ||
24 | 24 | #include<string> |
25 | 25 | #include<stdexcept> |
26 | 26 | #include<boost/format.hpp> |
27 | +#ifdef __INTEL_COMPILER | |
27 | 28 | #include"mkl.h" |
29 | +#else | |
30 | +#if ( __WORDSIZE == 32 ) | |
31 | +#else | |
32 | +#define HAVE_LAPACK_CONFIG_H | |
33 | +#define LAPACK_ILP64 | |
34 | +#endif | |
35 | +#include"lapacke.h" | |
36 | +#endif | |
28 | 37 | #include"../base/PrintController.h" |
29 | 38 | #include"../base/MolDSException.h" |
30 | 39 | #include"../base/Uncopyable.h" |
31 | 40 | #include"Lapack.h" |
41 | + | |
42 | +#ifdef __INTEL_COMPILER | |
43 | +#define MOLDS_LAPACK_malloc(a,b) mkl_malloc(a,b) | |
44 | +#define MOLDS_LAPACK_free(a) mkl_free(a) | |
45 | +#else | |
46 | +#define MOLDS_LAPACK_malloc(a,b) malloc(a) | |
47 | +#define MOLDS_LAPACK_free(a) free(a) | |
48 | +#endif | |
49 | + | |
50 | + | |
32 | 51 | using namespace std; |
33 | 52 | using namespace MolDS_base; |
34 | 53 |
@@ -121,10 +140,10 @@ molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapa | ||
121 | 140 | } |
122 | 141 | |
123 | 142 | // malloc |
124 | - work = (double*)mkl_malloc( sizeof(double)*lwork, 16 ); | |
125 | - iwork = (molds_lapack_int*)mkl_malloc( sizeof(molds_lapack_int)*liwork, 16 ); | |
126 | - convertedMatrix = (double*)mkl_malloc( sizeof(double)*size*size, 16 ); | |
127 | - tempEigenValues = (double*)mkl_malloc( sizeof(double)*size, 16 ); | |
143 | + work = (double*)MOLDS_LAPACK_malloc( sizeof(double)*lwork, 16 ); | |
144 | + iwork = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*liwork, 16 ); | |
145 | + convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*size, 16 ); | |
146 | + tempEigenValues = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size, 16 ); | |
128 | 147 | |
129 | 148 | for(molds_lapack_int i = 0; i < size; i++){ |
130 | 149 | for(molds_lapack_int j = i; j < size; j++){ |
@@ -133,7 +152,11 @@ molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapa | ||
133 | 152 | } |
134 | 153 | |
135 | 154 | // call Lapack |
155 | +#ifdef __INTEL_COMPILER | |
136 | 156 | dsyevd(&job, &uplo, &size, convertedMatrix, &lda, tempEigenValues, work, &lwork, iwork, &liwork, &info); |
157 | +#else | |
158 | + info = LAPACKE_dsyevd_work(LAPACK_COL_MAJOR, job, uplo, size, convertedMatrix, lda, tempEigenValues, work, lwork, iwork, liwork); | |
159 | +#endif | |
137 | 160 | |
138 | 161 | for(molds_lapack_int i = 0; i < size; i++){ |
139 | 162 | for(molds_lapack_int j = 0; j < size; j++){ |
@@ -160,10 +183,10 @@ molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapa | ||
160 | 183 | //this->OutputLog(boost::format("size=%d lwork=%d liwork=%d k=%d info=%d\n") % size % lwork % liwork % k % info); |
161 | 184 | |
162 | 185 | // free |
163 | - mkl_free(work); | |
164 | - mkl_free(iwork); | |
165 | - mkl_free(convertedMatrix); | |
166 | - mkl_free(tempEigenValues); | |
186 | + MOLDS_LAPACK_free(work); | |
187 | + MOLDS_LAPACK_free(iwork); | |
188 | + MOLDS_LAPACK_free(convertedMatrix); | |
189 | + MOLDS_LAPACK_free(tempEigenValues); | |
167 | 190 | |
168 | 191 | if(info != 0){ |
169 | 192 | stringstream ss; |
@@ -199,9 +222,9 @@ molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lap | ||
199 | 222 | } |
200 | 223 | |
201 | 224 | // malloc |
202 | - ipiv = (molds_lapack_int*)mkl_malloc( sizeof(molds_lapack_int)*2*size, 16 ); | |
203 | - convertedMatrix = (double*)mkl_malloc( sizeof(double)*size*size, 16 ); | |
204 | - tempB = (double*)mkl_malloc( sizeof(double)*size, 16 ); | |
225 | + ipiv = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*2*size, 16 ); | |
226 | + convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*size, 16 ); | |
227 | + tempB = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size, 16 ); | |
205 | 228 | |
206 | 229 | for(molds_lapack_int i = 0; i < size; i++){ |
207 | 230 | for(molds_lapack_int j = i; j < size; j++){ |
@@ -218,24 +241,32 @@ molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lap | ||
218 | 241 | { |
219 | 242 | lwork = -1; |
220 | 243 | double tempWork[3]={0.0, 0.0, 0.0}; |
221 | - dsysv(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, tempWork, &lwork, &info); | |
244 | +#ifdef __INTEL_COMPILER | |
245 | + dsysv(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, tempWork, &lwork, &info); | |
246 | +#else | |
247 | + info = LAPACKE_dsysv_work(LAPACK_COL_MAJOR, uplo, size, nrhs, convertedMatrix, lda, ipiv, tempB, ldb, tempWork, lwork); | |
248 | +#endif | |
222 | 249 | blockSize = tempWork[0]/size; |
223 | 250 | } |
224 | 251 | info = 0; |
225 | 252 | lwork = blockSize*size; |
226 | - work = (double*)mkl_malloc( sizeof(double)*lwork, 16 ); | |
253 | + work = (double*)MOLDS_LAPACK_malloc( sizeof(double)*lwork, 16 ); | |
227 | 254 | |
228 | 255 | // call Lapack |
256 | +#ifdef __INTEL_COMPILER | |
229 | 257 | dsysv(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, work, &lwork, &info); |
258 | +#else | |
259 | + info = LAPACKE_dsysv_work(LAPACK_COL_MAJOR, uplo, size, nrhs, convertedMatrix, lda, ipiv, tempB, ldb, work, lwork); | |
260 | +#endif | |
230 | 261 | for(molds_lapack_int i = 0; i < size; i++){ |
231 | 262 | b[i] = tempB[i]; |
232 | 263 | } |
233 | 264 | |
234 | 265 | // free |
235 | - mkl_free(convertedMatrix); | |
236 | - mkl_free(ipiv); | |
237 | - mkl_free(work); | |
238 | - mkl_free(tempB); | |
266 | + MOLDS_LAPACK_free(convertedMatrix); | |
267 | + MOLDS_LAPACK_free(ipiv); | |
268 | + MOLDS_LAPACK_free(work); | |
269 | + MOLDS_LAPACK_free(tempB); | |
239 | 270 | |
240 | 271 | if(info != 0){ |
241 | 272 | stringstream ss; |
@@ -271,9 +302,9 @@ molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_l | ||
271 | 302 | |
272 | 303 | try{ |
273 | 304 | // malloc |
274 | - ipiv = (molds_lapack_int*)mkl_malloc( sizeof(molds_lapack_int)*2*size, 16 ); | |
275 | - convertedMatrix = (double*)mkl_malloc( sizeof(double)*size*size, 16 ); | |
276 | - convertedB = (double*)mkl_malloc( sizeof(double)*nrhs*size, 16 ); | |
305 | + ipiv = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*2*size, 16 ); | |
306 | + convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*size, 16 ); | |
307 | + convertedB = (double*)MOLDS_LAPACK_malloc( sizeof(double)*nrhs*size, 16 ); | |
277 | 308 | for(molds_lapack_int i = 0; i < size; i++){ |
278 | 309 | for(molds_lapack_int j = 0; j < size; j++){ |
279 | 310 | convertedMatrix[i+j*size] = matrix[i][j]; |
@@ -285,7 +316,11 @@ molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_l | ||
285 | 316 | } |
286 | 317 | } |
287 | 318 | this->Dgetrf(convertedMatrix, ipiv, size, size); |
319 | +#ifdef __INTEL_COMPILER | |
288 | 320 | dgetrs(&trans, &size, &nrhs, convertedMatrix, &lda, ipiv, convertedB, &ldb, &info); |
321 | +#else | |
322 | + info = LAPACKE_dgetrs_work(LAPACK_COL_MAJOR, trans, size, nrhs, convertedMatrix, lda, ipiv, convertedB, ldb); | |
323 | +#endif | |
289 | 324 | for(molds_lapack_int i = 0; i < nrhs; i++){ |
290 | 325 | for(molds_lapack_int j = 0; j < size; j++){ |
291 | 326 | b[i][j] = convertedB[j+i*size]; |
@@ -294,15 +329,15 @@ molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_l | ||
294 | 329 | } |
295 | 330 | catch(MolDSException ex){ |
296 | 331 | // free |
297 | - mkl_free(convertedMatrix); | |
298 | - mkl_free(convertedB); | |
299 | - mkl_free(ipiv); | |
332 | + MOLDS_LAPACK_free(convertedMatrix); | |
333 | + MOLDS_LAPACK_free(convertedB); | |
334 | + MOLDS_LAPACK_free(ipiv); | |
300 | 335 | throw ex; |
301 | 336 | } |
302 | 337 | // free |
303 | - mkl_free(convertedMatrix); | |
304 | - mkl_free(convertedB); | |
305 | - mkl_free(ipiv); | |
338 | + MOLDS_LAPACK_free(convertedMatrix); | |
339 | + MOLDS_LAPACK_free(convertedB); | |
340 | + MOLDS_LAPACK_free(ipiv); | |
306 | 341 | |
307 | 342 | if(info != 0){ |
308 | 343 | stringstream ss; |
@@ -316,9 +351,9 @@ molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_l | ||
316 | 351 | // Argument "matrix" is sizeM*sizeN matrix. |
317 | 352 | // Argument "matrix" will be LU-decomposed. |
318 | 353 | molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int sizeM, molds_lapack_int sizeN) const{ |
319 | - molds_lapack_int* ipiv = (molds_lapack_int*)mkl_malloc( sizeof(molds_lapack_int)*2*sizeM, 16 ); | |
354 | + molds_lapack_int* ipiv = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*2*sizeM, 16 ); | |
320 | 355 | this->Dgetrf(matrix, ipiv, sizeM, sizeN); |
321 | - mkl_free(ipiv); | |
356 | + MOLDS_LAPACK_free(ipiv); | |
322 | 357 | molds_lapack_int info = 0; |
323 | 358 | return info; |
324 | 359 | } |
@@ -326,7 +361,7 @@ molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int sizeM, molds_l | ||
326 | 361 | // Argument "matrix" is sizeM*sizeN matrix. |
327 | 362 | // Argument "matrix" will be LU-decomposed. |
328 | 363 | molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const{ |
329 | - double* convertedMatrix = (double*)mkl_malloc( sizeof(double)*sizeM*sizeN, 16 ); | |
364 | + double* convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*sizeM*sizeN, 16 ); | |
330 | 365 | for(molds_lapack_int i=0; i<sizeM; i++){ |
331 | 366 | for(molds_lapack_int j=0; j<sizeN; j++){ |
332 | 367 | convertedMatrix[i+j*sizeM] = matrix[i][j]; |
@@ -338,7 +373,7 @@ molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int* ipiv, molds_l | ||
338 | 373 | matrix[i][j] = convertedMatrix[i+j*sizeM]; |
339 | 374 | } |
340 | 375 | } |
341 | - mkl_free(convertedMatrix); | |
376 | + MOLDS_LAPACK_free(convertedMatrix); | |
342 | 377 | molds_lapack_int info = 0; |
343 | 378 | return info; |
344 | 379 | } |
@@ -348,7 +383,11 @@ molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int* ipiv, molds_l | ||
348 | 383 | molds_lapack_int Lapack::Dgetrf(double* matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const{ |
349 | 384 | molds_lapack_int info = 0; |
350 | 385 | molds_lapack_int lda = sizeM; |
386 | +#ifdef __INTEL_COMPILER | |
351 | 387 | dgetrf(&sizeM, &sizeN, matrix, &lda, ipiv, &info); |
388 | +#else | |
389 | + info = LAPACKE_dgetrf_work(LAPACK_COL_MAJOR, sizeM, sizeN, matrix, lda, ipiv); | |
390 | +#endif | |
352 | 391 | if(info != 0){ |
353 | 392 | stringstream ss; |
354 | 393 | ss << errorMessageDgetrfInfo; |