リビジョン | 5613cccc631750dc5dfa929dce1de388f9e39eca (tree) |
---|---|
日時 | 2013-08-19 19:05:47 |
作者 | Mikiya Fujii <mikiya.fujii@gmai...> |
コミッター | Mikiya Fujii |
Asynchronous MPI communication is implemented. #31814
git-svn-id: https://svn.sourceforge.jp/svnroot/molds/trunk@1474 1136aad2-a195-0410-b898-f5ea1d11b9d8
@@ -26,7 +26,7 @@ endif | ||
26 | 26 | BOOST_TOP_DIR = /usr/local/boost/ |
27 | 27 | BOOST_INC_DIR = $(BOOST_TOP_DIR)include/ |
28 | 28 | BOOST_LIB_DIR = $(BOOST_TOP_DIR)lib/ |
29 | -BOOST_LIBS = -lboost_serialization -lboost_mpi | |
29 | +BOOST_LIBS = -lboost_serialization -lboost_mpi -lboost_thread | |
30 | 30 | LIBSBASE = -lmkl_intel_thread -lmkl_core -liomp5 -lpthread |
31 | 31 | ifeq ($(INTEL), 64) |
32 | 32 | LIBS = -lmkl_intel_ilp64 $(LIBSBASE) $(BOOST_LIBS) |
@@ -37,9 +37,9 @@ EXENAME = MolDS.out | ||
37 | 37 | DEPFILE = obj/objfile.dep |
38 | 38 | LDFLAGS = |
39 | 39 | |
40 | -ALL_CPP_FILES = base/Enums.cpp base/PrintController.cpp base/MolDSException.cpp base/MallocerFreer.cpp mpi/MpiProcess.cpp wrappers/Blas.cpp wrappers/Lapack.cpp base/Utilities.cpp base/MathUtilities.cpp base/EularAngle.cpp base/Parameters.cpp base/atoms/Atom.cpp base/atoms/Hatom.cpp base/atoms/Liatom.cpp base/atoms/Catom.cpp base/atoms/Natom.cpp base/atoms/Oatom.cpp base/atoms/Satom.cpp base/factories/AtomFactory.cpp base/Molecule.cpp base/InputParser.cpp base/GTOExpansionSTO.cpp base/RealSphericalHarmonicsIndex.cpp base/loggers/MOLogger.cpp base/loggers/DensityLogger.cpp base/loggers/HoleDensityLogger.cpp base/loggers/ParticleDensityLogger.cpp cndo/Cndo2.cpp indo/Indo.cpp zindo/ZindoS.cpp mndo/Mndo.cpp am1/Am1.cpp am1/Am1D.cpp pm3/Pm3.cpp pm3/Pm3D.cpp pm3/Pm3Pddg.cpp base/factories/ElectronicStructureFactory.cpp md/MD.cpp mc/MC.cpp rpmd/RPMD.cpp nasco/NASCO.cpp optimization/Optimizer.cpp optimization/ConjugateGradient.cpp optimization/SteepestDescent.cpp optimization/BFGS.cpp base/factories/OptimizerFactory.cpp base/MolDS.cpp Main.cpp | |
41 | -ALL_HEAD_FILES = base/Enums.h base/Uncopyable.h base/PrintController.h base/MolDSException.h base/MallocerFreer.h mpi/MpiProcess.h wrappers/Blas.h wrappers/Lapack.h base/Utilities.h base/MathUtilities.h base/EularAngle.h base/Parameters.h base/atoms/Atom.h base/atoms/Hatom.h base/atoms/Liatom.h base/atoms/Catom.h base/atoms/Natom.h base/atoms/Oatom.h base/atoms/Satom.h base/factories/AtomFactory.h base/Molecule.h base/InputParser.h base/GTOExpansionSTO.h base/RealSphericalHarmonicsIndex.h base/loggers/MOLogger.h base/loggers/DensityLogger.h base/loggers/HoleDensityLogger.h base/loggers/ParticleDensityLogger.h base/ElectronicStructure.h cndo/Cndo2.h cndo/ReducedOverlapAOsParameters.h indo/Indo.h zindo/ZindoS.h mndo/Mndo.h am1/Am1.h am1/Am1D.h pm3/Pm3.h pm3/Pm3D.h pm3/Pm3Pddg.h base/factories/ElectronicStructureFactory.h md/MD.h mc/MC.h rpmd/RPMD.h nasco/NASCO.h optimization/Optimizer.h optimization/ConjugateGradient.h optimization/SteepestDescent.h optimization/BFGS.h base/factories/OptimizerFactory.h base/MolDS.h | |
42 | -ALL_OBJ_FILES = obj/Enums.o obj/PrintController.o obj/MolDSException.o obj/MallocerFreer.o obj/MpiProcess.o obj/Blas.o obj/Lapack.o obj/Utilities.o obj/MathUtilities.o obj/EularAngle.o obj/Parameters.o obj/Atom.o obj/Hatom.o obj/Liatom.o obj/Catom.o obj/Natom.o obj/Oatom.o obj/Satom.o obj/AtomFactory.o obj/Molecule.o obj/InputParser.o obj/GTOExpansionSTO.o obj/RealSphericalHarmonicsIndex.o obj/MOLogger.o obj/DensityLogger.o obj/HoleDensityLogger.o obj/ParticleDensityLogger.o obj/Cndo2.o obj/Indo.o obj/ZindoS.o obj/Mndo.o obj/Am1.o obj/Am1D.o obj/Pm3.o obj/Pm3D.o obj/Pm3Pddg.o obj/ElectronicStructureFactory.o obj/MD.o obj/MC.o obj/RPMD.o obj/NASCO.o obj/Optimizer.o obj/ConjugateGradient.o obj/SteepestDescent.o obj/BFGS.o obj/OptimizerFactory.o obj/MolDS.o obj/Main.o | |
40 | +ALL_CPP_FILES = base/Enums.cpp base/PrintController.cpp base/MolDSException.cpp base/MallocerFreer.cpp mpi/MpiProcess.cpp mpi/AsyncCommunicator.cpp wrappers/Blas.cpp wrappers/Lapack.cpp base/Utilities.cpp base/MathUtilities.cpp base/EularAngle.cpp base/Parameters.cpp base/atoms/Atom.cpp base/atoms/Hatom.cpp base/atoms/Liatom.cpp base/atoms/Catom.cpp base/atoms/Natom.cpp base/atoms/Oatom.cpp base/atoms/Satom.cpp base/factories/AtomFactory.cpp base/Molecule.cpp base/InputParser.cpp base/GTOExpansionSTO.cpp base/RealSphericalHarmonicsIndex.cpp base/loggers/MOLogger.cpp base/loggers/DensityLogger.cpp base/loggers/HoleDensityLogger.cpp base/loggers/ParticleDensityLogger.cpp cndo/Cndo2.cpp indo/Indo.cpp zindo/ZindoS.cpp mndo/Mndo.cpp am1/Am1.cpp am1/Am1D.cpp pm3/Pm3.cpp pm3/Pm3D.cpp pm3/Pm3Pddg.cpp base/factories/ElectronicStructureFactory.cpp md/MD.cpp mc/MC.cpp rpmd/RPMD.cpp nasco/NASCO.cpp optimization/Optimizer.cpp optimization/ConjugateGradient.cpp optimization/SteepestDescent.cpp optimization/BFGS.cpp base/factories/OptimizerFactory.cpp base/MolDS.cpp Main.cpp | |
41 | +ALL_HEAD_FILES = base/Enums.h base/Uncopyable.h base/PrintController.h base/MolDSException.h base/containers/ThreadSafeQueue.h base/MallocerFreer.h mpi/MpiProcess.h mpi/AsyncCommunicator.h wrappers/Blas.h wrappers/Lapack.h base/Utilities.h base/MathUtilities.h base/EularAngle.h base/Parameters.h base/atoms/Atom.h base/atoms/Hatom.h base/atoms/Liatom.h base/atoms/Catom.h base/atoms/Natom.h base/atoms/Oatom.h base/atoms/Satom.h base/factories/AtomFactory.h base/Molecule.h base/InputParser.h base/GTOExpansionSTO.h base/RealSphericalHarmonicsIndex.h base/loggers/MOLogger.h base/loggers/DensityLogger.h base/loggers/HoleDensityLogger.h base/loggers/ParticleDensityLogger.h base/ElectronicStructure.h cndo/Cndo2.h cndo/ReducedOverlapAOsParameters.h indo/Indo.h zindo/ZindoS.h mndo/Mndo.h am1/Am1.h am1/Am1D.h pm3/Pm3.h pm3/Pm3D.h pm3/Pm3Pddg.h base/factories/ElectronicStructureFactory.h md/MD.h mc/MC.h rpmd/RPMD.h nasco/NASCO.h optimization/Optimizer.h optimization/ConjugateGradient.h optimization/SteepestDescent.h optimization/BFGS.h base/factories/OptimizerFactory.h base/MolDS.h | |
42 | +ALL_OBJ_FILES = obj/Enums.o obj/PrintController.o obj/MolDSException.o obj/MallocerFreer.o obj/MpiProcess.o obj/AsyncCommunicator.o obj/Blas.o obj/Lapack.o obj/Utilities.o obj/MathUtilities.o obj/EularAngle.o obj/Parameters.o obj/Atom.o obj/Hatom.o obj/Liatom.o obj/Catom.o obj/Natom.o obj/Oatom.o obj/Satom.o obj/AtomFactory.o obj/Molecule.o obj/InputParser.o obj/GTOExpansionSTO.o obj/RealSphericalHarmonicsIndex.o obj/MOLogger.o obj/DensityLogger.o obj/HoleDensityLogger.o obj/ParticleDensityLogger.o obj/Cndo2.o obj/Indo.o obj/ZindoS.o obj/Mndo.o obj/Am1.o obj/Am1D.o obj/Pm3.o obj/Pm3D.o obj/Pm3Pddg.o obj/ElectronicStructureFactory.o obj/MD.o obj/MC.o obj/RPMD.o obj/NASCO.o obj/Optimizer.o obj/ConjugateGradient.o obj/SteepestDescent.o obj/BFGS.o obj/OptimizerFactory.o obj/MolDS.o obj/Main.o | |
43 | 43 | |
44 | 44 | $(EXENAME): $(ALL_OBJ_FILES) |
45 | 45 | $(CC) -o $@ -Wl,-rpath=$(BOOST_LIB_DIR) -L$(BOOST_LIB_DIR) $(LDFLAGS) $(ALL_OBJ_FILES) $(LIBS) |
@@ -23,7 +23,7 @@ override CFLAGS += -fopenmp | ||
23 | 23 | BOOST_TOP_DIR = /usr/local/boost/ |
24 | 24 | BOOST_INC_DIR = $(BOOST_TOP_DIR)include/ |
25 | 25 | BOOST_LIB_DIR = $(BOOST_TOP_DIR)lib/ |
26 | -BOOST_LIBS = -lboost_serialization -lboost_mpi | |
26 | +BOOST_LIBS = -lboost_serialization -lboost_mpi -lboost_thread | |
27 | 27 | OPENBLAS_TOP_DIR = /usr/local/openblas/ |
28 | 28 | OPENBLAS_INC_DIR = $(OPENBLAS_TOP_DIR)include/ |
29 | 29 | OPENBLAS_LIB_DIR = $(OPENBLAS_TOP_DIR)lib/ |
@@ -34,9 +34,9 @@ EXENAME = MolDS.out | ||
34 | 34 | DEPFILE = obj/objfile.dep |
35 | 35 | LDFLAGS = |
36 | 36 | |
37 | -ALL_CPP_FILES = base/Enums.cpp base/PrintController.cpp base/MolDSException.cpp base/MallocerFreer.cpp mpi/MpiProcess.cpp wrappers/Blas.cpp wrappers/Lapack.cpp base/Utilities.cpp base/MathUtilities.cpp base/EularAngle.cpp base/Parameters.cpp base/atoms/Atom.cpp base/atoms/Hatom.cpp base/atoms/Liatom.cpp base/atoms/Catom.cpp base/atoms/Natom.cpp base/atoms/Oatom.cpp base/atoms/Satom.cpp base/factories/AtomFactory.cpp base/Molecule.cpp base/InputParser.cpp base/GTOExpansionSTO.cpp base/RealSphericalHarmonicsIndex.cpp base/loggers/MOLogger.cpp base/loggers/DensityLogger.cpp base/loggers/HoleDensityLogger.cpp base/loggers/ParticleDensityLogger.cpp cndo/Cndo2.cpp indo/Indo.cpp zindo/ZindoS.cpp mndo/Mndo.cpp am1/Am1.cpp am1/Am1D.cpp pm3/Pm3.cpp pm3/Pm3D.cpp pm3/Pm3Pddg.cpp base/factories/ElectronicStructureFactory.cpp md/MD.cpp mc/MC.cpp rpmd/RPMD.cpp nasco/NASCO.cpp optimization/Optimizer.cpp optimization/ConjugateGradient.cpp optimization/SteepestDescent.cpp optimization/BFGS.cpp base/factories/OptimizerFactory.cpp base/MolDS.cpp Main.cpp | |
38 | -ALL_HEAD_FILES = base/Enums.h base/Uncopyable.h base/PrintController.h base/MolDSException.h base/MallocerFreer.h mpi/MpiProcess.h wrappers/Blas.h wrappers/Lapack.h base/Utilities.h base/MathUtilities.h base/EularAngle.h base/Parameters.h base/atoms/Atom.h base/atoms/Hatom.h base/atoms/Liatom.h base/atoms/Catom.h base/atoms/Natom.h base/atoms/Oatom.h base/atoms/Satom.h base/factories/AtomFactory.h base/Molecule.h base/InputParser.h base/GTOExpansionSTO.h base/RealSphericalHarmonicsIndex.h base/loggers/MOLogger.h base/loggers/DensityLogger.h base/loggers/HoleDensityLogger.h base/loggers/ParticleDensityLogger.h base/ElectronicStructure.h cndo/Cndo2.h cndo/ReducedOverlapAOsParameters.h indo/Indo.h zindo/ZindoS.h mndo/Mndo.h am1/Am1.h am1/Am1D.h pm3/Pm3.h pm3/Pm3D.h pm3/Pm3Pddg.h base/factories/ElectronicStructureFactory.h md/MD.h mc/MC.h rpmd/RPMD.h nasco/NASCO.h optimization/Optimizer.h optimization/ConjugateGradient.h optimization/SteepestDescent.h optimization/BFGS.h base/factories/OptimizerFactory.h base/MolDS.h | |
39 | -ALL_OBJ_FILES = obj/Enums.o obj/PrintController.o obj/MolDSException.o obj/MallocerFreer.o obj/MpiProcess.o obj/Blas.o obj/Lapack.o obj/Utilities.o obj/MathUtilities.o obj/EularAngle.o obj/Parameters.o obj/Atom.o obj/Hatom.o obj/Liatom.o obj/Catom.o obj/Natom.o obj/Oatom.o obj/Satom.o obj/AtomFactory.o obj/Molecule.o obj/InputParser.o obj/GTOExpansionSTO.o obj/RealSphericalHarmonicsIndex.o obj/MOLogger.o obj/DensityLogger.o obj/HoleDensityLogger.o obj/ParticleDensityLogger.o obj/Cndo2.o obj/Indo.o obj/ZindoS.o obj/Mndo.o obj/Am1.o obj/Am1D.o obj/Pm3.o obj/Pm3D.o obj/Pm3Pddg.o obj/ElectronicStructureFactory.o obj/MD.o obj/MC.o obj/RPMD.o obj/NASCO.o obj/Optimizer.o obj/ConjugateGradient.o obj/SteepestDescent.o obj/BFGS.o obj/OptimizerFactory.o obj/MolDS.o obj/Main.o | |
37 | +ALL_CPP_FILES = base/Enums.cpp base/PrintController.cpp base/MolDSException.cpp base/MallocerFreer.cpp mpi/MpiProcess.cpp mpi/AsyncCommunicator.cpp wrappers/Blas.cpp wrappers/Lapack.cpp base/Utilities.cpp base/MathUtilities.cpp base/EularAngle.cpp base/Parameters.cpp base/atoms/Atom.cpp base/atoms/Hatom.cpp base/atoms/Liatom.cpp base/atoms/Catom.cpp base/atoms/Natom.cpp base/atoms/Oatom.cpp base/atoms/Satom.cpp base/factories/AtomFactory.cpp base/Molecule.cpp base/InputParser.cpp base/GTOExpansionSTO.cpp base/RealSphericalHarmonicsIndex.cpp base/loggers/MOLogger.cpp base/loggers/DensityLogger.cpp base/loggers/HoleDensityLogger.cpp base/loggers/ParticleDensityLogger.cpp cndo/Cndo2.cpp indo/Indo.cpp zindo/ZindoS.cpp mndo/Mndo.cpp am1/Am1.cpp am1/Am1D.cpp pm3/Pm3.cpp pm3/Pm3D.cpp pm3/Pm3Pddg.cpp base/factories/ElectronicStructureFactory.cpp md/MD.cpp mc/MC.cpp rpmd/RPMD.cpp nasco/NASCO.cpp optimization/Optimizer.cpp optimization/ConjugateGradient.cpp optimization/SteepestDescent.cpp optimization/BFGS.cpp base/factories/OptimizerFactory.cpp base/MolDS.cpp Main.cpp | |
38 | +ALL_HEAD_FILES = base/Enums.h base/Uncopyable.h base/PrintController.h base/MolDSException.h base/containers/ThreadSafeQueue.h base/MallocerFreer.h mpi/MpiProcess.h mpi/AsyncCommunicator.h wrappers/Blas.h wrappers/Lapack.h base/Utilities.h base/MathUtilities.h base/EularAngle.h base/Parameters.h base/atoms/Atom.h base/atoms/Hatom.h base/atoms/Liatom.h base/atoms/Catom.h base/atoms/Natom.h base/atoms/Oatom.h base/atoms/Satom.h base/factories/AtomFactory.h base/Molecule.h base/InputParser.h base/GTOExpansionSTO.h base/RealSphericalHarmonicsIndex.h base/loggers/MOLogger.h base/loggers/DensityLogger.h base/loggers/HoleDensityLogger.h base/loggers/ParticleDensityLogger.h base/ElectronicStructure.h cndo/Cndo2.h cndo/ReducedOverlapAOsParameters.h indo/Indo.h zindo/ZindoS.h mndo/Mndo.h am1/Am1.h am1/Am1D.h pm3/Pm3.h pm3/Pm3D.h pm3/Pm3Pddg.h base/factories/ElectronicStructureFactory.h md/MD.h mc/MC.h rpmd/RPMD.h nasco/NASCO.h optimization/Optimizer.h optimization/ConjugateGradient.h optimization/SteepestDescent.h optimization/BFGS.h base/factories/OptimizerFactory.h base/MolDS.h | |
39 | +ALL_OBJ_FILES = obj/Enums.o obj/PrintController.o obj/MolDSException.o obj/MallocerFreer.o obj/MpiProcess.o obj/AsyncCommunicator.o obj/Blas.o obj/Lapack.o obj/Utilities.o obj/MathUtilities.o obj/EularAngle.o obj/Parameters.o obj/Atom.o obj/Hatom.o obj/Liatom.o obj/Catom.o obj/Natom.o obj/Oatom.o obj/Satom.o obj/AtomFactory.o obj/Molecule.o obj/InputParser.o obj/GTOExpansionSTO.o obj/RealSphericalHarmonicsIndex.o obj/MOLogger.o obj/DensityLogger.o obj/HoleDensityLogger.o obj/ParticleDensityLogger.o obj/Cndo2.o obj/Indo.o obj/ZindoS.o obj/Mndo.o obj/Am1.o obj/Am1D.o obj/Pm3.o obj/Pm3D.o obj/Pm3Pddg.o obj/ElectronicStructureFactory.o obj/MD.o obj/MC.o obj/RPMD.o obj/NASCO.o obj/Optimizer.o obj/ConjugateGradient.o obj/SteepestDescent.o obj/BFGS.o obj/OptimizerFactory.o obj/MolDS.o obj/Main.o | |
40 | 40 | |
41 | 41 | $(EXENAME): $(ALL_OBJ_FILES) |
42 | 42 | $(CC) -o $@ $(LDFLAGS) -Wl,-rpath=$(BOOST_LIB_DIR) -Wl,-rpath=$(OPENBLAS_LIB_DIR) $(LDFLAGS) $(ALL_OBJ_FILES) -L$(BOOST_LIB_DIR) -L$(OPENBLAS_LIB_DIR) $(LIBS) |
@@ -154,9 +154,17 @@ RENUMSTR_END() | ||
154 | 154 | |
155 | 155 | RENUMSTR_BEGIN( ExceptionKey, ExceptionKeyStr ) |
156 | 156 | RENUMSTR( LapackInfo, "LapackInfo" ) |
157 | + RENUMSTR( EmptyQueue, "EmptyQueue" ) | |
157 | 158 | RENUMSTR( ExceptionKey_end, "ExceptionKey_end" ) |
158 | 159 | RENUMSTR_END() |
159 | 160 | |
161 | +RENUMSTR_BEGIN( MpiFunctionType, MpiFunctionTypeStr ) | |
162 | + RENUMSTR( Send, "Send" ) | |
163 | + RENUMSTR( Recv, "Recv" ) | |
164 | + RENUMSTR( Broadcast, "Broadcast" ) | |
165 | + RENUMSTR( MpiFunctionType_end, "MpiFunctionType_end" ) | |
166 | +RENUMSTR_END() | |
167 | + | |
160 | 168 | } |
161 | 169 | #endif |
162 | 170 |
@@ -24,6 +24,7 @@ | ||
24 | 24 | #include<string> |
25 | 25 | #include<vector> |
26 | 26 | #include<stdexcept> |
27 | +#include<omp.h> | |
27 | 28 | #include<boost/format.hpp> |
28 | 29 | #include"Enums.h" |
29 | 30 | #include"Uncopyable.h" |
@@ -0,0 +1,41 @@ | ||
1 | +//************************************************************************// | |
2 | +// Copyright (C) 2011-2013 Mikiya Fujii // | |
3 | +// // | |
4 | +// This file is part of MolDS. // | |
5 | +// // | |
6 | +// MolDS is free software: you can redistribute it and/or modify // | |
7 | +// it under the terms of the GNU General Public License as published by // | |
8 | +// the Free Software Foundation, either version 3 of the License, or // | |
9 | +// (at your option) any later version. // | |
10 | +// // | |
11 | +// MolDS is distributed in the hope that it will be useful, // | |
12 | +// but WITHOUT ANY WARRANTY; without even the implied warranty of // | |
13 | +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // | |
14 | +// GNU General Public License for more details. // | |
15 | +// // | |
16 | +// You should have received a copy of the GNU General Public License // | |
17 | +// along with MolDS. If not, see <http://www.gnu.org/licenses/>. // | |
18 | +//************************************************************************// | |
19 | +#include<stdio.h> | |
20 | +#include<sstream> | |
21 | +#include<queue> | |
22 | +#include<boost/format.hpp> | |
23 | +#include"../Enums.h" | |
24 | +#include"../Uncopyable.h" | |
25 | +#include"../PrintController.h" | |
26 | +#include"../MolDSException.h" | |
27 | +#include"ThreadSafeQueue.h" | |
28 | +using namespace std; | |
29 | +//using namespace MolDS_base; | |
30 | + | |
31 | +namespace MolDS_base_containers{ | |
32 | + int ThreadSafeQueue::Size(){ | |
33 | + boost::mutex::scoped_lock lk(this->stateGuard); | |
34 | + return this->stdQueue.size(); | |
35 | + } | |
36 | + | |
37 | + bool ThreadSafeQueue::Empty(){ | |
38 | + boost::mutex::scoped_lock lk(this->stateGuard); | |
39 | + return this->stdQueue.empty(); | |
40 | + } | |
41 | +}; |
@@ -0,0 +1,75 @@ | ||
1 | +//************************************************************************// | |
2 | +// Copyright (C) 2011-2013 Mikiya Fujii // | |
3 | +// Copyright (C) 2012-2012 Katushiko Nishimra // | |
4 | +// // | |
5 | +// This file is part of MolDS. // | |
6 | +// // | |
7 | +// MolDS is free software: you can redistribute it and/or modify // | |
8 | +// it under the terms of the GNU General Public License as published by // | |
9 | +// the Free Software Foundation, either version 3 of the License, or // | |
10 | +// (at your option) any later version. // | |
11 | +// // | |
12 | +// MolDS is distributed in the hope that it will be useful, // | |
13 | +// but WITHOUT ANY WARRANTY; without even the implied warranty of // | |
14 | +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // | |
15 | +// GNU General Public License for more details. // | |
16 | +// // | |
17 | +// You should have received a copy of the GNU General Public License // | |
18 | +// along with MolDS. If not, see <http://www.gnu.org/licenses/>. // | |
19 | +//************************************************************************// | |
20 | +#ifndef INCLUDED_THREADSAFEQUEQUE | |
21 | +#define INCLUDED_THREADSAFEQUEQUE | |
22 | +#include<queue> | |
23 | +#include<boost/shared_ptr.hpp> | |
24 | +#include<boost/thread.hpp> | |
25 | +#include<boost/thread/condition.hpp> | |
26 | +namespace MolDS_base_containers{ | |
27 | + | |
28 | +// This Queue class is thread-safe | |
29 | + | |
30 | +template <typename T> | |
31 | +class ThreadSafeQueue | |
32 | +{ | |
33 | +public: | |
34 | + ThreadSafeQueue(){} | |
35 | + ~ThreadSafeQueue(){} | |
36 | + | |
37 | + void Push(const T& data){ | |
38 | + boost::mutex::scoped_lock lk(this->stateGuard); | |
39 | + this->stdQueue.push(data); | |
40 | + this->stateChange.notify_all(); | |
41 | + } | |
42 | + | |
43 | + T FrontPop(){ | |
44 | + boost::mutex::scoped_lock lk(this->stateGuard); | |
45 | + if(this->stdQueue.empty()){ | |
46 | + std::stringstream ss; | |
47 | + ss << "naitive queue has no member\n"; | |
48 | + MolDS_base::MolDSException ex(ss.str()); | |
49 | + int info = 0; | |
50 | + ex.SetKeyValue<int>(MolDS_base::EmptyQueue, info); | |
51 | + throw ex; | |
52 | + } | |
53 | + T ret = this->stdQueue.front(); | |
54 | + this->stdQueue.pop(); | |
55 | + this->stateChange.notify_all(); | |
56 | + return ret; | |
57 | + } | |
58 | + | |
59 | + int Size(){ | |
60 | + boost::mutex::scoped_lock lk(this->stateGuard); | |
61 | + return this->stdQueue.size(); | |
62 | + } | |
63 | + | |
64 | + bool Empty(){ | |
65 | + boost::mutex::scoped_lock lk(this->stateGuard); | |
66 | + return this->stdQueue.empty(); | |
67 | + } | |
68 | +private: | |
69 | + std::queue<T> stdQueue; | |
70 | + boost::mutex stateGuard; | |
71 | + boost::condition_variable stateChange; | |
72 | +}; | |
73 | + | |
74 | +} | |
75 | +#endif |
@@ -33,7 +33,9 @@ | ||
33 | 33 | #include"../base/PrintController.h" |
34 | 34 | #include"../base/MolDSException.h" |
35 | 35 | #include"../base/MallocerFreer.h" |
36 | +#include"../base/containers/ThreadSafeQueue.h" | |
36 | 37 | #include"../mpi/MpiProcess.h" |
38 | +#include"../mpi/AsyncCommunicator.h" | |
37 | 39 | #include"../wrappers/Blas.h" |
38 | 40 | #include"../wrappers/Lapack.h" |
39 | 41 | #include"../base/MathUtilities.h" |
@@ -1374,95 +1376,100 @@ void Cndo2::CalcFockMatrix(double** fockMatrix, | ||
1374 | 1376 | double const* atomicElectronPopulation, |
1375 | 1377 | double const* const* const* const* const* const* twoElecTwoCore, |
1376 | 1378 | bool isGuess) const{ |
1377 | - int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); | |
1378 | - int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); | |
1379 | 1379 | int totalNumberAOs = molecule.GetTotalNumberAOs(); |
1380 | 1380 | int totalNumberAtoms = molecule.GetNumberAtoms(); |
1381 | - MallocerFreer::GetInstance()->Initialize<double>(fockMatrix, | |
1382 | - totalNumberAOs, | |
1383 | - totalNumberAOs); | |
1381 | + | |
1382 | + // MPI setting of each rank | |
1383 | + int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); | |
1384 | + int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); | |
1385 | + int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); | |
1386 | + int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalNumberAOs); | |
1387 | + MolDS_mpi::AsyncCommunicator asyncCommunicator; | |
1388 | + boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, | |
1389 | + &asyncCommunicator, | |
1390 | + mPassingTimes) ); | |
1391 | + | |
1392 | + MallocerFreer::GetInstance()->Initialize<double>(fockMatrix, totalNumberAOs, totalNumberAOs); | |
1384 | 1393 | for(int A=0; A<totalNumberAtoms; A++){ |
1385 | 1394 | const Atom& atomA = *molecule.GetAtom(A); |
1386 | 1395 | int firstAOIndexA = atomA.GetFirstAOIndex(); |
1387 | 1396 | int lastAOIndexA = atomA.GetLastAOIndex(); |
1388 | 1397 | for(int mu=firstAOIndexA; mu<=lastAOIndexA; mu++){ |
1389 | - if(mu%mpiSize != mpiRank){continue;} | |
1390 | - | |
1391 | - stringstream ompErrors; | |
1392 | -#pragma omp parallel for schedule(auto) | |
1393 | - for(int B=A; B<totalNumberAtoms; B++){ | |
1394 | - try{ | |
1395 | - const Atom& atomB = *molecule.GetAtom(B); | |
1396 | - int firstAOIndexB = atomB.GetFirstAOIndex(); | |
1397 | - int lastAOIndexB = atomB.GetLastAOIndex(); | |
1398 | - for(int nu=firstAOIndexB; nu<=lastAOIndexB; nu++){ | |
1399 | - if(mu == nu){ | |
1400 | - // diagonal part | |
1401 | - fockMatrix[mu][mu] = this->GetFockDiagElement(atomA, | |
1402 | - A, | |
1403 | - mu, | |
1404 | - molecule, | |
1405 | - gammaAB, | |
1406 | - orbitalElectronPopulation, | |
1407 | - atomicElectronPopulation, | |
1408 | - twoElecTwoCore, | |
1409 | - isGuess); | |
1410 | - } | |
1411 | - else if(mu < nu){ | |
1412 | - // upper right part | |
1413 | - fockMatrix[mu][nu] = this->GetFockOffDiagElement(atomA, | |
1414 | - atomB, | |
1398 | + int calcRank = mu%mpiSize; | |
1399 | + if(mpiRank == calcRank){ | |
1400 | + int maxThreads = omp_get_max_threads(); | |
1401 | + stringstream ompErrors; | |
1402 | +#pragma omp parallel for schedule(auto) num_threads(maxThreads-1) | |
1403 | + for(int B=A; B<totalNumberAtoms; B++){ | |
1404 | + try{ | |
1405 | + const Atom& atomB = *molecule.GetAtom(B); | |
1406 | + int firstAOIndexB = atomB.GetFirstAOIndex(); | |
1407 | + int lastAOIndexB = atomB.GetLastAOIndex(); | |
1408 | + for(int nu=firstAOIndexB; nu<=lastAOIndexB; nu++){ | |
1409 | + if(mu == nu){ | |
1410 | + // diagonal part | |
1411 | + fockMatrix[mu][mu] = this->GetFockDiagElement(atomA, | |
1415 | 1412 | A, |
1416 | - B, | |
1417 | 1413 | mu, |
1418 | - nu, | |
1419 | 1414 | molecule, |
1420 | 1415 | gammaAB, |
1421 | - overlapAOs, | |
1422 | 1416 | orbitalElectronPopulation, |
1417 | + atomicElectronPopulation, | |
1423 | 1418 | twoElecTwoCore, |
1424 | 1419 | isGuess); |
1425 | - } | |
1426 | - else{ | |
1427 | - // lower left part (not calculated) | |
1428 | - } | |
1429 | - } // end of loop nu | |
1430 | - } // end of try | |
1431 | - catch(MolDSException ex){ | |
1420 | + } | |
1421 | + else if(mu < nu){ | |
1422 | + // upper right part | |
1423 | + fockMatrix[mu][nu] = this->GetFockOffDiagElement(atomA, | |
1424 | + atomB, | |
1425 | + A, | |
1426 | + B, | |
1427 | + mu, | |
1428 | + nu, | |
1429 | + molecule, | |
1430 | + gammaAB, | |
1431 | + overlapAOs, | |
1432 | + orbitalElectronPopulation, | |
1433 | + twoElecTwoCore, | |
1434 | + isGuess); | |
1435 | + } | |
1436 | + else{ | |
1437 | + // lower left part (not calculated) | |
1438 | + } | |
1439 | + } // end of loop nu | |
1440 | + } // end of try | |
1441 | + catch(MolDSException ex){ | |
1432 | 1442 | #pragma omp critical |
1433 | - ex.Serialize(ompErrors); | |
1443 | + ex.Serialize(ompErrors); | |
1444 | + } | |
1445 | + } // end of loop B parallelized with openMP | |
1446 | + // Exception throwing for omp-region | |
1447 | + if(!ompErrors.str().empty()){ | |
1448 | + throw MolDSException::Deserialize(ompErrors); | |
1434 | 1449 | } |
1435 | - } // end of loop B parallelized with openMP | |
1436 | - // Exception throwing for omp-region | |
1437 | - if(!ompErrors.str().empty()){ | |
1438 | - throw MolDSException::Deserialize(ompErrors); | |
1450 | + } // end of if(mpiRank == calcRank) | |
1451 | + | |
1452 | + // set data to gater in mpiHeadRank with asynchronous MPI | |
1453 | + int tag = mu; | |
1454 | + int source = calcRank; | |
1455 | + int dest = mpiHeadRank; | |
1456 | + if(mpiRank == mpiHeadRank && mpiRank != calcRank){ | |
1457 | + asyncCommunicator.SetRecvedVector(&fockMatrix[mu][mu], | |
1458 | + totalNumberAOs-mu, | |
1459 | + source, | |
1460 | + tag); | |
1461 | + } | |
1462 | + if(mpiRank != mpiHeadRank && mpiRank == calcRank){ | |
1463 | + asyncCommunicator.SetSentVector(&fockMatrix[mu][mu], | |
1464 | + totalNumberAOs-mu, | |
1465 | + dest, | |
1466 | + tag); | |
1439 | 1467 | } |
1440 | 1468 | } // end of loop mu parallelized with MPI |
1441 | 1469 | } // end of loop A |
1442 | - | |
1443 | - // communication to collect all matrix data on head-rank | |
1444 | - int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); | |
1445 | - if(mpiRank == mpiHeadRank){ | |
1446 | - // receive the matrix data from other ranks | |
1447 | - for(int mu=0; mu<totalNumberAOs; mu++){ | |
1448 | - if(mu%mpiSize == mpiHeadRank){continue;} | |
1449 | - int source = mu%mpiSize; | |
1450 | - int tag = mu; | |
1451 | - MolDS_mpi::MpiProcess::GetInstance()->Recv(source, tag, &fockMatrix[mu][mu], totalNumberAOs-mu); | |
1452 | - } | |
1453 | - } | |
1454 | - else{ | |
1455 | - // send the matrix data to head-rank | |
1456 | - for(int mu=0; mu<totalNumberAOs; mu++){ | |
1457 | - if(mu%mpiSize != mpiRank){continue;} | |
1458 | - int dest = mpiHeadRank; | |
1459 | - int tag = mu; | |
1460 | - MolDS_mpi::MpiProcess::GetInstance()->Send(dest, tag, &fockMatrix[mu][mu], totalNumberAOs-mu); | |
1461 | - } | |
1462 | - } | |
1463 | - // broadcast all matrix data to all rank | |
1464 | - int root=mpiHeadRank; | |
1465 | - MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&fockMatrix[0][0], totalNumberAOs*totalNumberAOs, root); | |
1470 | + // Delete the communication thread. | |
1471 | + communicationThread.join(); | |
1472 | + MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&fockMatrix[0][0], totalNumberAOs*totalNumberAOs, mpiHeadRank); | |
1466 | 1473 | |
1467 | 1474 | /* |
1468 | 1475 | this->OutputLog("fock matrix\n"); |
@@ -1590,104 +1597,109 @@ void Cndo2::CalcAtomicElectronPopulation(double* atomicElectronPopulation, | ||
1590 | 1597 | |
1591 | 1598 | // calculate gammaAB matrix. (B.56) and (B.62) in J. A. Pople book. |
1592 | 1599 | void Cndo2::CalcGammaAB(double** gammaAB, const Molecule& molecule) const{ |
1593 | - int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); | |
1594 | - int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); | |
1595 | 1600 | int totalAtomNumber = molecule.GetNumberAtoms(); |
1596 | 1601 | |
1602 | + // MPI setting of each rank | |
1603 | + int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); | |
1604 | + int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); | |
1605 | + int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); | |
1606 | + int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalAtomNumber); | |
1607 | + MolDS_mpi::AsyncCommunicator asyncCommunicator; | |
1608 | + boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, | |
1609 | + &asyncCommunicator, | |
1610 | + mPassingTimes) ); | |
1611 | + | |
1597 | 1612 | // This loop (A) is parallelized by MPI |
1598 | 1613 | for(int A=0; A<totalAtomNumber; A++){ |
1599 | - if(A%mpiSize != mpiRank){continue;} | |
1600 | - const Atom& atomA = *molecule.GetAtom(A); | |
1601 | - int na = atomA.GetValenceShellType() + 1; | |
1602 | - double orbitalExponentA = atomA.GetOrbitalExponent( | |
1603 | - atomA.GetValenceShellType(), s, this->theory); | |
1604 | - stringstream ompErrors; | |
1605 | -#pragma omp parallel for schedule(auto) | |
1606 | - for(int B=A; B<totalAtomNumber; B++){ | |
1607 | - try{ | |
1608 | - const Atom& atomB = *molecule.GetAtom(B); | |
1609 | - int nb = atomB.GetValenceShellType() + 1; | |
1610 | - double orbitalExponentB = atomB.GetOrbitalExponent( | |
1611 | - atomB.GetValenceShellType(), s, this->theory); | |
1612 | - | |
1613 | - double value = 0.0; | |
1614 | - double R = molecule.GetDistanceAtoms(A, B); | |
1615 | - double temp = 0.0; | |
1616 | - if(R>0.0){ | |
1617 | - // (B.56) | |
1618 | - value = pow(0.5*R, 2.0*na); | |
1619 | - value *= this->GetReducedOverlapAOs(2*na-1, 0, 2.0*orbitalExponentA*R, 0); | |
1620 | - | |
1621 | - for(int l=1; l<=2*nb; l++){ | |
1622 | - temp = 0.0; | |
1623 | - temp = l; | |
1624 | - temp *= pow(2.0*orbitalExponentB, 2*nb-l); | |
1625 | - temp /= Factorial(2*nb-l)*2.0*nb; | |
1626 | - temp *= pow(0.5*R, 2.0*nb-l+2.0*na); | |
1627 | - temp *= this->GetReducedOverlapAOs(2*na-1, | |
1628 | - 2*nb-l, | |
1629 | - 2.0*orbitalExponentA*R, | |
1630 | - 2.0*orbitalExponentB*R); | |
1631 | - value -= temp; | |
1632 | - } | |
1614 | + int calcRank = A%mpiSize; | |
1615 | + if(mpiRank == calcRank){ | |
1616 | + const Atom& atomA = *molecule.GetAtom(A); | |
1617 | + int na = atomA.GetValenceShellType() + 1; | |
1618 | + double orbitalExponentA = atomA.GetOrbitalExponent( | |
1619 | + atomA.GetValenceShellType(), s, this->theory); | |
1620 | + int maxThreads = omp_get_max_threads(); | |
1621 | + stringstream ompErrors; | |
1622 | +#pragma omp parallel for schedule(auto) num_threads(maxThreads-1) | |
1623 | + for(int B=A; B<totalAtomNumber; B++){ | |
1624 | + try{ | |
1625 | + const Atom& atomB = *molecule.GetAtom(B); | |
1626 | + int nb = atomB.GetValenceShellType() + 1; | |
1627 | + double orbitalExponentB = atomB.GetOrbitalExponent( | |
1628 | + atomB.GetValenceShellType(), s, this->theory); | |
1629 | + | |
1630 | + double value = 0.0; | |
1631 | + double R = molecule.GetDistanceAtoms(A, B); | |
1632 | + double temp = 0.0; | |
1633 | + if(R>0.0){ | |
1634 | + // (B.56) | |
1635 | + value = pow(0.5*R, 2.0*na); | |
1636 | + value *= this->GetReducedOverlapAOs(2*na-1, 0, 2.0*orbitalExponentA*R, 0); | |
1637 | + | |
1638 | + for(int l=1; l<=2*nb; l++){ | |
1639 | + temp = 0.0; | |
1640 | + temp = l; | |
1641 | + temp *= pow(2.0*orbitalExponentB, 2*nb-l); | |
1642 | + temp /= Factorial(2*nb-l)*2.0*nb; | |
1643 | + temp *= pow(0.5*R, 2.0*nb-l+2.0*na); | |
1644 | + temp *= this->GetReducedOverlapAOs(2*na-1, | |
1645 | + 2*nb-l, | |
1646 | + 2.0*orbitalExponentA*R, | |
1647 | + 2.0*orbitalExponentB*R); | |
1648 | + value -= temp; | |
1649 | + } | |
1633 | 1650 | |
1634 | - value *= pow(2.0*orbitalExponentA, 2.0*na+1.0); | |
1635 | - value /= Factorial(2*na); | |
1636 | - } | |
1637 | - else{ | |
1638 | - // (B.62) | |
1639 | - value = Factorial(2*na-1); | |
1640 | - value /= pow(2.0*orbitalExponentA, 2.0*na); | |
1641 | - | |
1642 | - for(int l=1; l<=2*nb; l++){ | |
1643 | - temp = l; | |
1644 | - temp *= pow(2.0*orbitalExponentB, 2*nb-l); | |
1645 | - temp *= Factorial(2*na+2*nb-l-1); | |
1646 | - temp /= Factorial(2*nb-l); | |
1647 | - temp /= 2.0*nb; | |
1648 | - temp /= pow( 2.0*orbitalExponentA + 2.0*orbitalExponentB, 2.0*(na+nb)-l ); | |
1649 | - value -= temp; | |
1651 | + value *= pow(2.0*orbitalExponentA, 2.0*na+1.0); | |
1652 | + value /= Factorial(2*na); | |
1653 | + } | |
1654 | + else{ | |
1655 | + // (B.62) | |
1656 | + value = Factorial(2*na-1); | |
1657 | + value /= pow(2.0*orbitalExponentA, 2.0*na); | |
1658 | + | |
1659 | + for(int l=1; l<=2*nb; l++){ | |
1660 | + temp = l; | |
1661 | + temp *= pow(2.0*orbitalExponentB, 2*nb-l); | |
1662 | + temp *= Factorial(2*na+2*nb-l-1); | |
1663 | + temp /= Factorial(2*nb-l); | |
1664 | + temp /= 2.0*nb; | |
1665 | + temp /= pow( 2.0*orbitalExponentA + 2.0*orbitalExponentB, 2.0*(na+nb)-l ); | |
1666 | + value -= temp; | |
1667 | + } | |
1668 | + value *= pow(2.0*orbitalExponentA, 2.0*na+1); | |
1669 | + value /= Factorial(2*na); | |
1650 | 1670 | } |
1651 | - value *= pow(2.0*orbitalExponentA, 2.0*na+1); | |
1652 | - value /= Factorial(2*na); | |
1671 | + gammaAB[A][B] = value; | |
1653 | 1672 | } |
1654 | - gammaAB[A][B] = value; | |
1655 | - } | |
1656 | - catch(MolDSException ex){ | |
1657 | - #pragma omp critical | |
1658 | - ex.Serialize(ompErrors); | |
1673 | + catch(MolDSException ex){ | |
1674 | + #pragma omp critical | |
1675 | + ex.Serialize(ompErrors); | |
1676 | + } | |
1677 | + } // end of loop B parallelized by openMP | |
1678 | + // Exception throwing for omp-region | |
1679 | + if(!ompErrors.str().empty()){ | |
1680 | + throw MolDSException::Deserialize(ompErrors); | |
1659 | 1681 | } |
1660 | - } // end of loop B parallelized by openMP | |
1661 | - // Exception throwing for omp-region | |
1662 | - if(!ompErrors.str().empty()){ | |
1663 | - throw MolDSException::Deserialize(ompErrors); | |
1682 | + } // end of if(mpiRank==calcRank) | |
1683 | + | |
1684 | + // set data to gater in mpiHeadRank with asynchronous MPI | |
1685 | + int tag = A; | |
1686 | + int source = calcRank; | |
1687 | + int dest = mpiHeadRank; | |
1688 | + if(mpiRank == mpiHeadRank && mpiRank != calcRank){ | |
1689 | + asyncCommunicator.SetRecvedVector(&gammaAB[A][A], | |
1690 | + totalAtomNumber-A, | |
1691 | + source, | |
1692 | + tag); | |
1693 | + } | |
1694 | + if(mpiRank != mpiHeadRank && mpiRank == calcRank){ | |
1695 | + asyncCommunicator.SetSentVector(&gammaAB[A][A], | |
1696 | + totalAtomNumber-A, | |
1697 | + dest, | |
1698 | + tag); | |
1664 | 1699 | } |
1665 | 1700 | } // end of loop A prallelized by MPI |
1666 | - | |
1667 | - // communication to collect all matrix data on head-rank | |
1668 | - int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); | |
1669 | - if(mpiRank == mpiHeadRank){ | |
1670 | - // receive the matrix data from other ranks | |
1671 | - for(int A=0; A<totalAtomNumber; A++){ | |
1672 | - if(A%mpiSize == mpiHeadRank){continue;} | |
1673 | - int source = A%mpiSize; | |
1674 | - int tag = A; | |
1675 | - MolDS_mpi::MpiProcess::GetInstance()->Recv(source, tag, &gammaAB[A][A], totalAtomNumber-A); | |
1676 | - } | |
1677 | - } | |
1678 | - else{ | |
1679 | - // send the matrix data to head-rank | |
1680 | - for(int A=0; A<totalAtomNumber; A++){ | |
1681 | - if(A%mpiSize != mpiRank){continue;} | |
1682 | - int dest = mpiHeadRank; | |
1683 | - int tag = A; | |
1684 | - MolDS_mpi::MpiProcess::GetInstance()->Send(dest, tag, &gammaAB[A][A], totalAtomNumber-A); | |
1685 | - } | |
1686 | - } | |
1687 | - // broadcast all matrix data to all rank | |
1688 | - int root=mpiHeadRank; | |
1689 | - MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&gammaAB[0][0], totalAtomNumber*totalAtomNumber, root); | |
1690 | - | |
1701 | + communicationThread.join(); | |
1702 | + MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&gammaAB[0][0], totalAtomNumber*totalAtomNumber, mpiHeadRank); | |
1691 | 1703 | |
1692 | 1704 | #pragma omp parallel for schedule(auto) |
1693 | 1705 | for(int A=0; A<totalAtomNumber; A++){ |
@@ -1786,44 +1798,97 @@ void Cndo2::CalcElectronicTransitionDipoleMoment(double* transitionDipoleMoment, | ||
1786 | 1798 | void Cndo2::CalcCartesianMatrixByGTOExpansion(double*** cartesianMatrix, |
1787 | 1799 | const Molecule& molecule, |
1788 | 1800 | STOnGType stonG) const{ |
1789 | - int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); | |
1790 | - int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); | |
1791 | 1801 | int totalAONumber = molecule.GetTotalNumberAOs(); |
1792 | 1802 | int totalAtomNumber = molecule.GetNumberAtoms(); |
1793 | 1803 | |
1804 | + // MPI setting of each rank | |
1805 | + int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); | |
1806 | + int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); | |
1807 | + int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); | |
1808 | + int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalAtomNumber); | |
1809 | + mPassingTimes *= CartesianType_end; | |
1810 | + MolDS_mpi::AsyncCommunicator asyncCommunicator; | |
1811 | + boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, | |
1812 | + &asyncCommunicator, | |
1813 | + mPassingTimes) ); | |
1814 | + | |
1794 | 1815 | // This loop (A and mu) is parallelized by MPI |
1795 | 1816 | for(int A=0; A<totalAtomNumber; A++){ |
1796 | - const Atom& atomA = *molecule.GetAtom(A); | |
1797 | - int firstAOIndexAtomA = atomA.GetFirstAOIndex(); | |
1798 | - for(int a=0; a<atomA.GetValenceSize(); a++){ | |
1799 | - int mu = firstAOIndexAtomA + a; | |
1800 | - if(mu%mpiSize != mpiRank){continue;} | |
1801 | - stringstream ompErrors; | |
1802 | - #pragma omp parallel for schedule(auto) | |
1803 | - for(int B=0; B<totalAtomNumber; B++){ | |
1804 | - try{ | |
1805 | - const Atom& atomB = *molecule.GetAtom(B); | |
1806 | - int firstAOIndexAtomB = atomB.GetFirstAOIndex(); | |
1807 | - for(int b=0; b<atomB.GetValenceSize(); b++){ | |
1808 | - int nu = firstAOIndexAtomB + b; | |
1809 | - this->CalcCartesianMatrixElementsByGTOExpansion(cartesianMatrix[XAxis][mu][nu], | |
1810 | - cartesianMatrix[YAxis][mu][nu], | |
1811 | - cartesianMatrix[ZAxis][mu][nu], | |
1812 | - atomA, a, atomB, b, stonG); | |
1817 | + const Atom& atomA = *molecule.GetAtom(A); | |
1818 | + int firstAOIndexA = atomA.GetFirstAOIndex(); | |
1819 | + int numValenceAOsA = atomA.GetValenceSize(); | |
1820 | + int calcRank = A%mpiSize; | |
1821 | + if(mpiRank == calcRank){ | |
1822 | + for(int a=0; a<numValenceAOsA; a++){ | |
1823 | + int mu = firstAOIndexA + a; | |
1824 | + int maxThreads = omp_get_max_threads(); | |
1825 | + stringstream ompErrors; | |
1826 | +#pragma omp parallel for schedule(auto) num_threads(maxThreads-1) | |
1827 | + for(int B=0; B<totalAtomNumber; B++){ | |
1828 | + try{ | |
1829 | + const Atom& atomB = *molecule.GetAtom(B); | |
1830 | + int firstAOIndexB = atomB.GetFirstAOIndex(); | |
1831 | + int numValenceAOsB = atomB.GetValenceSize(); | |
1832 | + for(int b=0; b<numValenceAOsB; b++){ | |
1833 | + int nu = firstAOIndexB + b; | |
1834 | + this->CalcCartesianMatrixElementsByGTOExpansion(cartesianMatrix[XAxis][mu][nu], | |
1835 | + cartesianMatrix[YAxis][mu][nu], | |
1836 | + cartesianMatrix[ZAxis][mu][nu], | |
1837 | + atomA, a, atomB, b, stonG); | |
1838 | + } | |
1813 | 1839 | } |
1840 | + catch(MolDSException ex){ | |
1841 | +#pragma omp critical | |
1842 | + ex.Serialize(ompErrors); | |
1843 | + } | |
1844 | + }// end of loop for int B with openMP | |
1845 | + // Exception throwing for omp-region | |
1846 | + if(!ompErrors.str().empty()){ | |
1847 | + throw MolDSException::Deserialize(ompErrors); | |
1814 | 1848 | } |
1815 | - catch(MolDSException ex){ | |
1816 | - #pragma omp critical | |
1817 | - ex.Serialize(ompErrors); | |
1818 | - } | |
1819 | - }// end of loop for int B with openMP | |
1820 | - // Exception throwing for omp-region | |
1821 | - if(!ompErrors.str().empty()){ | |
1822 | - throw MolDSException::Deserialize(ompErrors); | |
1823 | - } | |
1824 | - } | |
1825 | - } // end of loop for int A with openMP | |
1826 | - | |
1849 | + } | |
1850 | + } // end lof if(mpiRank == calcRank) | |
1851 | + | |
1852 | + // set data to gater in mpiHeadRank with asynchronous MPI | |
1853 | + int tagX = A* CartesianType_end + XAxis; | |
1854 | + int tagY = A* CartesianType_end + YAxis; | |
1855 | + int tagZ = A* CartesianType_end + ZAxis; | |
1856 | + int source = calcRank; | |
1857 | + int dest = mpiHeadRank; | |
1858 | + if(mpiRank == mpiHeadRank && mpiRank != calcRank){ | |
1859 | + asyncCommunicator.SetRecvedVector(&cartesianMatrix[XAxis][firstAOIndexA][0], | |
1860 | + numValenceAOsA*totalAONumber, | |
1861 | + source, | |
1862 | + tagX); | |
1863 | + asyncCommunicator.SetRecvedVector(&cartesianMatrix[YAxis][firstAOIndexA][0], | |
1864 | + numValenceAOsA*totalAONumber, | |
1865 | + source, | |
1866 | + tagY); | |
1867 | + asyncCommunicator.SetRecvedVector(&cartesianMatrix[ZAxis][firstAOIndexA][0], | |
1868 | + numValenceAOsA*totalAONumber, | |
1869 | + source, | |
1870 | + tagZ); | |
1871 | + } | |
1872 | + if(mpiRank != mpiHeadRank && mpiRank == calcRank){ | |
1873 | + asyncCommunicator.SetSentVector(&cartesianMatrix[XAxis][firstAOIndexA][0], | |
1874 | + numValenceAOsA*totalAONumber, | |
1875 | + dest, | |
1876 | + tagX); | |
1877 | + asyncCommunicator.SetSentVector(&cartesianMatrix[YAxis][firstAOIndexA][0], | |
1878 | + numValenceAOsA*totalAONumber, | |
1879 | + dest, | |
1880 | + tagY); | |
1881 | + asyncCommunicator.SetSentVector(&cartesianMatrix[ZAxis][firstAOIndexA][0], | |
1882 | + numValenceAOsA*totalAONumber, | |
1883 | + dest, | |
1884 | + tagZ); | |
1885 | + } | |
1886 | + } // end of loop for int A with MPI | |
1887 | + // Delete the communication thread. | |
1888 | + communicationThread.join(); | |
1889 | + MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&cartesianMatrix[0][0][0], CartesianType_end*totalAONumber*totalAONumber, mpiHeadRank); | |
1890 | + | |
1891 | +/* | |
1827 | 1892 | // communication to collect all matrix data on head-rank |
1828 | 1893 | int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); |
1829 | 1894 | if(mpiRank == mpiHeadRank){ |
@@ -1857,6 +1922,7 @@ void Cndo2::CalcCartesianMatrixByGTOExpansion(double*** cartesianMatrix, | ||
1857 | 1922 | // broadcast all matrix data to all rank |
1858 | 1923 | int root=mpiHeadRank; |
1859 | 1924 | MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&cartesianMatrix[0][0][0], CartesianType_end*totalAONumber*totalAONumber, root); |
1925 | + */ | |
1860 | 1926 | } |
1861 | 1927 | |
1862 | 1928 | // Calculate elements of Cartesian matrix between atomic orbitals. |
@@ -3817,10 +3883,19 @@ void Cndo2::CalcOverlapESsWithAnotherElectronicStructure(double** overlapESs, | ||
3817 | 3883 | |
3818 | 3884 | // calculate OverlapAOs matrix. E.g. S_{\mu\nu} in (3.74) in J. A. Pople book. |
3819 | 3885 | void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{ |
3820 | - int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); | |
3821 | - int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); | |
3822 | 3886 | int totalAONumber = molecule.GetTotalNumberAOs(); |
3823 | 3887 | int totalAtomNumber = molecule.GetNumberAtoms(); |
3888 | + | |
3889 | + // MPI setting of each rank | |
3890 | + int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); | |
3891 | + int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); | |
3892 | + int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); | |
3893 | + int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalAtomNumber); | |
3894 | + MolDS_mpi::AsyncCommunicator asyncCommunicator; | |
3895 | + boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, | |
3896 | + &asyncCommunicator, | |
3897 | + mPassingTimes) ); | |
3898 | + | |
3824 | 3899 | MallocerFreer::GetInstance()->Initialize<double>(overlapAOs, |
3825 | 3900 | totalAONumber, |
3826 | 3901 | totalAONumber); |
@@ -3828,51 +3903,73 @@ void Cndo2::CalcOverlapAOs(double** overlapAOs, const Molecule& molecule) const{ | ||
3828 | 3903 | // This loop A is parallelized with MPI |
3829 | 3904 | for(int A=0; A<totalAtomNumber; A++){ |
3830 | 3905 | const Atom& atomA = *molecule.GetAtom(A); |
3831 | - if(A%mpiSize != mpiRank){continue;} | |
3832 | - | |
3833 | - stringstream ompErrors; | |
3834 | -#pragma omp parallel | |
3835 | - { | |
3836 | - double** diatomicOverlapAOs = NULL; | |
3837 | - double** rotatingMatrix = NULL; | |
3838 | - try{ | |
3839 | - // malloc | |
3840 | - MallocerFreer::GetInstance()->Malloc<double>(&diatomicOverlapAOs, | |
3841 | - OrbitalType_end, | |
3842 | - OrbitalType_end); | |
3843 | - MallocerFreer::GetInstance()->Malloc<double>(&rotatingMatrix, | |
3844 | - OrbitalType_end, | |
3845 | - OrbitalType_end); | |
3846 | - | |
3906 | + int firstAOIndexA = atomA.GetFirstAOIndex(); | |
3907 | + int numValenceAOs = atomA.GetValenceSize(); | |
3908 | + int calcRank = A%mpiSize; | |
3909 | + if(mpiRank == calcRank){ | |
3910 | + int maxThreads = omp_get_max_threads(); | |
3911 | + stringstream ompErrors; | |
3912 | +#pragma omp parallel num_threads(maxThreads-1) | |
3913 | + { | |
3914 | + double** diatomicOverlapAOs = NULL; | |
3915 | + double** rotatingMatrix = NULL; | |
3916 | + try{ | |
3917 | + // malloc | |
3918 | + MallocerFreer::GetInstance()->Malloc<double>(&diatomicOverlapAOs, | |
3919 | + OrbitalType_end, | |
3920 | + OrbitalType_end); | |
3921 | + MallocerFreer::GetInstance()->Malloc<double>(&rotatingMatrix, | |
3922 | + OrbitalType_end, | |
3923 | + OrbitalType_end); | |
3924 | + bool symmetrize = false; | |
3847 | 3925 | #pragma omp for schedule(auto) |
3848 | - for(int B=A+1; B<totalAtomNumber; B++){ | |
3849 | - const Atom& atomB = *molecule.GetAtom(B); | |
3850 | - this->CalcDiatomicOverlapAOsInDiatomicFrame(diatomicOverlapAOs, atomA, atomB); | |
3851 | - this->CalcRotatingMatrix(rotatingMatrix, atomA, atomB); | |
3852 | - this->RotateDiatmicOverlapAOsToSpaceFrame(diatomicOverlapAOs, rotatingMatrix); | |
3853 | - this->SetOverlapAOsElement(overlapAOs, diatomicOverlapAOs, atomA, atomB); | |
3854 | - } // end of loop B parallelized with openMP | |
3926 | + for(int B=A+1; B<totalAtomNumber; B++){ | |
3927 | + const Atom& atomB = *molecule.GetAtom(B); | |
3928 | + this->CalcDiatomicOverlapAOsInDiatomicFrame(diatomicOverlapAOs, atomA, atomB); | |
3929 | + this->CalcRotatingMatrix(rotatingMatrix, atomA, atomB); | |
3930 | + this->RotateDiatmicOverlapAOsToSpaceFrame(diatomicOverlapAOs, rotatingMatrix); | |
3931 | + this->SetOverlapAOsElement(overlapAOs, diatomicOverlapAOs, atomA, atomB, symmetrize); | |
3932 | + } // end of loop B parallelized with openMP | |
3855 | 3933 | |
3856 | - } // end of try | |
3857 | - catch(MolDSException ex){ | |
3934 | + } // end of try | |
3935 | + catch(MolDSException ex){ | |
3858 | 3936 | #pragma omp critical |
3859 | - ex.Serialize(ompErrors); | |
3937 | + ex.Serialize(ompErrors); | |
3938 | + } | |
3939 | + this->FreeDiatomicOverlapAOsAndRotatingMatrix(&diatomicOverlapAOs, &rotatingMatrix); | |
3940 | + } // end of omp-parallelized region | |
3941 | + // Exception throwing for omp-region | |
3942 | + if(!ompErrors.str().empty()){ | |
3943 | + throw MolDSException::Deserialize(ompErrors); | |
3860 | 3944 | } |
3861 | - this->FreeDiatomicOverlapAOsAndRotatingMatrix(&diatomicOverlapAOs, &rotatingMatrix); | |
3862 | - } // end of omp-parallelized region | |
3863 | - // Exception throwing for omp-region | |
3864 | - if(!ompErrors.str().empty()){ | |
3865 | - throw MolDSException::Deserialize(ompErrors); | |
3945 | + } // end of if(mpiRank == calcRnak) | |
3946 | + | |
3947 | + // set data to gater in mpiHeadRank with asynchronous MPI | |
3948 | + int tag = A; | |
3949 | + int source = calcRank; | |
3950 | + int dest = mpiHeadRank; | |
3951 | + if(mpiRank == mpiHeadRank && mpiRank != calcRank){ | |
3952 | + asyncCommunicator.SetRecvedVector(overlapAOs[firstAOIndexA], | |
3953 | + totalAONumber*numValenceAOs, | |
3954 | + source, | |
3955 | + tag); | |
3956 | + } | |
3957 | + if(mpiRank != mpiHeadRank && mpiRank == calcRank){ | |
3958 | + asyncCommunicator.SetSentVector(overlapAOs[firstAOIndexA], | |
3959 | + totalAONumber*numValenceAOs, | |
3960 | + dest, | |
3961 | + tag); | |
3866 | 3962 | } |
3867 | 3963 | } // end of loop A parallelized with MPI |
3868 | - | |
3869 | - // communication to reduce thsi->matrixForce on all node (namely, all_reduce) | |
3870 | - int numTransported = totalAONumber*totalAONumber; | |
3871 | - MolDS_mpi::MpiProcess::GetInstance()->AllReduce(&overlapAOs[0][0], numTransported, std::plus<double>()); | |
3964 | + communicationThread.join(); | |
3965 | + MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&overlapAOs[0][0], totalAONumber*totalAONumber, mpiHeadRank); | |
3872 | 3966 | |
3873 | 3967 | #pragma omp parallel for schedule(auto) |
3874 | 3968 | for(int mu=0; mu<totalAONumber; mu++){ |
3875 | 3969 | overlapAOs[mu][mu] = 1.0; |
3970 | + for(int nu=mu+1; nu<totalAONumber; nu++){ | |
3971 | + overlapAOs[nu][mu] = overlapAOs[mu][nu]; | |
3972 | + } | |
3876 | 3973 | } |
3877 | 3974 | |
3878 | 3975 | /* |
@@ -31,7 +31,9 @@ | ||
31 | 31 | #include"../base/PrintController.h" |
32 | 32 | #include"../base/MolDSException.h" |
33 | 33 | #include"../base/MallocerFreer.h" |
34 | +#include"../base/containers/ThreadSafeQueue.h" | |
34 | 35 | #include"../mpi/MpiProcess.h" |
36 | +#include"../mpi/AsyncCommunicator.h" | |
35 | 37 | #include"../wrappers/Blas.h" |
36 | 38 | #include"../wrappers/Lapack.h" |
37 | 39 | #include"../base/MallocerFreer.h" |
@@ -739,8 +741,9 @@ void Mndo::CalcCISMatrix(double** matrixCIS) const{ | ||
739 | 741 | // single excitation from I-th (occupied)MO to A-th (virtual)MO |
740 | 742 | int moI = this->GetActiveOccIndex(*this->molecule, k); |
741 | 743 | int moA = this->GetActiveVirIndex(*this->molecule, k); |
744 | + int maxThreads = omp_get_max_threads(); | |
742 | 745 | stringstream ompErrors; |
743 | -#pragma omp parallel for schedule(auto) | |
746 | +#pragma omp parallel for schedule(auto) num_threads(maxThreads-1) | |
744 | 747 | for(int l=k; l<this->matrixCISdimension; l++){ |
745 | 748 | try{ |
746 | 749 | // single excitation from J-th (occupied)MO to B-th (virtual)MO |
@@ -2552,8 +2555,9 @@ void Mndo::CalcForce(const vector<int>& elecStates){ | ||
2552 | 2555 | const Atom& atomA = *molecule->GetAtom(a); |
2553 | 2556 | int firstAOIndexA = atomA.GetFirstAOIndex(); |
2554 | 2557 | int lastAOIndexA = atomA.GetLastAOIndex(); |
2558 | + int maxThreads = omp_get_max_threads(); | |
2555 | 2559 | stringstream ompErrors; |
2556 | -#pragma omp parallel | |
2560 | +#pragma omp parallel num_threads(maxThreads-1) | |
2557 | 2561 | { |
2558 | 2562 | double*** diatomicOverlapAOs1stDerivs = NULL; |
2559 | 2563 | double***** diatomicTwoElecTwoCore1stDerivs = NULL; |
@@ -3416,74 +3420,113 @@ double Mndo::GetAuxiliaryKNRKRElement(int moI, int moJ, int moK, int moL) const{ | ||
3416 | 3420 | |
3417 | 3421 | void Mndo::CalcTwoElecTwoCore(double****** twoElecTwoCore, |
3418 | 3422 | const Molecule& molecule) const{ |
3419 | - int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); | |
3420 | - int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); | |
3421 | - int totalAtomNumber = molecule.GetNumberAtoms(); | |
3422 | - | |
3423 | + int totalNumberAtoms = molecule.GetNumberAtoms(); | |
3424 | + | |
3425 | + // MPI setting of each rank | |
3426 | + int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); | |
3427 | + int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); | |
3428 | + int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); | |
3429 | + int mPassingTimes = MolDS_mpi::MpiProcess::GetInstance()->GetMessagePassingTimes(totalNumberAtoms); | |
3430 | + MolDS_mpi::AsyncCommunicator asyncCommunicator; | |
3431 | + boost::thread communicationThread( boost::bind(&MolDS_mpi::AsyncCommunicator::Run<double>, | |
3432 | + &asyncCommunicator, | |
3433 | + mPassingTimes) ); | |
3423 | 3434 | #ifdef MOLDS_DBG |
3424 | 3435 | if(twoElecTwoCore == NULL){ |
3425 | 3436 | throw MolDSException(this->errorMessageCalcTwoElecTwoCoreNullMatrix); |
3426 | 3437 | } |
3427 | 3438 | #endif |
3428 | 3439 | MallocerFreer::GetInstance()->Initialize<double>(twoElecTwoCore, |
3429 | - totalAtomNumber, | |
3430 | - totalAtomNumber, | |
3440 | + totalNumberAtoms, | |
3441 | + totalNumberAtoms, | |
3431 | 3442 | dxy, dxy, dxy, dxy); |
3432 | 3443 | |
3433 | - | |
3434 | 3444 | // this loop-a is MPI-parallelized |
3435 | - for(int a=0; a<totalAtomNumber; a++){ | |
3436 | - if(a%mpiSize != mpiRank){continue;} | |
3437 | - stringstream ompErrors; | |
3438 | -#pragma omp parallel | |
3439 | - { | |
3440 | - double**** diatomicTwoElecTwoCore = NULL; | |
3441 | - double** tmpRotMat = NULL; | |
3442 | - try{ | |
3443 | - MallocerFreer::GetInstance()->Malloc<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy); | |
3444 | - MallocerFreer::GetInstance()->Malloc<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end); | |
3445 | - // note that terms with condition a==b are not needed to calculate. | |
3445 | + for(int a=0; a<totalNumberAtoms; a++){ | |
3446 | + int calcRank = a%mpiSize; | |
3447 | + if(mpiRank == calcRank){ | |
3448 | + int maxThreads = omp_get_max_threads(); | |
3449 | + stringstream ompErrors; | |
3450 | +#pragma omp parallel num_threads(maxThreads-1) | |
3451 | + { | |
3452 | + double**** diatomicTwoElecTwoCore = NULL; | |
3453 | + double** tmpRotMat = NULL; | |
3454 | + try{ | |
3455 | + MallocerFreer::GetInstance()->Malloc<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy); | |
3456 | + MallocerFreer::GetInstance()->Malloc<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end); | |
3457 | + // note that terms with condition a==b are not needed to calculate. | |
3446 | 3458 | #pragma omp for schedule(auto) |
3447 | - for(int b=a+1; b<totalAtomNumber; b++){ | |
3448 | - this->CalcDiatomicTwoElecTwoCore(diatomicTwoElecTwoCore, tmpRotMat, a, b); | |
3449 | - | |
3450 | - for(int mu=0; mu<dxy; mu++){ | |
3451 | - for(int nu=mu; nu<dxy; nu++){ | |
3452 | - for(int lambda=0; lambda<dxy; lambda++){ | |
3453 | - for(int sigma=lambda; sigma<dxy; sigma++){ | |
3454 | - double value = diatomicTwoElecTwoCore[mu][nu][lambda][sigma]; | |
3455 | - twoElecTwoCore[a][b][mu][nu][lambda][sigma] = value; | |
3456 | - twoElecTwoCore[a][b][mu][nu][sigma][lambda] = value; | |
3457 | - twoElecTwoCore[a][b][nu][mu][lambda][sigma] = value; | |
3458 | - twoElecTwoCore[a][b][nu][mu][sigma][lambda] = value; | |
3459 | - twoElecTwoCore[b][a][lambda][sigma][mu][nu] = value; | |
3460 | - twoElecTwoCore[b][a][lambda][sigma][nu][mu] = value; | |
3461 | - twoElecTwoCore[b][a][sigma][lambda][mu][nu] = value; | |
3462 | - twoElecTwoCore[b][a][sigma][lambda][nu][mu] = value; | |
3459 | + for(int b=a+1; b<totalNumberAtoms; b++){ | |
3460 | + this->CalcDiatomicTwoElecTwoCore(diatomicTwoElecTwoCore, tmpRotMat, a, b); | |
3461 | + | |
3462 | + for(int mu=0; mu<dxy; mu++){ | |
3463 | + for(int nu=mu; nu<dxy; nu++){ | |
3464 | + for(int lambda=0; lambda<dxy; lambda++){ | |
3465 | + for(int sigma=lambda; sigma<dxy; sigma++){ | |
3466 | + double value = diatomicTwoElecTwoCore[mu][nu][lambda][sigma]; | |
3467 | + twoElecTwoCore[a][b][mu][nu][lambda][sigma] = value; | |
3468 | + twoElecTwoCore[a][b][mu][nu][sigma][lambda] = value; | |
3469 | + twoElecTwoCore[a][b][nu][mu][lambda][sigma] = value; | |
3470 | + twoElecTwoCore[a][b][nu][mu][sigma][lambda] = value; | |
3471 | + } | |
3463 | 3472 | } |
3464 | 3473 | } |
3465 | 3474 | } |
3466 | - } | |
3467 | 3475 | |
3468 | - } // end of loop b parallelized with MPI | |
3476 | + } // end of loop b parallelized with MPI | |
3469 | 3477 | |
3470 | - } // end of try | |
3471 | - catch(MolDSException ex){ | |
3478 | + } // end of try | |
3479 | + catch(MolDSException ex){ | |
3472 | 3480 | #pragma omp critical |
3473 | - ex.Serialize(ompErrors); | |
3481 | + ex.Serialize(ompErrors); | |
3482 | + } | |
3483 | + MallocerFreer::GetInstance()->Free<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy); | |
3484 | + MallocerFreer::GetInstance()->Free<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end); | |
3485 | + } // end of omp-parallelized region | |
3486 | + // Exception throwing for omp-region | |
3487 | + if(!ompErrors.str().empty()){ | |
3488 | + throw MolDSException::Deserialize(ompErrors); | |
3474 | 3489 | } |
3475 | - MallocerFreer::GetInstance()->Free<double>(&diatomicTwoElecTwoCore, dxy, dxy, dxy, dxy); | |
3476 | - MallocerFreer::GetInstance()->Free<double>(&tmpRotMat, OrbitalType_end, OrbitalType_end); | |
3477 | - } // end of omp-parallelized region | |
3478 | - // Exception throwing for omp-region | |
3479 | - if(!ompErrors.str().empty()){ | |
3480 | - throw MolDSException::Deserialize(ompErrors); | |
3490 | + } // end of if(mpiRnak == calcRank) | |
3491 | + // set data to gater in mpiHeadRank with asynchronous MPI | |
3492 | + int tag = a; | |
3493 | + int source = calcRank; | |
3494 | + int dest = mpiHeadRank; | |
3495 | + int numTransported = totalNumberAtoms*dxy*dxy*dxy*dxy; | |
3496 | + if(mpiRank == mpiHeadRank && mpiRank != calcRank){ | |
3497 | + asyncCommunicator.SetRecvedVector(&twoElecTwoCore[a][0][0][0][0][0], | |
3498 | + numTransported, | |
3499 | + source, | |
3500 | + tag); | |
3501 | + } | |
3502 | + if(mpiRank != mpiHeadRank && mpiRank == calcRank){ | |
3503 | + asyncCommunicator.SetSentVector(&twoElecTwoCore[a][0][0][0][0][0], | |
3504 | + numTransported, | |
3505 | + dest, | |
3506 | + tag); | |
3481 | 3507 | } |
3482 | 3508 | } // end of loop a parallelized with MPI |
3509 | + communicationThread.join(); | |
3510 | + int numTransported = totalNumberAtoms*totalNumberAtoms*dxy*dxy*dxy*dxy; | |
3511 | + MolDS_mpi::MpiProcess::GetInstance()->Broadcast(&twoElecTwoCore[0][0][0][0][0][0], numTransported, mpiHeadRank); | |
3483 | 3512 | |
3484 | - // communication to reduce thsi->matrixForce on all node (namely, all_reduce) | |
3485 | - int numTransported = totalAtomNumber*totalAtomNumber*dxy*dxy*dxy*dxy; | |
3486 | - MolDS_mpi::MpiProcess::GetInstance()->AllReduce(&twoElecTwoCore[0][0][0][0][0][0], numTransported, std::plus<double>()); | |
3513 | + for(int a=0; a<totalNumberAtoms; a++){ | |
3514 | + for(int b=a+1; b<totalNumberAtoms; b++){ | |
3515 | + for(int mu=0; mu<dxy; mu++){ | |
3516 | + for(int nu=mu; nu<dxy; nu++){ | |
3517 | + for(int lambda=0; lambda<dxy; lambda++){ | |
3518 | + for(int sigma=lambda; sigma<dxy; sigma++){ | |
3519 | + double value = twoElecTwoCore[a][b][mu][nu][lambda][sigma]; | |
3520 | + twoElecTwoCore[b][a][lambda][sigma][mu][nu] = value; | |
3521 | + twoElecTwoCore[b][a][lambda][sigma][nu][mu] = value; | |
3522 | + twoElecTwoCore[b][a][sigma][lambda][mu][nu] = value; | |
3523 | + twoElecTwoCore[b][a][sigma][lambda][nu][mu] = value; | |
3524 | + } | |
3525 | + } | |
3526 | + } | |
3527 | + } | |
3528 | + } | |
3529 | + } | |
3487 | 3530 | } |
3488 | 3531 | |
3489 | 3532 | // Calculation of two electrons two cores integral (mu, nu | lambda, sigma) in space fixed frame, |
@@ -0,0 +1,43 @@ | ||
1 | +//************************************************************************// | |
2 | +// Copyright (C) 2011-2012 Mikiya Fujii // | |
3 | +// // | |
4 | +// This file is part of MolDS. // | |
5 | +// // | |
6 | +// MolDS is free software: you can redistribute it and/or modify // | |
7 | +// it under the terms of the GNU General Public License as published by // | |
8 | +// the Free Software Foundation, either version 3 of the License, or // | |
9 | +// (at your option) any later version. // | |
10 | +// // | |
11 | +// MolDS is distributed in the hope that it will be useful, // | |
12 | +// but WITHOUT ANY WARRANTY; without even the implied warranty of // | |
13 | +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // | |
14 | +// GNU General Public License for more details. // | |
15 | +// // | |
16 | +// You should have received a copy of the GNU General Public License // | |
17 | +// along with MolDS. If not, see <http://www.gnu.org/licenses/>. // | |
18 | +//************************************************************************// | |
19 | +#include<stdio.h> | |
20 | +#include<stdlib.h> | |
21 | +#include<iostream> | |
22 | +#include<sstream> | |
23 | +#include<math.h> | |
24 | +#include<string> | |
25 | +#include<stdexcept> | |
26 | +#include<boost/format.hpp> | |
27 | +#include"../base/Enums.h" | |
28 | +#include"../base/Uncopyable.h" | |
29 | +#include"../base/PrintController.h" | |
30 | +#include"../base/MolDSException.h" | |
31 | +#include"../base/containers/ThreadSafeQueue.h" | |
32 | +#include"../base/MallocerFreer.h" | |
33 | +#include"MpiProcess.h" | |
34 | +#include"AsyncCommunicator.h" | |
35 | +using namespace std; | |
36 | +namespace MolDS_mpi{ | |
37 | +AsyncCommunicator::AsyncCommunicator(){} | |
38 | +AsyncCommunicator::~AsyncCommunicator(){} | |
39 | +} | |
40 | + | |
41 | + | |
42 | + | |
43 | + |
@@ -0,0 +1,127 @@ | ||
1 | +//************************************************************************// | |
2 | +// Copyright (C) 2011-2013 Mikiya Fujii // | |
3 | +// // | |
4 | +// This file is part of MolDS. // | |
5 | +// // | |
6 | +// MolDS is free software: you can redistribute it and/or modify // | |
7 | +// it under the terms of the GNU General Public License as published by // | |
8 | +// the Free Software Foundation, either version 3 of the License, or // | |
9 | +// (at your option) any later version. // | |
10 | +// // | |
11 | +// MolDS is distributed in the hope that it will be useful, // | |
12 | +// but WITHOUT ANY WARRANTY; without even the implied warranty of // | |
13 | +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // | |
14 | +// GNU General Public License for more details. // | |
15 | +// // | |
16 | +// You should have received a copy of the GNU General Public License // | |
17 | +// along with MolDS. If not, see <http://www.gnu.org/licenses/>. // | |
18 | +//************************************************************************// | |
19 | +#ifndef INCLUDED_ASYNCCOMMUNICATOR | |
20 | +#define INCLUDED_ASYNCCOMMUNICATOR | |
21 | +#include<boost/thread.hpp> | |
22 | +#include<boost/thread/condition.hpp> | |
23 | +#include<boost/bind.hpp> | |
24 | +#define NON_USED 0 | |
25 | +namespace MolDS_mpi{ | |
26 | + | |
27 | +class AsyncCommunicator{ | |
28 | +public: | |
29 | + AsyncCommunicator(); | |
30 | + ~AsyncCommunicator(); | |
31 | + template<typename T> void Run(int passingTimes){ | |
32 | + int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); | |
33 | + while(0<passingTimes){ | |
34 | + sleep(0.1); | |
35 | + boost::mutex::scoped_lock lk(this->stateGuard); | |
36 | + try{ | |
37 | + DataInfo dInfo = this->dataQueue.FrontPop(); | |
38 | + if(dInfo.mpiFuncType == MolDS_base::Send){ | |
39 | + MolDS_mpi::MpiProcess::GetInstance()->Send(dInfo.dest, | |
40 | + dInfo.tag, | |
41 | + reinterpret_cast<T*>(dInfo.vectorPtr), | |
42 | + dInfo.num); | |
43 | + } | |
44 | + else if(dInfo.mpiFuncType == MolDS_base::Recv){ | |
45 | + MolDS_mpi::MpiProcess::GetInstance()->Recv(dInfo.source, | |
46 | + dInfo.tag, | |
47 | + reinterpret_cast<T*>(dInfo.vectorPtr), | |
48 | + dInfo.num); | |
49 | + } | |
50 | + else if(dInfo.mpiFuncType == MolDS_base::Broadcast){ | |
51 | + MolDS_mpi::MpiProcess::GetInstance()->Broadcast(reinterpret_cast<T*>(dInfo.vectorPtr), | |
52 | + dInfo.num, | |
53 | + dInfo.source); | |
54 | + } | |
55 | + else{ | |
56 | + std::stringstream ss; | |
57 | + ss << "non valid mpi function type\n"; | |
58 | + MolDS_base::MolDSException ex(ss.str()); | |
59 | + throw ex; | |
60 | + } | |
61 | + this->stateChange.notify_all(); | |
62 | + passingTimes--; | |
63 | + } | |
64 | + catch(MolDS_base::MolDSException ex){ | |
65 | + if(ex.HasKey(MolDS_base::EmptyQueue)){ | |
66 | + this->stateChange.wait(lk); | |
67 | + continue; | |
68 | + } | |
69 | + else{ | |
70 | + throw ex; | |
71 | + } | |
72 | + } | |
73 | + } | |
74 | + } | |
75 | + | |
76 | + template<typename T> void SetSentVector(T* vector, | |
77 | + intptr_t num, | |
78 | + int dest, | |
79 | + int tag){ | |
80 | + int source = NON_USED; | |
81 | + MolDS_base::MpiFunctionType mpiFuncType = MolDS_base::Send; | |
82 | + this->SetVector(vector, num, source, dest, tag, mpiFuncType); | |
83 | + } | |
84 | + | |
85 | + template<typename T> void SetRecvedVector(T* vector, | |
86 | + intptr_t num, | |
87 | + int source, | |
88 | + int tag){ | |
89 | + int dest = NON_USED; | |
90 | + MolDS_base::MpiFunctionType mpiFuncType = MolDS_base::Recv; | |
91 | + this->SetVector(vector, num, source, dest, tag, mpiFuncType); | |
92 | + } | |
93 | + | |
94 | + template<typename T> void SetBroadcastedVector(T* vector, intptr_t num, int root){ | |
95 | + int source = root; | |
96 | + int dest = NON_USED; | |
97 | + int tag = NON_USED; | |
98 | + MolDS_base::MpiFunctionType mpiFuncType = MolDS_base::Broadcast; | |
99 | + this->SetVector(vector, num, source, dest, tag, mpiFuncType); | |
100 | + } | |
101 | + | |
102 | +private: | |
103 | + struct DataInfo{intptr_t vectorPtr; | |
104 | + intptr_t num; | |
105 | + int source; | |
106 | + int dest; | |
107 | + int tag; | |
108 | + MolDS_base::MpiFunctionType mpiFuncType;}; | |
109 | + boost::mutex stateGuard; | |
110 | + boost::condition stateChange; | |
111 | + MolDS_base_containers::ThreadSafeQueue<DataInfo> dataQueue; | |
112 | + template<typename T> void SetVector(T* vector, | |
113 | + intptr_t num, | |
114 | + int source, | |
115 | + int dest, | |
116 | + int tag, | |
117 | + MolDS_base::MpiFunctionType mpiFuncType){ | |
118 | + boost::mutex::scoped_lock lk(this->stateGuard); | |
119 | + DataInfo dInfo = {reinterpret_cast<intptr_t>(vector), num, source, dest, tag, mpiFuncType}; | |
120 | + this->dataQueue.Push(dInfo); | |
121 | + this->stateChange.notify_all(); | |
122 | + } | |
123 | +}; | |
124 | + | |
125 | +} | |
126 | +#endif | |
127 | + |
@@ -23,6 +23,7 @@ | ||
23 | 23 | #include<math.h> |
24 | 24 | #include<string> |
25 | 25 | #include<stdexcept> |
26 | +#include<omp.h> | |
26 | 27 | #include<boost/format.hpp> |
27 | 28 | #include"../base/Uncopyable.h" |
28 | 29 | #include"../base/PrintController.h" |
@@ -41,9 +42,22 @@ MpiProcess::MpiProcess(int argc, char *argv[]){ | ||
41 | 42 | this->environment = new boost::mpi::environment(argc, argv); |
42 | 43 | this->communicator = new boost::mpi::communicator(); |
43 | 44 | this->messageLimit = INT_MAX; |
45 | + this->mpiConsumingTime=0.0; | |
46 | + this->mpiConsumingTimeSend=0.0; | |
47 | + this->mpiConsumingTimeRecv=0.0; | |
48 | + this->mpiConsumingTimeBrodCast=0.0; | |
49 | + this->mpiConsumingTimeAllReduce=0.0; | |
44 | 50 | } |
45 | 51 | |
46 | 52 | MpiProcess::~MpiProcess(){ |
53 | + /* | |
54 | + int rank = this->GetRank(); | |
55 | + printf("\nrnk:%d mpiconsumingtime = %e [s]\n",rank, this->mpiConsumingTime); | |
56 | + printf("\nrnk:%d mpiconsumingtimeSend = %e [s]\n",rank, this->mpiConsumingTimeSend); | |
57 | + printf("\nrnk:%d mpiconsumingtimeRecv = %e [s]\n",rank, this->mpiConsumingTimeRecv); | |
58 | + printf("\nrnk:%d mpiconsumingtimeBroadcast = %e [s]\n",rank, this->mpiConsumingTimeBrodCast); | |
59 | + printf("\nrnk:%d mpiconsumingtimeAllReduce = %e [s]\n",rank, this->mpiConsumingTimeAllReduce); | |
60 | + */ | |
47 | 61 | delete this->environment; |
48 | 62 | delete this->communicator; |
49 | 63 | } |
@@ -70,6 +84,25 @@ MpiProcess* MpiProcess::GetInstance(){ | ||
70 | 84 | return mpiProcess; |
71 | 85 | } |
72 | 86 | |
87 | +void MpiProcess::Barrier(){this->communicator->barrier();} | |
88 | + | |
89 | +int MpiProcess::GetMessagePassingTimes(intptr_t num)const{ | |
90 | + int mpiRank = MolDS_mpi::MpiProcess::GetInstance()->GetRank(); | |
91 | + int mpiSize = MolDS_mpi::MpiProcess::GetInstance()->GetSize(); | |
92 | + int mpiHeadRank = MolDS_mpi::MpiProcess::GetInstance()->GetHeadRank(); | |
93 | + int calcTimes = num/mpiSize; | |
94 | + if(mpiRank < num%mpiSize){calcTimes+=1;} | |
95 | + int mpiPassingTimes; | |
96 | + if(mpiRank == mpiHeadRank){ | |
97 | + mpiPassingTimes = num - calcTimes; | |
98 | + } | |
99 | + else{ | |
100 | + mpiPassingTimes = calcTimes; | |
101 | + } | |
102 | + return mpiPassingTimes; | |
103 | +} | |
104 | + | |
105 | + | |
73 | 106 | } |
74 | 107 | |
75 | 108 |
@@ -19,6 +19,7 @@ | ||
19 | 19 | #ifndef INCLUDED_MPIPROCESS |
20 | 20 | #define INCLUDED_MPIPROCESS |
21 | 21 | #include<limits.h> |
22 | +#include<omp.h> | |
22 | 23 | #include<boost/mpi.hpp> |
23 | 24 | namespace MolDS_mpi{ |
24 | 25 | // MpiProcess is singleton |
@@ -30,26 +31,47 @@ public: | ||
30 | 31 | int GetHeadRank() const{return 0;} |
31 | 32 | int GetRank() const{return this->communicator->rank();} |
32 | 33 | int GetSize() const{return this->communicator->size();} |
33 | - template<typename T> void Send(int dest, int tag, const T* values, intptr_t num) const{ | |
34 | + //template<typename T> void Send(int dest, int tag, const T* values, intptr_t num) const{ | |
35 | + template<typename T> void Send(int dest, int tag, const T* values, intptr_t num) { | |
36 | + double startTime=0.0; | |
37 | + double endTime=0.0; | |
34 | 38 | std::vector<Chunk> chunks; |
35 | 39 | this->SplitMessage2Chunks(chunks, tag, values, num); |
36 | 40 | for(intptr_t i=0; i<chunks.size(); i++){ |
41 | + startTime = omp_get_wtime(); | |
37 | 42 | this->communicator->send(dest, chunks[i].tag, &values[chunks[i].first], chunks[i].num); |
43 | + endTime = omp_get_wtime(); | |
44 | + this->mpiConsumingTime += endTime - startTime; | |
45 | + this->mpiConsumingTimeSend += endTime - startTime; | |
38 | 46 | } |
39 | 47 | } |
40 | - template<typename T> void Recv(int source, int tag, T* values, intptr_t num) const{ | |
48 | + //template<typename T> void Recv(int source, int tag, T* values, intptr_t num) const{ | |
49 | + template<typename T> void Recv(int source, int tag, T* values, intptr_t num) { | |
50 | + double startTime=0.0; | |
51 | + double endTime=0.0; | |
41 | 52 | std::vector<Chunk> chunks; |
42 | 53 | this->SplitMessage2Chunks(chunks, tag, values, num); |
43 | 54 | for(intptr_t i=0; i<chunks.size(); i++){ |
55 | + startTime = omp_get_wtime(); | |
44 | 56 | this->communicator->recv(source, chunks[i].tag, &values[chunks[i].first], chunks[i].num); |
57 | + endTime = omp_get_wtime(); | |
58 | + this->mpiConsumingTime += endTime - startTime; | |
59 | + this->mpiConsumingTimeRecv += endTime - startTime; | |
45 | 60 | } |
46 | 61 | } |
47 | - template<typename T> void Broadcast(T* values, intptr_t num, int root) const{ | |
62 | + //template<typename T> void Broadcast(T* values, intptr_t num, int root) const{ | |
63 | + template<typename T> void Broadcast(T* values, intptr_t num, int root){ | |
64 | + double startTime=0.0; | |
65 | + double endTime=0.0; | |
48 | 66 | std::vector<Chunk> chunks; |
49 | 67 | intptr_t tag=0; |
50 | 68 | this->SplitMessage2Chunks(chunks, tag, values, num); |
51 | 69 | for(intptr_t i=0; i<chunks.size(); i++){ |
70 | + startTime = omp_get_wtime(); | |
52 | 71 | broadcast(*this->communicator, &values[chunks[i].first], chunks[i].num, root); |
72 | + endTime = omp_get_wtime(); | |
73 | + this->mpiConsumingTime += endTime - startTime; | |
74 | + this->mpiConsumingTimeBrodCast += endTime - startTime; | |
53 | 75 | } |
54 | 76 | } |
55 | 77 | template<typename T, typename Op> void Reduce(const T* inValues, intptr_t num, T* outValues, Op op, int root) const{ |
@@ -60,15 +82,23 @@ public: | ||
60 | 82 | reduce(*this->communicator, &inValues[chunks[i].first], chunks[i].num, &outValues[chunks[i].first], op, root); |
61 | 83 | } |
62 | 84 | } |
63 | - template<typename T, typename Op> void AllReduce(const T* inValues, intptr_t num, T* outValues, Op op) const{ | |
85 | + //template<typename T, typename Op> void AllReduce(const T* inValues, intptr_t num, T* outValues, Op op) const{ | |
86 | + template<typename T, typename Op> void AllReduce(const T* inValues, intptr_t num, T* outValues, Op op){ | |
87 | + double startTime=0.0; | |
88 | + double endTime=0.0; | |
64 | 89 | std::vector<Chunk> chunks; |
65 | 90 | intptr_t tag=0; |
66 | 91 | this->SplitMessage2Chunks(chunks, tag, inValues, num); |
67 | 92 | for(intptr_t i=0; i<chunks.size(); i++){ |
93 | + startTime = omp_get_wtime(); | |
68 | 94 | all_reduce(*this->communicator, &inValues[chunks[i].first], chunks[i].num, &outValues[chunks[i].first], op); |
95 | + endTime = omp_get_wtime(); | |
96 | + this->mpiConsumingTime += endTime - startTime; | |
97 | + this->mpiConsumingTimeAllReduce += endTime - startTime; | |
69 | 98 | } |
70 | 99 | } |
71 | - template<typename T, typename Op> void AllReduce(T* values, intptr_t num, Op op) const{ | |
100 | + //template<typename T, typename Op> void AllReduce(T* values, intptr_t num, Op op) const{ | |
101 | + template<typename T, typename Op> void AllReduce(T* values, intptr_t num, Op op){ | |
72 | 102 | double* tmpValues=NULL; |
73 | 103 | try{ |
74 | 104 | MolDS_base::MallocerFreer::GetInstance()->Malloc<double>(&tmpValues, num); |
@@ -83,7 +113,8 @@ public: | ||
83 | 113 | } |
84 | 114 | MolDS_base::MallocerFreer::GetInstance()->Free<double>(&tmpValues, num); |
85 | 115 | } |
86 | - | |
116 | + void Barrier(); | |
117 | + int GetMessagePassingTimes(intptr_t num) const; | |
87 | 118 | private: |
88 | 119 | static MpiProcess* mpiProcess; |
89 | 120 | MpiProcess(); |
@@ -116,6 +147,11 @@ private: | ||
116 | 147 | chunks.push_back(chunk); |
117 | 148 | } |
118 | 149 | } |
150 | + double mpiConsumingTime; | |
151 | + double mpiConsumingTimeSend; | |
152 | + double mpiConsumingTimeRecv; | |
153 | + double mpiConsumingTimeBrodCast; | |
154 | + double mpiConsumingTimeAllReduce; | |
119 | 155 | }; |
120 | 156 | |
121 | 157 | } |