Abstract: Paralution is an open source library for sparse iterative methods with special focus on multi-core and accelerator technology such as GPUs. It has a simple fortran interface and not well designed for multiple right-hand-sides. Here we defined a user friendly interface based on ISO_C_BINDING for Fortran FEA programmes to call Paralution solver.
keywords: Paralution, Fortran, sparse iterative method, iso_c_binding, FEA
Paralution系由瑞典科研工作者开发的基于多种稀疏矩阵迭代求解方法的C++并行计算程序包。它支持包括OpenMP、GPU/CUDA、OpenCL、OpenMP/MIC等多种后台并行库,也允许通过MPI进行多节点并行加速计算。它是多许可证的开源软件,方便易用,且不依赖特定的硬件平台和软件库,支持主流的Linux、Windows和MacOS操作平台。具体内容可以参考其官方主页:www.paralution.com
Paralution附带的算例中有Fortran调用的例子,只是对于有限元程序而言通常求解线性方程组AX=B的时候,右端项B不是向量形式而是矩阵形式。其次,在动力计算中总刚的稀疏结构通常而言并不发生改变,所以迭代求解器和预处理只需进行一次,这也是软件包附带算例没有考虑的。
在有限元分析中调用Paralution计算线性稀疏方程组AX=B的一般步骤是:
1、初始化paralution环境;
2、设置求解器并导入总刚;
3、分析求解X;
4、退出并清空paralution环境
具体的介绍详见代码和算例
CPP code:
1 #include <paralution.hpp> 2 3 namespace fortran_module{ 4 // Fortran interface via iso_c_binding 5 /* 6 Author: T.Q 7 Date: 2014.11 8 Version: 1.1 9 License: GPL v3 10 */ 11 extern "C" 12 { 13 // Initialize paralution 14 void paralution_f_init(); 15 // Print info of paralution 16 void paralution_f_info(); 17 // Build solver and preconditioner of paralution 18 void paralution_f_build(const int[], const int[], const double[], int ,int); 19 // Solve Ax=b 20 void paralution_f_solve(const double[], double[], int , int&, double&, int&); 21 // Stop paralution 22 void paralution_f_stop(); 23 // Subspace method for compute general eigenpairs 24 //void paralution_f_subspace(); 25 } 26 27 int *row = NULL;// Row index 28 int *col = NULL;// Column index 29 int *row_offset = NULL;// Row offset index for CSR 30 double *val = NULL;// Value of sparse matrix 31 32 double *in_x = NULL;// Internal x vector 33 double *in_rhs = NULL;// Internal rhs vector 34 35 bool var_clear = false;// Flag of variables clearance 36 bool ls_build = false;// Flag of solver exist 37 38 using namespace paralution; 39 40 41 LocalMatrix<double> *mat_A = NULL;// Matrix A i.e. Stiffness matrix in FEM 42 LocalMatrix<double> *mat_B = NULL;// Matrix B i.e. Mass matrix in FEM 43 44 // Iterative Linear Solver and Preconditioner 45 IterativeLinearSolver<LocalMatrix<double>, LocalVector<double>, double> *ls = NULL; 46 Preconditioner<LocalMatrix<double>, LocalVector<double>, double> *pr = NULL; 47 48 void paralution_f_clear() 49 { 50 // Clear variables 51 52 if(ls!=NULL){ ls->Clear(); delete ls;} 53 if(pr!=NULL){ pr->Clear(); delete pr;} 54 55 if(row!=NULL)free_host(&row); 56 if(col!=NULL)free_host(&col); 57 if(row_offset!=NULL)free_host(&row_offset); 58 if(val!=NULL)free_host(&val); 59 60 if(in_x!=NULL)free_host(&in_x); 61 if(in_rhs!=NULL)free_host(&in_rhs); 62 63 if(mat_A!=NULL){ mat_A->Clear(); delete mat_A;} 64 if(mat_B!=NULL){ mat_B->Clear(); delete mat_B;} 65 66 var_clear = true; 67 ls_build = false; 68 } 69 70 void paralution_f_init() 71 { 72 paralution_f_clear(); 73 init_paralution(); 74 } 75 76 void paralution_f_info() 77 { 78 info_paralution(); 79 } 80 81 void paralution_f_stop() 82 { 83 paralution_f_clear(); 84 stop_paralution(); 85 } 86 87 88 void paralution_f_build(const int for_row[], const int for_col[], 89 const double for_val[], int n, int nnz) 90 { 91 int i; 92 93 if(var_clear==false)paralution_f_clear(); 94 95 // Allocate arrays 96 allocate_host(nnz,&row); 97 allocate_host(nnz,&col); 98 allocate_host(nnz,&val); 99 100 // Copy sparse matrix data F2C 101 for(i=0;i<nnz;i++){ 102 row[i] = for_row[i] - 1;// Fortran starts with 1 default 103 col[i] = for_col[i] - 1; 104 val[i] = for_val[i];} 105 106 // Load data 107 mat_A = new LocalMatrix<double>; 108 mat_A->SetDataPtrCOO(&row,&col,&val,"Imported Fortran COO Matrix", nnz, n, n); 109 mat_A->ConvertToCSR();// Manual suggest CSR format 110 mat_A->info(); 111 112 // Creat Solver and Preconditioner 113 ls = new CG<LocalMatrix<double>, LocalVector<double>, double>; 114 pr = new Jacobi<LocalMatrix<double>, LocalVector<double>, double>; 115 116 ls->Clear(); 117 ls->SetOperator(*mat_A); 118 ls->SetPreconditioner(*pr); 119 ls->Build(); 120 121 ls_build = true; 122 123 } 124 125 void paralution_f_solve(const double for_rhs[], double for_x[], int n, 126 int &iter, double &resnorm, int &err) 127 { 128 int i; 129 LocalVector<double> x;// Solution vector 130 LocalVector<double> rhs;// Right-hand-side vector 131 132 assert(ls_build==true); 133 134 if(in_rhs!=NULL)free_host(&in_rhs); 135 if(in_x!=NULL)free_host(&in_x); 136 137 allocate_host(n,&in_rhs); 138 allocate_host(n,&in_x); 139 // Copy and set rhs vector 140 for(i=0;i<n;i++){ in_rhs[i] = for_rhs[i];} 141 rhs.SetDataPtr(&in_rhs,"vector",n); 142 // Set solution to zero 143 x.Allocate("vector",n); 144 x.Zeros(); 145 // Solve 146 ls->Solve(rhs,&x); 147 // Get solution 148 x.LeaveDataPtr(&in_x); 149 // Copy to array 150 for(i=0;i<n;i++){ for_x[i] = in_x[i];} 151 // Clear 152 x.Clear(); 153 rhs.Clear(); 154 // Get information 155 iter = ls->GetIterationCount();// iteration times 156 resnorm = ls->GetCurrentResidual();// residual 157 err = ls->GetSolverStatus();// error code 158 } 159 // TODO 160 // Subspace method for solve general eigenpairs for symmetric real positive 161 // defined matrix 162 // A*x=lambda*M*x 163 // A: matrix N*N i.e. stiffness matrix 164 // B: matrix N*N i.e. mass matrix 165 // 166 void paralution_f_subspace(const int for_row[], const int for_col[], 167 const int option[], const double for_stif[], const double for_mass[], 168 double eigval[], double eigvec[], int n, int nnz) 169 { 170 } 171 }
Fortran code:
1 module paralution 2 use,intrinsic::iso_c_binding 3 implicit none 4 !******************************************************************************* 5 ! Fortran module for call Paralution package 6 !------------------------------------------------------------------------------- 7 ! Author: T.Q. 8 ! Date: 2014.11 9 ! Version: 1.1 10 !******************************************************************************* 11 ! License: GPL v3 12 !------------------------------------------------------------------------------- 13 ! usage: 14 !------------------------------------------------------------------------------- 15 ! 1) call paralution_init 16 ! 2) call paralution_info (optional) 17 ! 3) call paralution_build 18 ! 4) call paralution_solve (support multi-rhs) 19 ! 5) call paralution_stop 20 !******************************************************************************* 21 interface 22 subroutine paralution_init()bind(c,name=‘paralution_f_init‘) 23 end subroutine 24 subroutine paralution_info()bind(c,name=‘paralution_f_info‘) 25 end subroutine 26 subroutine paralution_stop()bind(c,name=‘paralution_f_stop‘) 27 end subroutine 28 subroutine paralution_build(row,col,val,n,nnz)bind(c,name=‘paralution_f_build‘) 29 import 30 implicit none 31 integer(c_int),intent(in),value::n,nnz 32 integer(c_int),intent(in)::row(nnz),col(nnz) 33 real(c_double),intent(in)::val(nnz) 34 end subroutine 35 subroutine paralution_solve_vec(rhs,x,n,iter,resnorm,err2)bind(c,name=‘paralution_f_solve‘) 36 import 37 implicit none 38 integer(c_int),intent(in),value::n 39 real(c_double),intent(in)::rhs(n) 40 real(c_double)::x(n) 41 integer(c_int)::iter,err2 42 real(c_double)::resnorm 43 end subroutine 44 end interface 45 contains 46 subroutine paralution_solve(rhs,x,mat_rhs,mat_x,n,iter,resnorm,err2) 47 implicit none 48 integer(c_int)::n,iter,err2 49 real(c_double),intent(in),optional::rhs(:),mat_rhs(:,:) 50 real(c_double),optional::x(:),mat_x(:,:) 51 real(c_double)::resnorm 52 53 logical::isVec,isMat 54 integer::i 55 isVec = present(rhs).and.present(x) 56 isMat = present(mat_rhs).and.present(mat_x) 57 58 if(isVec.and.(.not.isMat))then 59 ! rhs and x both vector 60 call paralution_solve_vec(rhs,x,n,iter,resnorm,err2) 61 elseif(isMat)then 62 ! rhs and x both matrix 63 do i=1,size(mat_rhs,2) 64 call paralution_solve_vec(mat_rhs(:,i),mat_x(:,i),n,iter,resnorm,err2) 65 enddo 66 else 67 write(*,*)"Error too many input variables" 68 endif 69 return 70 end subroutine 71 end module 72 73 program test 74 use paralution 75 implicit none 76 real(kind=8)::a(10,10),b(10,2),x(10,2),val(28),res2 77 integer::i,j,k,row(28),col(28),err2 78 a = 0.D0 79 a(1,[1,9]) = [1.7D0, .13D0] 80 a(2,[2,5,10]) = [1.D0, .02D0, .01D0] 81 a(3,3) = 1.5D0 82 a(4,4) = 1.1D0 83 a(5,5::1) = [2.6D0,0.D0,.16D0,.09D0,.52D0,.53D0] 84 a(6,6) = 1.2D0 85 a(7,[7,10]) = [1.3D0, .56D0] 86 a(8,8:9) = [1.6D0, .11D0] 87 a(9,9) = 1.4D0 88 a(10,10) = 3.1D0 89 90 write(*,*)"Data initialize" 91 do i=1,size(a,1) 92 do j=min(i+1,size(a,1)),size(a,1) 93 a(j,i) = a(i,j) 94 enddo 95 enddo 96 97 k=1 98 do i=1,size(a,1) 99 do j=1,size(a,2) 100 if(a(i,j)>1.D-4)then 101 row(k) = i 102 col(k) = j 103 val(k) = a(i,j) 104 write(*,*)i,j,val(k) 105 k = k + 1 106 endif 107 enddo 108 enddo 109 b(:,1) = [.287D0, .22D0, .45D0, .44D0, 2.486D0, .72D0, 1.55D0, 1.424D0, 1.621D0, 3.759D0] 110 b(:,2) = 1.5D0*b(:,1) 111 x = 0.D0 112 113 i = 10 114 j = 28 115 k = 2 116 call paralution_init() 117 call paralution_info() 118 call paralution_build(row,col,val,i,j) 119 do k=1,2 120 call paralution_solve(rhs=b(:,k),x=x(:,k),n=i,iter=j,resnorm=res2,err2=err2) 121 write(*,*)"Iter times:",j," relative error:",res2," error code:",err2 122 write(*,*)x(:,k) 123 enddo 124 call paralution_solve(mat_rhs=b,mat_x=x,n=i,iter=j,resnorm=res2,err2=err2) 125 do k=1,2 126 write(*,*)"Iter times:",j," relative error:",res2," error code:",err2 127 write(*,*)x(:,k) 128 enddo 129 call paralution_stop() 130 end program
result:
1 Data initialize 2 1 1 1.7000000000000000 3 1 9 0.13000000000000000 4 2 2 1.0000000000000000 5 2 5 2.0000000000000000E-002 6 2 10 1.0000000000000000E-002 7 3 3 1.5000000000000000 8 4 4 1.1000000000000001 9 5 2 2.0000000000000000E-002 10 5 5 2.6000000000000001 11 5 7 0.16000000000000000 12 5 8 8.9999999999999997E-002 13 5 9 0.52000000000000002 14 5 10 0.53000000000000003 15 6 6 1.2000000000000000 16 7 5 0.16000000000000000 17 7 7 1.3000000000000000 18 7 10 0.56000000000000005 19 8 5 8.9999999999999997E-002 20 8 8 1.6000000000000001 21 8 9 0.11000000000000000 22 9 1 0.13000000000000000 23 9 5 0.52000000000000002 24 9 8 0.11000000000000000 25 9 9 1.3999999999999999 26 10 2 1.0000000000000000E-002 27 10 5 0.53000000000000003 28 10 7 0.56000000000000005 29 10 10 3.1000000000000001 30 This version of PARALUTION is released under GPL. 31 By downloading this package you fully agree with the GPL license. 32 Number of CPU cores: 2 33 Host thread affinity policy - thread mapping on every core 34 PARALUTION ver B0.8.0 35 PARALUTION platform is initialized 36 Accelerator backend: None 37 OpenMP threads:2 38 LocalMatrix name=Imported Fortran COO Matrix; rows=10; cols=10; nnz=28; prec=64bit; asm=no; format=CSR; host backend={CPU(OpenMP)}; accelerator backend={None}; current=CPU(OpenMP) 39 PCG solver starts, with preconditioner: 40 Jacobi preconditioner 41 IterationControl criteria: abs tol=1e-15; rel tol=1e-06; div tol=1e+08; max iter=1000000 42 IterationControl initial residual = 5.33043 43 IterationControl RELATIVE criteria has been reached: res norm=6.83208e-07; rel val=1.28171e-07; iter=6 44 PCG ends 45 Iter times: 6 relative error: 6.8320832955793363E-007 error code: 2 46 0.10000029331886898 0.19999984686691041 0.30000008316594051 0.40000011088792048 0.49999997425190351 0.60000016633188091 0.70000002413346363 0.79999978910736969 0.90000002416832581 1.0000000083185150 47 PCG solver starts, with preconditioner: 48 Jacobi preconditioner 49 IterationControl criteria: abs tol=1e-15; rel tol=1e-06; div tol=1e+08; max iter=1000000 50 IterationControl initial residual = 7.99564 51 IterationControl RELATIVE criteria has been reached: res norm=1.02481e-06; rel val=1.28171e-07; iter=6 52 PCG ends 53 Iter times: 6 relative error: 1.0248124943384603E-006 error code: 2 54 0.15000043997830351 0.29999977030036568 0.45000012474891066 0.60000016633188091 0.74999996137785507 0.90000024949782143 1.0500000362001956 1.1999996836610549 1.3500000362524884 1.5000000124777721 55 PCG solver starts, with preconditioner: 56 Jacobi preconditioner 57 IterationControl criteria: abs tol=1e-15; rel tol=1e-06; div tol=1e+08; max iter=1000000 58 IterationControl initial residual = 5.33043 59 IterationControl RELATIVE criteria has been reached: res norm=6.83208e-07; rel val=1.28171e-07; iter=6 60 PCG ends 61 PCG solver starts, with preconditioner: 62 Jacobi preconditioner 63 IterationControl criteria: abs tol=1e-15; rel tol=1e-06; div tol=1e+08; max iter=1000000 64 IterationControl initial residual = 7.99564 65 IterationControl RELATIVE criteria has been reached: res norm=1.02481e-06; rel val=1.28171e-07; iter=6 66 PCG ends 67 Iter times: 6 relative error: 1.0248124943384603E-006 error code: 2 68 0.10000029331886898 0.19999984686691041 0.30000008316594051 0.40000011088792048 0.49999997425190351 0.60000016633188091 0.70000002413346363 0.79999978910736969 0.90000002416832581 1.0000000083185150 69 Iter times: 6 relative error: 1.0248124943384603E-006 error code: 2 70 0.15000043997830351 0.29999977030036568 0.45000012474891066 0.60000016633188091 0.74999996137785507 0.90000024949782143 1.0500000362001956 1.1999996836610549 1.3500000362524884 1.5000000124777721