[关闭]
@BravoWA 2016-11-25T17:15:37.000000Z 字数 6921 阅读 2490

PCG_MATLAB vs PCG_Eigen_MEX

LinearAlgebraSolver PCG Eigen


Movtivation

When I running my FSI code, I use Matlab bulit-in Preconditioned Conjugate Gradient (pcg) funtion to solve my pressure poisson equation. However, I found this built-in Matlab pcg solver is not multi-threaded. Hence, I want to use c-mex to warp a external c/cpp sparse linear algebra equation solver to speedup calculation. Here, my choice is Eigen (its development branch).

comparison code in Matlab

Platform: Win7 64bit with Intel C/CPP compiler, Matlab 2014b.

  1. ...
  2. %%
  3. % +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  4. % solve
  5. % +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  6. fprintf('MSG: Test of Eigen_pcg begin!!\n');
  7. f_nnode = size(f_node_coord,1);
  8. load('rhs_foreigentest.mat');
  9. b = rhs;
  10. x0 = zeros(f_nnode,1);
  11. [f_H,b] = sol_CBS_impose_pbc(f_H,b,f_pbc_dof,f_pbc_val);
  12. tic
  13. [x1] = pcg(f_H, b,1e-5,1000 );
  14. toc
  15. b = rhs;
  16. tic
  17. [x] = c_sol_pcg_eigen(f_H_ir, f_H_jc, f_H_val, b, x0, 1000, 1e-5,int32(f_pbc_dof), f_pbc_val);
  18. toc
  19. b = rhs;
  20. tic
  21. [x2] = c_sol_pcg_eigen_OMP...
  22. (f_H_ir, f_H_jc, f_H_val, b, x0, 1000, 1e-5,int32(f_pbc_dof), f_pbc_val);
  23. toc
  24. end

Sparse matrix H is [29292x29292], nnz of H is 389062.
Serial CPP-mex code for pcg_Eigen(dev)

Matlab cpp-mex bulid command:
mex -I".\eigen" c_sol_pcg_eigen.cpp

  1. /*==========================================================
  2. This subroutine is written in C for invoking in Matlab 2009a.
  3. Before using it, compile this C code using proper C compiler.
  4. [x] = c_sol_pcg_eigen(A_ir, A_jc, A_val, b, x0, maxiter, tol, pbc_dof, pbc_val)
  5. Input:
  6. (1) : A_ir, row-id vector for CSR Sparse matrix format.
  7. (2) : A_jc, col-id vector for CSR Sparse matrix format.
  8. (3) : A_val, value vector for CSR Sparse matrix format.
  9. (4) : b, right-hands term.
  10. (5) : x0, initial guess of unknowns variables.
  11. (6) : maxiter, max number of iterations.
  12. (7) : tol, tolerance of residuals to check if converged.
  13. (8) : pbc_dof,
  14. (9) : pbc_val.
  15. Output:
  16. (1) : x
  17. Author: Chen Jiang 2016-04-20-15.42 jiangchen2007@gmail.coom
  18. *========================================================*/
  19. #include "mex.h"
  20. #include "math.h"
  21. #include "matrix.h"
  22. #include <Eigen/Sparse>
  23. #include <Eigen/Dense>
  24. #include <vector>
  25. #include <algorithm>
  26. #include <iostream>
  27. using namespace Eigen;
  28. using namespace std;
  29. typedef Eigen::Triplet<double> T;
  30. // the gateway function
  31. void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
  32. {
  33. //input vars
  34. int *A_ir;
  35. int *A_jc;
  36. double *A_val;
  37. double *b;
  38. double *x0;
  39. int maxiter;
  40. double tol;
  41. int *pbc_dof;
  42. double *pbc_val;
  43. //output vars
  44. double *x;
  45. //temp vars
  46. int nrows;
  47. int nnz;
  48. std::vector<T> tripletList;
  49. int npbc_dof;
  50. //-----------------
  51. // GetData
  52. //-----------------
  53. if(nrhs != 9){
  54. mexErrMsgIdAndTxt("MEX:c_sol_pcg_eigen:rhs",
  55. "This function takes too much input arguments.");
  56. }
  57. A_ir = (int*)mxGetData(prhs[0]);
  58. A_jc = (int*)mxGetData(prhs[1]);
  59. A_val = mxGetPr(prhs[2]);
  60. b = mxGetPr(prhs[3]);
  61. x0 = mxGetPr(prhs[4]);
  62. maxiter = mxGetScalar(prhs[5]);
  63. tol = mxGetScalar(prhs[6]);
  64. pbc_dof = (int*) mxGetData(prhs[7]);
  65. pbc_val = mxGetPr(prhs[8]);
  66. nrows = mxGetM(prhs[3]);
  67. npbc_dof = mxGetM(prhs[7]);
  68. nnz = mxGetM(prhs[0]);
  69. plhs[0] = mxCreateDoubleMatrix(nrows,1,mxREAL);
  70. x = mxGetPr(plhs[0]);
  71. //-----------------
  72. //calculations
  73. //-----------------
  74. //covert A_ir, A_jc, A_val to Eigen Sparse matrix
  75. tripletList.reserve(nnz);
  76. for(int i=0; i<nnz; i++){
  77. tripletList.push_back(T(A_ir[i]-1, A_jc[i]-1, A_val[i]));
  78. }
  79. SparseMatrix<double> A_eigen(nrows, nrows);
  80. A_eigen.setFromTriplets(tripletList.begin(), tripletList.end());
  81. Map<VectorXd> b_eigen(b, nrows);
  82. //apply pbc
  83. for(int idof = 0; idof < npbc_dof; idof++){
  84. int dof = pbc_dof[idof]-1;
  85. A_eigen.coeffRef(dof, dof) = 1.0e8;
  86. b_eigen(dof) = 1.0e8*pbc_val[idof];
  87. }
  88. //CG solver of Eigen
  89. VectorXd x_eigen;
  90. Map<VectorXd> x0_eigen(x0, nrows);
  91. ConjugateGradient<SparseMatrix<double>, Lower|Upper, IncompleteCholesky<double> > cg;
  92. //ConjugateGradient<SparseMatrix<double>, Lower|Upper > cg;
  93. //cg.preconditioner().setDroptol(0.001);
  94. cg.setTolerance(tol);
  95. cg.setMaxIterations(maxiter);
  96. cg.compute(A_eigen);
  97. x_eigen = cg.solveWithGuess(b_eigen, x0_eigen);
  98. //x_eigen = cg.solve(b_eigen);
  99. //
  100. for(int i=0; i<nrows; i++){
  101. x[i] = x_eigen(i);
  102. }
  103. }

OpenMP CPP-MEX code for PCG_Eigen(dev)

Mex build command in Windows7:
mex -I".\eigen_dev" c_sol_pcg_eigen_OMP.cpp COMPFLAGS="$COMPFLAGS /openmp"
Mex build command in Linux (opensuse):
mex c_sol_pcg_eigen_OMP.cpp -I".\eigen_dev" CXXPLFAGS="\$CXXFLAGS -fopenmp" LDFLAGS="\$LDFLAGS -fopenmp"

  1. /*==========================================================
  2. This subroutine is written in C for invoking in Matlab 2009a.
  3. Before using it, compile this C code using proper C compiler.
  4. [x] = c_sol_pcg_eigen(A_ir, A_jc, A_val, b, x0, maxiter, tol, pbc_dof, pbc_val)
  5. Input:
  6. (1) : A_ir, row-id vector for CSR Sparse matrix format.
  7. (2) : A_jc, col-id vector for CSR Sparse matrix format.
  8. (3) : A_val, value vector for CSR Sparse matrix format.
  9. (4) : b, right-hands term.
  10. (5) : x0, initial guess of unknowns variables.
  11. (6) : maxiter, max number of iterations.
  12. (7) : tol, tolerance of residuals to check if converged.
  13. (8) : pbc_dof,
  14. (9) : pbc_val.
  15. Output:
  16. (1) : x
  17. Author: Chen Jiang 2016-04-24-10.59 jiangchen2007@gmail.com
  18. *========================================================*/
  19. #include "mex.h"
  20. #include "math.h"
  21. #include "matrix.h"
  22. #include <Eigen/Sparse>
  23. #include <Eigen/Dense>
  24. #include <vector>
  25. #include <algorithm>
  26. #include <iostream>
  27. #include <omp.h>
  28. using namespace Eigen;
  29. using namespace std;
  30. typedef Eigen::Triplet<double> T;
  31. // the gateway function
  32. void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
  33. {
  34. //
  35. Eigen::initParallel();
  36. Eigen::setNbThreads(2);
  37. //input vars
  38. int *A_ir;
  39. int *A_jc;
  40. double *A_val;
  41. double *b;
  42. double *x0;
  43. int maxiter;
  44. double tol;
  45. int *pbc_dof;
  46. double *pbc_val;
  47. //output vars
  48. double *x;
  49. //temp vars
  50. int nrows;
  51. int nnz;
  52. std::vector<T> tripletList;
  53. int npbc_dof;
  54. //-----------------
  55. // GetData
  56. //-----------------
  57. if(nrhs != 9){
  58. mexErrMsgIdAndTxt("MEX:c_sol_pcg_eigen:rhs",
  59. "This function takes too much input arguments.");
  60. }
  61. A_ir = (int*)mxGetData(prhs[0]);
  62. A_jc = (int*)mxGetData(prhs[1]);
  63. A_val = mxGetPr(prhs[2]);
  64. b = mxGetPr(prhs[3]);
  65. x0 = mxGetPr(prhs[4]);
  66. maxiter = mxGetScalar(prhs[5]);
  67. tol = mxGetScalar(prhs[6]);
  68. pbc_dof = (int*) mxGetData(prhs[7]);
  69. pbc_val = mxGetPr(prhs[8]);
  70. nrows = mxGetM(prhs[3]);
  71. npbc_dof = mxGetM(prhs[7]);
  72. nnz = mxGetM(prhs[0]);
  73. plhs[0] = mxCreateDoubleMatrix(nrows,1,mxREAL);
  74. x = mxGetPr(plhs[0]);
  75. //-----------------
  76. //calculations
  77. //-----------------
  78. //covert A_ir, A_jc, A_val to Eigen Sparse matrix
  79. tripletList.reserve(nnz);
  80. for(int i=0; i<nnz; i++){
  81. tripletList.push_back(T(A_ir[i]-1, A_jc[i]-1, A_val[i]));
  82. }
  83. SparseMatrix<double> A_eigen(nrows, nrows);
  84. A_eigen.setFromTriplets(tripletList.begin(), tripletList.end());
  85. Map<VectorXd> b_eigen(b, nrows);
  86. //apply pbc
  87. for(int idof = 0; idof < npbc_dof; idof++){
  88. int dof = pbc_dof[idof]-1;
  89. A_eigen.coeffRef(dof, dof) = 1.0e8;
  90. b_eigen(dof) = 1.0e8*pbc_val[idof];
  91. }
  92. //CG solver of Eigen
  93. VectorXd x_eigen;
  94. Map<VectorXd> x0_eigen(x0, nrows);
  95. ConjugateGradient<SparseMatrix<double>, Lower|Upper, IncompleteCholesky<double> > cg;
  96. //ConjugateGradient<SparseMatrix<double>, Lower|Upper > cg;
  97. //cg.preconditioner().setDroptol(0.001);
  98. cg.setTolerance(tol);
  99. cg.setMaxIterations(maxiter);
  100. cg.compute(A_eigen);
  101. x_eigen = cg.solveWithGuess(b_eigen, x0_eigen);
  102. //x_eigen = cg.solve(b_eigen);
  103. //
  104. for(int i=0; i<nrows; i++){
  105. x[i] = x_eigen(i);
  106. }
  107. }

Result

norm(x1-x)/norm(x1) = 2.4111e-07
Elapsed time:
Matlab built-in pcg function: 2.39255 sec
Serial Cpp-mex pcg of Eigen: 0.56422 sec
OpenMP Cpp-mex pcg of Eigen: 0.518612 sec (2 threads only)

Conclusion

Even CPP-mex PCG of Eigen contains not only the PCG calculation codes but also the data structure transfering codes, it is still faster than its rival.

添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注