@BravoWA 2016-11-25T09:15:37.000000Z 字数 6921 阅读 2631

PCG_MATLAB vs PCG_Eigen_MEX

LinearAlgebraSolver PCG Eigen

Movtivation

When I running my FSI code, I use Matlab bulit-in Preconditioned Conjugate Gradient (pcg) funtion to solve my pressure poisson equation. However, I found this built-in Matlab pcg solver is not multi-threaded. Hence, I want to use c-mex to warp a external c/cpp sparse linear algebra equation solver to speedup calculation. Here, my choice is Eigen (its development branch).

comparison code in Matlab

Platform: Win7 64bit with Intel C/CPP compiler, Matlab 2014b.

...
    %%
    % +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    %  solve
    % +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    fprintf('MSG: Test of Eigen_pcg begin!!\n');
    f_nnode = size(f_node_coord,1);
    load('rhs_foreigentest.mat');
    b = rhs;
    x0 = zeros(f_nnode,1);
    [f_H,b] = sol_CBS_impose_pbc(f_H,b,f_pbc_dof,f_pbc_val);
    tic
    [x1] = pcg(f_H, b,1e-5,1000 ); 
    toc
    b = rhs;
    tic
    [x] = c_sol_pcg_eigen(f_H_ir, f_H_jc, f_H_val, b, x0, 1000, 1e-5,int32(f_pbc_dof), f_pbc_val);
    toc
    b = rhs;
    tic
    [x2] = c_sol_pcg_eigen_OMP...
    (f_H_ir, f_H_jc, f_H_val, b, x0, 1000, 1e-5,int32(f_pbc_dof), f_pbc_val); 
    toc
end

Sparse matrix H is [29292x29292], nnz of H is 389062.
Serial CPP-mex code for pcg_Eigen(dev)

Matlab cpp-mex bulid command：
mex -I".\eigen" c_sol_pcg_eigen.cpp

/*==========================================================
This subroutine is written in C for invoking in Matlab 2009a.
Before using it, compile this C code using proper C compiler.
[x] = c_sol_pcg_eigen(A_ir, A_jc, A_val, b, x0, maxiter, tol, pbc_dof, pbc_val)
Input:
(1) : A_ir, row-id vector for CSR Sparse matrix format.
(2) : A_jc, col-id vector for CSR Sparse matrix format.
(3) : A_val, value vector for CSR Sparse matrix format.
(4) : b, right-hands term.
(5) : x0, initial guess of unknowns variables.
(6) : maxiter, max number of iterations.
(7) : tol, tolerance of residuals to check if converged.
(8) : pbc_dof,
(9) : pbc_val.
Output:
(1) : x
Author: Chen Jiang  2016-04-20-15.42 jiangchen2007@gmail.coom
*========================================================*/
#include "mex.h"
#include "math.h"
#include "matrix.h"
#include <Eigen/Sparse>
#include <Eigen/Dense>
#include <vector>
#include <algorithm>
#include <iostream>
using namespace Eigen;
using namespace std;
typedef Eigen::Triplet<double> T;
// the gateway function
void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    //input vars
    int *A_ir;
    int *A_jc;
    double *A_val;
    double *b;
    double *x0;
    int maxiter;
    double tol;
    int *pbc_dof;
    double *pbc_val;
    //output vars
    double *x;
    //temp vars
    int nrows;
    int nnz;
    std::vector<T> tripletList;
    int npbc_dof;
    //-----------------
    // GetData
    //-----------------
    if(nrhs != 9){
        mexErrMsgIdAndTxt("MEX:c_sol_pcg_eigen:rhs",
                "This function takes too much input arguments.");
    }
    A_ir = (int*)mxGetData(prhs[0]);
    A_jc = (int*)mxGetData(prhs[1]);
    A_val = mxGetPr(prhs[2]);
    b = mxGetPr(prhs[3]);
    x0 = mxGetPr(prhs[4]);
    maxiter = mxGetScalar(prhs[5]);
    tol = mxGetScalar(prhs[6]);
    pbc_dof = (int*) mxGetData(prhs[7]);
    pbc_val = mxGetPr(prhs[8]);
    nrows = mxGetM(prhs[3]);
    npbc_dof = mxGetM(prhs[7]);
    nnz = mxGetM(prhs[0]);
    plhs[0] = mxCreateDoubleMatrix(nrows,1,mxREAL);
    x = mxGetPr(plhs[0]);
    //-----------------
    //calculations
    //-----------------
    //covert A_ir, A_jc, A_val to Eigen Sparse matrix
    tripletList.reserve(nnz);
    for(int i=0; i<nnz; i++){
        tripletList.push_back(T(A_ir[i]-1, A_jc[i]-1, A_val[i]));
    }
    SparseMatrix<double> A_eigen(nrows, nrows);
    A_eigen.setFromTriplets(tripletList.begin(), tripletList.end());
    Map<VectorXd> b_eigen(b, nrows);
    //apply pbc
    for(int idof = 0; idof < npbc_dof; idof++){
        int dof = pbc_dof[idof]-1;
        A_eigen.coeffRef(dof, dof) = 1.0e8;
        b_eigen(dof) = 1.0e8*pbc_val[idof];
    }
    //CG solver of Eigen
    VectorXd x_eigen;
    Map<VectorXd> x0_eigen(x0, nrows);
    ConjugateGradient<SparseMatrix<double>, Lower|Upper, IncompleteCholesky<double> > cg;
    //ConjugateGradient<SparseMatrix<double>, Lower|Upper > cg;
    //cg.preconditioner().setDroptol(0.001);
    cg.setTolerance(tol);
    cg.setMaxIterations(maxiter);
    cg.compute(A_eigen);
    x_eigen = cg.solveWithGuess(b_eigen, x0_eigen);
    //x_eigen = cg.solve(b_eigen);
    //
    for(int i=0; i<nrows; i++){
        x[i] = x_eigen(i);
    }
}

OpenMP CPP-MEX code for PCG_Eigen(dev)

Mex build command in Windows7:
mex -I".\eigen_dev" c_sol_pcg_eigen_OMP.cpp COMPFLAGS="$COMPFLAGS /openmp"
Mex build command in Linux (opensuse):
mex c_sol_pcg_eigen_OMP.cpp -I".\eigen_dev" CXXPLFAGS="\$CXXFLAGS -fopenmp" LDFLAGS="\$LDFLAGS -fopenmp"

/*==========================================================
This subroutine is written in C for invoking in Matlab 2009a.
Before using it, compile this C code using proper C compiler.
[x] = c_sol_pcg_eigen(A_ir, A_jc, A_val, b, x0, maxiter, tol, pbc_dof, pbc_val)
Input:
(1) : A_ir, row-id vector for CSR Sparse matrix format.
(2) : A_jc, col-id vector for CSR Sparse matrix format.
(3) : A_val, value vector for CSR Sparse matrix format.
(4) : b, right-hands term.
(5) : x0, initial guess of unknowns variables.
(6) : maxiter, max number of iterations.
(7) : tol, tolerance of residuals to check if converged.
(8) : pbc_dof,
(9) : pbc_val.
Output:
(1) : x
Author: Chen Jiang 2016-04-24-10.59 jiangchen2007@gmail.com
*========================================================*/
#include "mex.h"
#include "math.h"
#include "matrix.h"
#include <Eigen/Sparse>
#include <Eigen/Dense>
#include <vector>
#include <algorithm>
#include <iostream>
#include <omp.h>
using namespace Eigen;
using namespace std;
typedef Eigen::Triplet<double> T;
// the gateway function
void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    //
    Eigen::initParallel();
    Eigen::setNbThreads(2);
    //input vars
    int *A_ir;
    int *A_jc;
    double *A_val;
    double *b;
    double *x0;
    int maxiter;
    double tol;
    int *pbc_dof;
    double *pbc_val;
    //output vars
    double *x;
    //temp vars
    int nrows;
    int nnz;
    std::vector<T> tripletList;
    int npbc_dof;
    //-----------------
    // GetData
    //-----------------
    if(nrhs != 9){
        mexErrMsgIdAndTxt("MEX:c_sol_pcg_eigen:rhs",
                "This function takes too much input arguments.");
    }
    A_ir = (int*)mxGetData(prhs[0]);
    A_jc = (int*)mxGetData(prhs[1]);
    A_val = mxGetPr(prhs[2]);
    b = mxGetPr(prhs[3]);
    x0 = mxGetPr(prhs[4]);
    maxiter = mxGetScalar(prhs[5]);
    tol = mxGetScalar(prhs[6]);
    pbc_dof = (int*) mxGetData(prhs[7]);
    pbc_val = mxGetPr(prhs[8]);
    nrows = mxGetM(prhs[3]);
    npbc_dof = mxGetM(prhs[7]);
    nnz = mxGetM(prhs[0]);
    plhs[0] = mxCreateDoubleMatrix(nrows,1,mxREAL);
    x = mxGetPr(plhs[0]);
    //-----------------
    //calculations
    //-----------------
    //covert A_ir, A_jc, A_val to Eigen Sparse matrix
    tripletList.reserve(nnz);
    for(int i=0; i<nnz; i++){
        tripletList.push_back(T(A_ir[i]-1, A_jc[i]-1, A_val[i]));
    }
    SparseMatrix<double> A_eigen(nrows, nrows);
    A_eigen.setFromTriplets(tripletList.begin(), tripletList.end());
    Map<VectorXd> b_eigen(b, nrows);
    //apply pbc
    for(int idof = 0; idof < npbc_dof; idof++){
        int dof = pbc_dof[idof]-1;
        A_eigen.coeffRef(dof, dof) = 1.0e8;
        b_eigen(dof) = 1.0e8*pbc_val[idof];
    }
    //CG solver of Eigen
    VectorXd x_eigen;
    Map<VectorXd> x0_eigen(x0, nrows);
    ConjugateGradient<SparseMatrix<double>, Lower|Upper, IncompleteCholesky<double> > cg;
    //ConjugateGradient<SparseMatrix<double>, Lower|Upper > cg;
    //cg.preconditioner().setDroptol(0.001);
    cg.setTolerance(tol);
    cg.setMaxIterations(maxiter);
    cg.compute(A_eigen);
    x_eigen = cg.solveWithGuess(b_eigen, x0_eigen);
    //x_eigen = cg.solve(b_eigen);
    //
    for(int i=0; i<nrows; i++){
        x[i] = x_eigen(i);
    }
}

Result

norm(x1-x)/norm(x1) = 2.4111e-07
Elapsed time:
Matlab built-in pcg function: 2.39255 sec
Serial Cpp-mex pcg of Eigen: 0.56422 sec
OpenMP Cpp-mex pcg of Eigen: 0.518612 sec (2 threads only)

Conclusion

Even CPP-mex PCG of Eigen contains not only the PCG calculation codes but also the data structure transfering codes, it is still faster than its rival.