123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219 |
- // g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas
- // possible options:
- // -DEIGEN_DONT_VECTORIZE
- // -msse2
- // #define EIGEN_DEFAULT_TO_ROW_MAJOR
- #define _FLOAT
- #include <iostream>
- #include <Eigen/Core>
- #include "BenchTimer.h"
- // include the BLAS headers
- extern "C" {
- #include <cblas.h>
- }
- #include <string>
- #ifdef _FLOAT
- typedef float Scalar;
- #define CBLAS_GEMM cblas_sgemm
- #else
- typedef double Scalar;
- #define CBLAS_GEMM cblas_dgemm
- #endif
- typedef Eigen::Matrix<Scalar,Eigen::Dynamic,Eigen::Dynamic> MyMatrix;
- void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
- void check_product(int M, int N, int K);
- void check_product(void);
- int main(int argc, char *argv[])
- {
- // disable SSE exceptions
- #ifdef __GNUC__
- {
- int aux;
- asm(
- "stmxcsr %[aux] \n\t"
- "orl $32832, %[aux] \n\t"
- "ldmxcsr %[aux] \n\t"
- : : [aux] "m" (aux));
- }
- #endif
- int nbtries=1, nbloops=1, M, N, K;
- if (argc==2)
- {
- if (std::string(argv[1])=="check")
- check_product();
- else
- M = N = K = atoi(argv[1]);
- }
- else if ((argc==3) && (std::string(argv[1])=="auto"))
- {
- M = N = K = atoi(argv[2]);
- nbloops = 1000000000/(M*M*M);
- if (nbloops<1)
- nbloops = 1;
- nbtries = 6;
- }
- else if (argc==4)
- {
- M = N = K = atoi(argv[1]);
- nbloops = atoi(argv[2]);
- nbtries = atoi(argv[3]);
- }
- else if (argc==6)
- {
- M = atoi(argv[1]);
- N = atoi(argv[2]);
- K = atoi(argv[3]);
- nbloops = atoi(argv[4]);
- nbtries = atoi(argv[5]);
- }
- else
- {
- std::cout << "Usage: " << argv[0] << " size \n";
- std::cout << "Usage: " << argv[0] << " auto size\n";
- std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
- std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
- std::cout << "Usage: " << argv[0] << " check\n";
- std::cout << "Options:\n";
- std::cout << " size unique size of the 2 matrices (integer)\n";
- std::cout << " auto automatically set the number of repetitions and tries\n";
- std::cout << " nbloops number of times the GEMM routines is executed\n";
- std::cout << " nbtries number of times the loop is benched (return the best try)\n";
- std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n";
- std::cout << " check check eigen product using cblas as a reference\n";
- exit(1);
- }
- double nbmad = double(M) * double(N) * double(K) * double(nbloops);
- if (!(std::string(argv[1])=="auto"))
- std::cout << M << " x " << N << " x " << K << "\n";
- Scalar alpha, beta;
- MyMatrix ma(M,K), mb(K,N), mc(M,N);
- ma = MyMatrix::Random(M,K);
- mb = MyMatrix::Random(K,N);
- mc = MyMatrix::Random(M,N);
- Eigen::BenchTimer timer;
- // we simply compute c += a*b, so:
- alpha = 1;
- beta = 1;
- // bench cblas
- // ROWS_A, COLS_B, COLS_A, 1.0, A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
- if (!(std::string(argv[1])=="auto"))
- {
- timer.reset();
- for (uint k=0 ; k<nbtries ; ++k)
- {
- timer.start();
- for (uint j=0 ; j<nbloops ; ++j)
- #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
- CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N);
- #else
- CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M);
- #endif
- timer.stop();
- }
- if (!(std::string(argv[1])=="auto"))
- std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
- else
- std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
- }
- // clear
- ma = MyMatrix::Random(M,K);
- mb = MyMatrix::Random(K,N);
- mc = MyMatrix::Random(M,N);
- // eigen
- // if (!(std::string(argv[1])=="auto"))
- {
- timer.reset();
- for (uint k=0 ; k<nbtries ; ++k)
- {
- timer.start();
- bench_eigengemm(mc, ma, mb, nbloops);
- timer.stop();
- }
- if (!(std::string(argv[1])=="auto"))
- std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
- else
- std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
- }
- std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
- std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
-
- return 0;
- }
- using namespace Eigen;
- void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
- {
- for (uint j=0 ; j<nbloops ; ++j)
- mc.noalias() += ma * mb;
- }
- #define MYVERIFY(A,M) if (!(A)) { \
- std::cout << "FAIL: " << M << "\n"; \
- }
- void check_product(int M, int N, int K)
- {
- MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
- ma = MyMatrix::Random(M,K);
- mb = MyMatrix::Random(K,N);
- maT = ma.transpose();
- mbT = mb.transpose();
- mc = MyMatrix::Random(M,N);
- MyMatrix::Scalar eps = 1e-4;
- meigen = mref = mc;
- CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
- meigen += ma * mb;
- MYVERIFY(meigen.isApprox(mref, eps),". * .");
- meigen = mref = mc;
- CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
- meigen += maT.transpose() * mb;
- MYVERIFY(meigen.isApprox(mref, eps),"T * .");
- meigen = mref = mc;
- CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
- meigen += (maT.transpose()) * (mbT.transpose());
- MYVERIFY(meigen.isApprox(mref, eps),"T * T");
- meigen = mref = mc;
- CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
- meigen += ma * mbT.transpose();
- MYVERIFY(meigen.isApprox(mref, eps),". * T");
- }
- void check_product(void)
- {
- int M, N, K;
- for (uint i=0; i<1000; ++i)
- {
- M = internal::random<int>(1,64);
- N = internal::random<int>(1,768);
- K = internal::random<int>(1,768);
- M = (0 + M) * 1;
- std::cout << M << " x " << N << " x " << K << "\n";
- check_product(M, N, K);
- }
- }
|