NICE
Northeastern Interactive Clustering Engine
gpu_svd_solver.h
Go to the documentation of this file.
1 // The MIT License (MIT)
2 //
3 // Copyright (c) 2016 Northeastern University
4 //
5 // Permission is hereby granted, free of charge, to any person obtaining a copy
6 // of this software and associated documentation files (the "Software"), to deal
7 // in the Software without restriction, including without limitation the rights
8 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 // copies of the Software, and to permit persons to whom the Software is
10 // furnished to do so, subject to the following conditions:
11 //
12 // The above copyright notice and this permission notice shall be included in
13 // all copies or substantial portions of the Software.
14 //
15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 // SOFTWARE.
22 
23 #ifndef CPP_INCLUDE_GPU_SVD_SOLVER_H_
24 #define CPP_INCLUDE_GPU_SVD_SOLVER_H_
25 
26 #ifdef NEED_CUDA
27 #include<cuda_runtime_api.h>
28 #include<cuda_runtime.h>
29 #include<device_launch_parameters.h>
30 #include<cusolverDn.h>
31 
32 #include<iostream>
33 
34 #include "Eigen/Dense"
35 #include "include/matrix.h"
36 #include "include/vector.h"
37 #include "include/gpu_util.h"
38 
39 namespace Nice {
40 
41 template<typename T>
42 class GpuSvdSolver {
43  private:
44  Matrix<T> u_;
45  Matrix<T> v_;
46  Vector<T> s_;
47 
48  public:
50 
51  void Compute(const Matrix<T> &A) {
52  int M = A.rows();
53  int N = A.cols();
54  const T *h_A = &A(0);
55  // --- Setting the device matrix and moving the host matrix to the device
56  T *d_A; gpuErrchk(cudaMalloc(&d_A, M * N * sizeof(T)));
57  gpuErrchk(cudaMemcpy(d_A, h_A, M * N * sizeof(T), cudaMemcpyHostToDevice));
58 
59  //--- host side SVD results space
60  s_.resize(M, 1);
61  u_.resize(M, M);
62  v_.resize(N, N);
63 
64  // --- device side SVD workspace and matrices
65  int work_size = 0;
66  int devInfo_h = 0;
67  int *devInfo; gpuErrchk(cudaMalloc(&devInfo, sizeof(int)));
68  T *d_U; gpuErrchk(cudaMalloc(&d_U, M * M * sizeof(T)));
69  T *d_V; gpuErrchk(cudaMalloc(&d_V, N * N * sizeof(T)));
70  T *d_S; gpuErrchk(cudaMalloc(&d_S, N * sizeof(T)));
71  cusolverStatus_t stat;
72 
73  // --- CUDA solver initialization
74  cusolverDnHandle_t solver_handle;
75  cusolverDnCreate(&solver_handle);
76  stat = cusolverDnSgesvd_bufferSize(solver_handle, M, N, &work_size);
77  if (stat != CUSOLVER_STATUS_SUCCESS) {
78  std::cout << "Initialization of cuSolver failed." << std::endl;
79  cudaFree(d_S); cudaFree(d_U); cudaFree(d_V);
80  cusolverDnDestroy(solver_handle);
81  exit(1);
82  }
83  T *work; gpuErrchk(cudaMalloc(&work, work_size * sizeof(T)));
84 
85  // --- CUDA SVD execution
86  stat = GpuSvd(solver_handle, M, N,
87  d_A, d_S, d_U, d_V,
88  work, work_size, devInfo);
89 
90  // Error Check
91  gpuErrchk(cudaMemcpy(&devInfo_h, devInfo,
92  sizeof(int), cudaMemcpyDeviceToHost));
93  if (stat != CUSOLVER_STATUS_SUCCESS || devInfo_h != 0) {
94  std::cerr << "GPU SVD Solver Internal Failure" << std::endl;
95  cudaFree(d_S); cudaFree(d_U); cudaFree(d_V); cudaFree(work);
96  cusolverDnDestroy(solver_handle);
97  exit(1);
98  }
99  cudaDeviceSynchronize();
100 
101  // --- Moving the results from device to host
102  gpuErrchk(cudaMemcpy(&s_(0, 0), d_S, N*sizeof(T),
103  cudaMemcpyDeviceToHost));
104  gpuErrchk(cudaMemcpy(&u_(0, 0), d_U, M*M*sizeof(T),
105  cudaMemcpyDeviceToHost));
106  gpuErrchk(cudaMemcpy(&v_(0, 0), d_V, N*N*sizeof(T),
107  cudaMemcpyDeviceToHost));
108 
109  cudaFree(d_S); cudaFree(d_U); cudaFree(d_V); cudaFree(work);
110  cusolverDnDestroy(solver_handle);
111  }
112 
113  Matrix<T> MatrixU() const { return u_; }
114 
115  Matrix<T> MatrixV() const { return v_; }
116 
117  Vector<T> SingularValues() const { return s_; }
118 };
119 } // namespace Nice
120 
121 #endif // NEED_CUDA
122 
123 #endif // CPP_INCLUDE_GPU_SVD_SOLVER_H_
124 
Matrix< T > MatrixU() const
Definition: gpu_svd_solver.h:113
void gpuErrchk(cudaError_t)
GpuSvdSolver()
Definition: gpu_svd_solver.h:49
Definition: cpu_operations.h:36
Vector< T > SingularValues() const
Definition: gpu_svd_solver.h:117
Definition: gpu_svd_solver.h:42
Matrix< T > MatrixV() const
Definition: gpu_svd_solver.h:115
Eigen::Matrix< T, Eigen::Dynamic, 1 > Vector
Definition: vector.h:31
cusolverStatus_t GpuSvd(cusolverDnHandle_t solver_handle, int M, int N, float *d_A, float *d_S, float *d_U, float *d_V, float *work, int work_size, int *devInfo)
void Compute(const Matrix< T > &A)
Definition: gpu_svd_solver.h:51
Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > Matrix
Definition: matrix.h:31