NICE
Northeastern Interactive Clustering Engine
gpu_util.h
Go to the documentation of this file.
1 // The MIT License (MIT)
2 //
3 // Copyright (c) 2016 Northeastern University
4 //
5 // Permission is hereby granted, free of charge, to any person obtaining a copy
6 // of this software and associated documentation files (the "Software"), to deal
7 // in the Software without restriction, including without limitation the rights
8 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 // copies of the Software, and to permit persons to whom the Software is
10 // furnished to do so, subject to the following conditions:
11 //
12 // The above copyright notice and this permission notice shall be included in
13 // all copies or substantial portions of the Software.
14 //
15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 // SOFTWARE.
22 #ifndef CPP_INCLUDE_GPU_UTIL_H_
23 #define CPP_INCLUDE_GPU_UTIL_H_
24 
25 #ifdef NEED_CUDA
26 
27 #include <cuda_runtime_api.h>
28 #include <cuda_runtime.h>
29 #include <device_launch_parameters.h>
30 #include <cusolverDn.h>
31 
32 #include <iostream>
33 
34 namespace Nice {
35 
36 //
37 // Helper functions
38 //
// Error-checking helpers. Only the declarations are visible in this header;
// the definitions live elsewhere.
//
// gpuAssert: takes a cudaError_t followed by three unnamed arguments
//   (const char *, int, bool) and returns nothing.
//   NOTE(review): presumably file name, line number and an "abort on error"
//   flag -- the parameters are unnamed here, confirm against the definition.
// gpuErrchk: takes a single cudaError_t and returns nothing.
//   NOTE(review): presumably forwards to gpuAssert -- confirm.
39 void gpuAssert(cudaError_t, const char *, int, bool);
40 void gpuErrchk(cudaError_t);
41 
42 //
43 // Cusolver wrapper functions
44 //
// GpuSvd: overloaded for float and double buffers; both overloads take the
// same parameter list and return a cusolverStatus_t.
//
//   solver_handle  cuSOLVER dense handle.
//   M, N           integer dimensions (NOTE(review): presumably rows/columns
//                  of the input matrix -- confirm against the definition).
//   d_A, d_S,      data buffers; NOTE(review): the d_ prefix suggests device
//   d_U, d_V       pointers holding the input matrix, singular values and the
//                  two singular-vector matrices -- confirm.
//   work           scratch buffer of the same element type.
//   work_size      integer size associated with `work` (units not stated
//                  here -- TODO confirm elements vs. bytes).
//   devInfo        int pointer; NOTE(review): likely the cuSOLVER device-side
//                  info flag -- confirm.
45 cusolverStatus_t GpuSvd(cusolverDnHandle_t solver_handle,
46  int M,
47  int N,
48  float * d_A,
49  float * d_S,
50  float * d_U,
51  float * d_V,
52  float * work,
53  int work_size,
54  int * devInfo);
55 
// Double-precision overload of GpuSvd; identical parameter list with double
// buffers.
56 cusolverStatus_t GpuSvd(cusolverDnHandle_t solver_handle,
57  int M,
58  int N,
59  double * d_A,
60  double * d_S,
61  double * d_U,
62  double * d_V,
63  double * work,
64  int work_size,
65  int * devInfo);
66 
// GpuGetLUDecompWorkspace: overloaded for float and double matrices; returns
// a cusolverStatus_t and writes an integer through `Lwork`.
//
// NOTE(review): the parameter list (handle, m, n, A, lda, Lwork) matches the
// cuSOLVER getrf_bufferSize routines, so this presumably queries the
// workspace size needed for an LU factorization -- confirm against the
// definition.
67 cusolverStatus_t GpuGetLUDecompWorkspace(cusolverDnHandle_t handle,
68  int m,
69  int n,
70  float *A,
71  int lda,
72  int *Lwork);
73 
// Double-precision overload; same parameters with a double matrix pointer.
74 cusolverStatus_t GpuGetLUDecompWorkspace(cusolverDnHandle_t handle,
75  int m,
76  int n,
77  double *A,
78  int lda,
79  int *Lwork);
80 
// GpuLUDecomposition: overloaded for float and double; returns a
// cusolverStatus_t.
//
//   handle     cuSOLVER dense handle.
//   m, n       integer matrix dimensions.
//   A, lda     non-const matrix pointer and its leading dimension
//              (NOTE(review): A is presumably overwritten with the factors,
//              as in cuSOLVER getrf -- confirm).
//   Workspace  scratch buffer of the matrix element type.
//   devIpiv    int pointer (NOTE(review): likely the pivot index array).
//   devInfo    int pointer (NOTE(review): likely the device-side info flag).
81 cusolverStatus_t GpuLUDecomposition(cusolverDnHandle_t handle,
82  int m,
83  int n,
84  float *A,
85  int lda,
86  float *Workspace,
87  int *devIpiv, int *devInfo);
88 
// Double-precision overload; same parameters with double buffers.
89 cusolverStatus_t GpuLUDecomposition(cusolverDnHandle_t handle,
90  int m,
91  int n,
92  double *A,
93  int lda,
94  double *Workspace,
95  int *devIpiv, int *devInfo);
96 
// GpuLinearSolver: overloaded for float and double; returns a
// cusolverStatus_t.
//
//   handle     cuSOLVER dense handle.
//   trans      cuBLAS operation selector.
//   n, nrhs    integer sizes (NOTE(review): presumably system order and
//              number of right-hand sides -- confirm).
//   A, lda     read-only matrix pointer and its leading dimension.
//   devIpiv    read-only int pointer (NOTE(review): likely pivots produced
//              by a prior LU factorization -- confirm).
//   B, ldb     non-const matrix pointer and its leading dimension
//              (NOTE(review): presumably right-hand sides on entry and the
//              solution on return, as in cuSOLVER getrs -- confirm).
//   devInfo    int pointer (NOTE(review): likely the device-side info flag).
97 cusolverStatus_t GpuLinearSolver(cusolverDnHandle_t handle,
98  cublasOperation_t trans,
99  int n,
100  int nrhs,
101  const float *A,
102  int lda,
103  const int *devIpiv,
104  float *B,
105  int ldb,
106  int *devInfo);
107 
// Double-precision overload; same parameters with double buffers.
108 cusolverStatus_t GpuLinearSolver(cusolverDnHandle_t handle,
109  cublasOperation_t trans,
110  int n,
111  int nrhs,
112  const double *A,
113  int lda,
114  const int *devIpiv,
115  double *B,
116  int ldb,
117  int *devInfo);
118 
// GpuLuWorkspace: overloaded for float and double; returns a
// cusolverStatus_t and takes an int pointer `worksize` as its last argument.
//
// Unlike GpuGetLUDecompWorkspace there is no leading-dimension parameter.
// NOTE(review): presumably reports the LU workspace size through `worksize`
// with the leading dimension derived from m -- confirm against the
// definition.
119 cusolverStatus_t GpuLuWorkspace(cusolverDnHandle_t handle,
120  int m,
121  int n,
122  float *a,
123  int *worksize);
124 
// Double-precision overload; same parameters with a double matrix pointer.
125 cusolverStatus_t GpuLuWorkspace(cusolverDnHandle_t handle,
126  int m,
127  int n,
128  double *a,
129  int *worksize);
130 
// GpuDeterminant: overloaded for float and double; returns a
// cusolverStatus_t (not the determinant value itself).
//
//   handle     cuSOLVER dense handle.
//   m, n       integer matrix dimensions.
//   a          non-const matrix pointer.
//   workspace  scratch buffer of the matrix element type.
//   devIpiv    int pointer (NOTE(review): likely the pivot index array).
//   devInfo    int pointer (NOTE(review): likely the device-side info flag).
//
// NOTE(review): the signature has no output parameter for the determinant;
// presumably this performs the LU step and the caller derives the value from
// the factored `a` -- confirm against the definition.
131 cusolverStatus_t GpuDeterminant(cusolverDnHandle_t handle,
132  int m,
133  int n,
134  float *a,
135  float *workspace,
136  int *devIpiv,
137  int *devInfo);
138 
// Double-precision overload; same parameters with double buffers.
139 cusolverStatus_t GpuDeterminant(cusolverDnHandle_t handle,
140  int m,
141  int n,
142  double *a,
143  double *workspace,
144  int *devIpiv,
145  int *devInfo);
146 
147 //
148 // Cublas wrapper functions
149 //
// GpuMatrixVectorMul: overloaded for float and double; returns a
// cublasStatus_t.
//
// The parameter list (handle, trans, m, n, alpha, A, lda, x, incx, beta, y,
// incy) matches the cuBLAS gemv routines. NOTE(review): presumably computes
// y = alpha * op(A) * x + beta * y by forwarding to gemv -- confirm against
// the definition. A, x, alpha and beta are read-only; y is the only
// non-const buffer.
150 cublasStatus_t GpuMatrixVectorMul(cublasHandle_t handle,
151  cublasOperation_t trans,
152  int m, int n,
153  const float *alpha,
154  const float *A, int lda,
155  const float *x, int incx,
156  const float *beta,
157  float *y, int incy);
158 
// Double-precision overload; same parameters with double buffers.
159 cublasStatus_t GpuMatrixVectorMul(cublasHandle_t handle,
160  cublasOperation_t trans,
161  int m, int n,
162  const double *alpha,
163  const double *A, int lda,
164  const double *x, int incx,
165  const double *beta,
166  double *y, int incy);
// GpuMatrixScalarMul: overloaded for float and double; returns a
// cublasStatus_t.
//
//   handle  cuBLAS handle.
//   n       integer count (NOTE(review): presumably the total number of
//           elements in `a` -- confirm).
//   scalar  multiplier, passed by const reference.
//   a       non-const buffer (NOTE(review): presumably scaled in place --
//           confirm against the definition).
167 cublasStatus_t GpuMatrixScalarMul(cublasHandle_t handle,
168  int n,
169  const float &scalar,
170  float *a);
171 
// Double-precision overload; same parameters with double types.
172 cublasStatus_t GpuMatrixScalarMul(cublasHandle_t handle,
173  int n,
174  const double &scalar,
175  double *a);
176 
// GpuMatrixMatrixMul: overloaded for float and double; returns a
// cublasStatus_t.
//
//   handle   cuBLAS handle.
//   m, n, k  integer dimensions (NOTE(review): presumably the gemm
//            convention, a is m x k, b is k x n, c is m x n -- confirm).
//   a, b, c  non-const buffers (NOTE(review): presumably c receives the
//            product of a and b -- confirm against the definition).
//
// No alpha/beta, transpose or leading-dimension parameters are exposed, so
// those choices are made inside the definition.
177 cublasStatus_t GpuMatrixMatrixMul(cublasHandle_t handle,
178  int m,
179  int n,
180  int k,
181  float *a,
182  float *b,
183  float *c);
184 
// Double-precision overload; same parameters with double buffers.
185 cublasStatus_t GpuMatrixMatrixMul(cublasHandle_t handle,
186  int m,
187  int n,
188  int k,
189  double *a,
190  double *b,
191  double *c);
192 
// GpuMatrixAdd: overloaded for float and double; returns a cublasStatus_t.
//
//   handle      cuBLAS handle.
//   m, n        integer matrix dimensions.
//   alpha, A    read-only scalar pointer and read-only matrix with leading
//   lda         dimension lda.
//   beta, B     read-only scalar pointer and read-only matrix with leading
//   ldb         dimension ldb.
//   C, ldc      output matrix and its leading dimension.
//
// NOTE(review): apart from the missing transpose selectors this matches the
// cuBLAS geam parameter list, so it presumably computes
// C = alpha * A + beta * B -- confirm against the definition.
193 cublasStatus_t GpuMatrixAdd(cublasHandle_t handle,
194  int m,
195  int n,
196  const float *alpha,
197  const float *A, int lda,
198  const float *beta,
199  const float *B, int ldb,
200  float *C, int ldc);
201 
// Double-precision overload; same parameters with double buffers.
202 cublasStatus_t GpuMatrixAdd(cublasHandle_t handle,
203  int m,
204  int n,
205  const double *alpha,
206  const double *A, int lda,
207  const double *beta,
208  const double *B, int ldb,
209  double *C, int ldc);
210 
// GpuMatrixMatrixSub: overloaded for float and double; returns a
// cublasStatus_t.
//
// Same parameter shape as GpuMatrixAdd (handle, m, n, alpha, a/lda, beta,
// b/ldb, c/ldc), except that the input matrices a and b are taken as
// non-const pointers here.
// NOTE(review): presumably computes a combination of a and b that yields
// their difference in c; whether the caller must supply a negative beta or
// the definition negates it is not visible here -- confirm.
211 cublasStatus_t GpuMatrixMatrixSub(cublasHandle_t handle,
212  int m,
213  int n,
214  const float *alpha,
215  float *a, int lda,
216  const float *beta,
217  float *b, int ldb,
218  float *c, int ldc);
219 
// Double-precision overload; same parameters with double buffers.
220 cublasStatus_t GpuMatrixMatrixSub(cublasHandle_t handle,
221  int m,
222  int n,
223  const double *alpha,
224  double *a, int lda,
225  const double *beta,
226  double *b, int ldb,
227  double *c, int ldc);
228 
// GpuVectorVectorDot: overloaded for float and double; returns a
// cublasStatus_t.
//
//   handle  cuBLAS handle.
//   n       integer element count.
//   a, b    non-const input buffers (NOTE(review): presumably the two
//           vectors, read with unit stride since no increment is exposed --
//           confirm).
//   c       pointer through which the result is presumably written; whether
//           it must be host or device memory is not visible here -- TODO
//           confirm against the definition.
229 cublasStatus_t GpuVectorVectorDot(cublasHandle_t handle,
230  int n,
231  float *a,
232  float *b,
233  float *c);
// Double-precision overload; same parameters with double buffers.
234 cublasStatus_t GpuVectorVectorDot(cublasHandle_t handle,
235  int n,
236  double *a,
237  double *b,
238  double *c);
239 
// GpuFrobeniusNorm: overloaded for float and double; returns a
// cublasStatus_t.
//
//   handle  cuBLAS handle.
//   n       integer element count (NOTE(review): presumably the total number
//           of matrix entries, treating the matrix as a flat vector --
//           confirm).
//   incx    integer stride between consecutive elements of `a`.
//   a       non-const input buffer.
//   c       pointer through which the norm is presumably written; host vs.
//           device residency is not visible here -- TODO confirm.
//
// NOTE(review): the (n, a, incx, result) shape matches cuBLAS nrm2, which
// would give the Frobenius norm of a contiguous matrix -- confirm against
// the definition.
240 cublasStatus_t GpuFrobeniusNorm(cublasHandle_t handle,
241  int n,
242  int incx,
243  float * a,
244  float * c);
245 
// Double-precision overload; same parameters with double buffers.
246 cublasStatus_t GpuFrobeniusNorm(cublasHandle_t handle,
247  int n,
248  int incx,
249  double * a,
250  double * c);
251 } // namespace Nice
252 
253 #endif // NEED_CUDA
254 #endif // CPP_INCLUDE_GPU_UTIL_H_
cublasStatus_t GpuMatrixScalarMul(cublasHandle_t handle, int n, const float &scalar, float *a)
void gpuErrchk(cudaError_t)
cublasStatus_t GpuFrobeniusNorm(cublasHandle_t handle, int n, int incx, float *a, float *c)
Definition: cpu_operations.h:36
cusolverStatus_t GpuLuWorkspace(cusolverDnHandle_t handle, int m, int n, float *a, int *worksize)
cublasStatus_t GpuVectorVectorDot(cublasHandle_t handle, int n, float *a, float *b, float *c)
void gpuAssert(cudaError_t, const char *, int, bool)
cusolverStatus_t GpuDeterminant(cusolverDnHandle_t handle, int m, int n, float *a, float *workspace, int *devIpiv, int *devInfo)
cusolverStatus_t GpuLinearSolver(cusolverDnHandle_t handle, cublasOperation_t trans, int n, int nrhs, const float *A, int lda, const int *devIpiv, float *B, int ldb, int *devInfo)
cublasStatus_t GpuMatrixMatrixMul(cublasHandle_t handle, int m, int n, int k, float *a, float *b, float *c)
cublasStatus_t GpuMatrixAdd(cublasHandle_t handle, int m, int n, const float *alpha, const float *A, int lda, const float *beta, const float *B, int ldb, float *C, int ldc)
cusolverStatus_t GpuGetLUDecompWorkspace(cusolverDnHandle_t handle, int m, int n, float *A, int lda, int *Lwork)
cublasStatus_t GpuMatrixMatrixSub(cublasHandle_t handle, int m, int n, const float *alpha, float *a, int lda, const float *beta, float *b, int ldb, float *c, int ldc)
cusolverStatus_t GpuSvd(cusolverDnHandle_t solver_handle, int M, int N, float *d_A, float *d_S, float *d_U, float *d_V, float *work, int work_size, int *devInfo)
cusolverStatus_t GpuLUDecomposition(cusolverDnHandle_t handle, int m, int n, float *A, int lda, float *Workspace, int *devIpiv, int *devInfo)
cublasStatus_t GpuMatrixVectorMul(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const float *alpha, const float *A, int lda, const float *x, int incx, const float *beta, float *y, int incy)