本文整理汇总了C++中cublasCreate函数的典型用法代码示例。如果您正苦于以下问题:C++ cublasCreate函数的具体用法?C++ cublasCreate怎么用?C++ cublasCreate使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了cublasCreate函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: cublas_handle_
// Global Caffe singleton constructor: starts in CPU mode with a single
// solver and eagerly creates the GPU library contexts -- cuBLAS handle,
// cuSPARSE handle plus a general / zero-based-index matrix descriptor,
// and a cuRAND generator seeded via cluster_seedgen().  Any failure is
// logged but non-fatal so that CPU-only runs still work.
Caffe::Caffe()
: cublas_handle_(NULL),cusparse_handle_(NULL),cusparse_descr_(NULL),curand_generator_(NULL),random_generator_(),mode_(Caffe::CPU), solver_count_(1), root_solver_(true){
// Try to create a cublas handler, and report an error if failed (but we will
// keep the program running as one might just want to run CPU code).
LOG(INFO)<<"caffe init.";
if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
LOG(ERROR) << "Cannot create Cublas handle. Cublas won't be available.";
}
// Create the cuSPARSE handle; same best-effort policy as cuBLAS above.
if (cusparseCreate(&cusparse_handle_)!=CUSPARSE_STATUS_SUCCESS){
LOG(ERROR) << "cannot create Cusparse handle,Cusparse won't be available.";
}
// The matrix descriptor is configured (general matrix, 0-based indexing)
// only when its creation succeeded.
if(cusparseCreateMatDescr(&cusparse_descr_)!=CUSPARSE_STATUS_SUCCESS){
LOG(ERROR) << "cannot create Cusparse descr,descr won't be available.";
}else{
cusparseSetMatType(cusparse_descr_,CUSPARSE_MATRIX_TYPE_GENERAL);
cusparseSetMatIndexBase(cusparse_descr_,CUSPARSE_INDEX_BASE_ZERO);
LOG(INFO)<<"init descr";
}
// Try to create a curand handler; generator creation and seeding must
// both succeed for cuRAND to be usable.
if (curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)
!= CURAND_STATUS_SUCCESS ||
curandSetPseudoRandomGeneratorSeed(curand_generator_, cluster_seedgen())
!= CURAND_STATUS_SUCCESS) {
LOG(ERROR) << "Cannot create Curand generator. Curand won't be available.";
}
LOG(INFO)<<"caffe finish";
}
开发者ID:ZhouYuSong,项目名称:caffe-pruned,代码行数:28,代码来源:common.cpp
示例2: SwitchDevice
// Binds this Context to CUDA device `device_id` and lazily creates the
// backend handles that are compiled in: cuBLAS (USE_CUDA), cuDNN
// (USE_CUDNN) and an NNPACK threadpool (USE_NNPACK).  Safe to call
// repeatedly -- handles that already exist are reused.
void Context::Init(int device_id) {
device_id_ = device_id;
SwitchDevice();
#if defined(USE_CUDA)
// blas_handle_ is stored type-erased (void*), hence the cast at creation.
if (blas_handle_ == nullptr) {
CUBLAS_CHECK(cublasCreate((cublasHandle_t*)&blas_handle_));
CHECK_NOTNULL(blas_handle_);
}
#endif
#if defined(USE_CUDNN)
if (cudnn_handle_ == nullptr) {
CUDNN_CHECK(cudnnCreate((cudnnHandle_t*)&cudnn_handle_));
CHECK_NOTNULL(cudnn_handle_);
}
#endif
#if defined(USE_NNPACK)
// NNPACK needs a one-time library init plus a threadpool
// (0 = library-chosen default thread count).
if (nnpack_handle_ == nullptr) {
CHECK_EQ(nnp_initialize(), nnp_status_success);
nnpack_handle_ = pthreadpool_create(0);
CHECK_NOTNULL(nnpack_handle_);
}
#endif
}
开发者ID:junluan,项目名称:shadow,代码行数:27,代码来源:context.cpp
示例3: gpuSetUp
void gpuSetUp(const int maxBlocksPerKernel, const int n) {
debug("setting up cuBLAS");
if (cublasCreate(&g_cublasHandle) != CUBLAS_STATUS_SUCCESS) {
fatal("couldn't open cuBLAS handle");
}
cuSetUp(maxBlocksPerKernel, n);
}
开发者ID:zauberkraut,项目名称:acmi,代码行数:7,代码来源:linalg.c
示例4: micronn_read
/* Deserializes a micronn network from `file`.
 *
 * Creates a fresh cuBLAS handle for the network, then parses the header
 * ("micro neural network", ninputs/noutputs/nhidden), the hidden-layer
 * sizes, and nhidden+1 weight matrices via micronn_matrix_read().
 *
 * Returns the allocated network, or NULL (with a message on stderr) if
 * allocation or cuBLAS initialization fails.  The caller owns the result.
 */
micronn* micronn_read(FILE* file)
{
    uint i, tmp;
    cublasStatus_t stat;
    micronn* net = malloc(sizeof(micronn));
    if(net == NULL) {
        fprintf(stderr, "micronn allocation failed\n");
        return NULL;
    }
    stat = cublasCreate(&net->handle);
    if(stat != CUBLAS_STATUS_SUCCESS) {
        fprintf(stderr, "CUBLAS initialization failed\n");
        free(net); /* fixed: `net` was leaked on this error path */
        return NULL;
    }
    /* NOTE(review): fscanf return values are not checked, so a malformed
       file leaves fields indeterminate; kept as-is to preserve the
       original parsing behavior. */
    fscanf(file, "micro neural network\n");
    fscanf(file, "ninputs: %d\nnoutputs: %d\nnhidden %d\n", &net->nin, &net->nout, &net->nhidden);
    fscanf(file, "chidden:");
    net->weights = malloc(sizeof(micronn_matrix*) * (net->nhidden + 1));
    net->chidden = malloc(sizeof(uint) * net->nhidden);
    for(i = 0; i < net->nhidden; i++) {
        fscanf(file, " %d", &net->chidden[i]);
    }
    fscanf(file, "\n");
    /* nhidden+1 weight matrices: one per layer transition. */
    for(i = 0; i <= net->nhidden; i++) {
        fscanf(file, "weight %d:\n", &tmp);
        net->weights[i] = micronn_matrix_read(file);
    }
    return net;
}
开发者ID:microo8,项目名称:micronn,代码行数:30,代码来源:micronn.c
示例5: blasx_resource_init
/* Allocates per-GPU cuBLAS handles, CUDA streams/events, and device-side
 * C tile buffers for `GPUs` devices.
 *
 * floatType_id selects the element type of the C_dev tiles:
 *   0 -> float (BLOCKSIZE_SGEMM^2), 1 -> double (BLOCKSIZE_DGEMM^2),
 *   otherwise cuDoubleComplex (BLOCKSIZE_ZGEMM^2).
 * Each GPU gets STREAMNUM streams and events plus STREAMNUM*2 tiles; the
 * caller must size the handles/streams/events/C_dev arrays accordingly.
 * All CUDA / cuBLAS failures abort via assert.
 *
 * (The original body began with self-assigning casts of C_dev -- no-ops
 *  that were removed; floatType_id alone drives the allocation size.)
 */
void blasx_resource_init(int GPUs, cublasHandle_t* handles, cudaStream_t* streams, cudaEvent_t* events, void** C_dev, int floatType_id) {
    int GPU_id = 0;
    for (GPU_id = 0; GPU_id < GPUs; GPU_id++) {
        assert( cudaSetDevice(GPU_id) == cudaSuccess );
        //create handles
        assert( cublasCreate(&handles[GPU_id]) == CUBLAS_STATUS_SUCCESS);
        //create streams and events (events are timing-disabled: used only
        //for inter-stream dependencies, which is cheaper)
        int i = 0;
        for (i = 0 ; i < STREAMNUM; i++) {
            assert( cudaStreamCreate(&streams[i+GPU_id*STREAMNUM]) == cudaSuccess );
            assert( cudaEventCreateWithFlags(&events[i+GPU_id*STREAMNUM], cudaEventDisableTiming) == cudaSuccess );
        }
        //create C_dev tiles, two per stream, sized by the element type
        for (i = 0; i < STREAMNUM*2; i++) {
            if (floatType_id == 0) {
                assert( cudaMalloc((void**)&C_dev[i+GPU_id*STREAMNUM*2], sizeof(float)*BLOCKSIZE_SGEMM*BLOCKSIZE_SGEMM) == cudaSuccess );
            }else if (floatType_id == 1) {
                assert( cudaMalloc((void**)&C_dev[i+GPU_id*STREAMNUM*2], sizeof(double)*BLOCKSIZE_DGEMM*BLOCKSIZE_DGEMM) == cudaSuccess );
            } else {
                assert( cudaMalloc((void**)&C_dev[i+GPU_id*STREAMNUM*2], sizeof(cuDoubleComplex)*BLOCKSIZE_ZGEMM*BLOCKSIZE_ZGEMM) == cudaSuccess );
            }
        }
    }
}
开发者ID:529038378,项目名称:BLASX,代码行数:27,代码来源:blasx_internal.c
示例6: gpu_cublas1
// Demonstrates cuBLAS level-1/3 calls on OpenACC-managed device arrays:
//   C = A^T * B^T            (Dgemm)
//   r = -(C . B)             (Ddot, negated on the host)
//   C = r*B + C              (Daxpy)
//   nrmC = ||C||_2           (Dnrm2)
//   D = C / ||C||            (Dcopy then Dscal by 1/nrmC)
// A,B,C,D are N x N device arrays (N2 = N*N elements); r and nrmC are
// host scalars -- cuBLAS writes scalar results there under its default
// host pointer mode.
void gpu_cublas1(double *A, double *B, double *C, double *D, double *r, double *nrmC, int N, int N2)
{
#pragma acc data present(A, B, C, D)
{
// host_data hands the kernels the *device* addresses of A,B,C,D.
#pragma acc host_data use_device(A, B, C, D)
{
cublasHandle_t handle;
cublasCreate(&handle);
const double alpha = 1.0;
const double beta = 0.0;
// C = 1.0 * A^T * B^T + 0.0 * C
cublasDgemm(handle, CUBLAS_OP_T, CUBLAS_OP_T, N, N, N, &alpha, A, N, B, N, &beta, C, N);
printf(" gpu gemm success \n");
cublasDdot(handle, N2, C, 1, B, 1, r);
printf(" gpu dot success \n");
// Negate the dot product on the host, then use it as the axpy scale.
*r = -1.0 * *r;
cublasDaxpy(handle, N2, r, B, 1, C, 1);
printf(" gpu axpy success \n");
cublasDnrm2(handle, N2, C, 1, nrmC);
printf(" gpu nrm2 success \n");
cublasDcopy(handle, N2, C, 1, D, 1);
printf(" gpu copy success \n");
// Scale D in place by 1/||C|| to normalize it.
*nrmC = 1.0 / *nrmC;
cublasDscal(handle, N2, nrmC, D, 1);
printf(" gpu scal success \n");
cublasDestroy(handle);
printf(" gpu destroy success \n");
}
}
}
开发者ID:xflying777,项目名称:OpenAcc,代码行数:29,代码来源:cublas_level_1.cpp
示例7: CUDA_CHECK
// Switches Caffe's global state to the given CUDA device.
// No-op when the device is already current; otherwise destroys every
// per-device library handle (cuBLAS, cuSPARSE handle + descriptor,
// cuRAND) and recreates them on the new device.
void Caffe::SetDevice(const int device_id) {
  int current_device;
  // fixed: "&current_device" had been mangled to "¤t_device" by
  // HTML-entity corruption (&curren -> ¤), which does not compile.
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
  if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
  if (Get().cusparse_descr_) CUSPARSE_CHECK(cusparseDestroyMatDescr(Get().cusparse_descr_));
  if (Get().cusparse_handle_) CUSPARSE_CHECK(cusparseDestroy(Get().cusparse_handle_));
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  }
  CUSPARSE_CHECK(cusparseCreate(&Get().cusparse_handle_));
  CUSPARSE_CHECK(cusparseCreateMatDescr(&Get().cusparse_descr_));
  // NOTE(review): unlike the constructor, the matrix type / index base
  // are NOT re-applied to the recreated descriptor (the calls below were
  // commented out upstream) -- confirm this is intentional.
  // cusparseSetMatType(cusparse_descr_,CUSPARSE_MATRIX_TYPE_GENERAL);
  // cusparseSetMatIndexBase(cusparse_descr_,CUSPARSE_INDEX_BASE_ZERO);
  LOG(INFO)<<"set descr";
  CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
      cluster_seedgen()));
}
开发者ID:ZhouYuSong,项目名称:caffe-pruned,代码行数:26,代码来源:common.cpp
示例8: mpla_init_instance_block_rows
// Initializes an mpla_instance over `comm`, forcing a proc_count x 1
// process grid (parallelization over block rows only), creates the
// cartesian communicator, stores this process's grid coordinates, and
// creates the per-process cuBLAS handle.
void mpla_init_instance_block_rows(struct mpla_instance* instance, MPI_Comm comm)
{
	instance->comm = comm;

	// total process count and this process's rank within `comm`
	MPI_Comm_size(comm, &(instance->proc_count));
	MPI_Comm_rank(comm, &(instance->cur_proc_rank));
	instance->is_parent = (instance->cur_proc_rank == 0);

	// Fix the grid shape to all-rows / one-column before letting
	// MPI_Dims_create validate it (nonzero entries are kept as-is).
	int grid_dims[2] = { instance->proc_count, 1 };
	MPI_Dims_create(instance->proc_count, 2, grid_dims);
	instance->proc_rows = grid_dims[0];
	instance->proc_cols = grid_dims[1];

	// Non-periodic cartesian communicator without rank reordering.
	int grid_periods[2] = { 0, 0 };
	MPI_Cart_create(comm, 2, grid_dims, grid_periods, 0, &(instance->comm));

	// Retrieve and store this process's (row, col) grid coordinates.
	int my_coord[2];
	MPI_Cart_get(instance->comm, 2, grid_dims, grid_periods, my_coord);
	instance->cur_proc_row = my_coord[0];
	instance->cur_proc_col = my_coord[1];

	cublasCreate(&(instance->cublas_handle));
}
开发者ID:zaspel,项目名称:MPLA,代码行数:34,代码来源:mpla.cpp
示例9: GPUsgemv
void GPUsgemv(int gpuInner, int Md,int Nd,int Kd,float* Adevice,float *Bdevice,float *Cdevice,float *Ahost,float *Bhost,float *Chost, cudaStream_t *stream) {
cudaError_t error;
int memSizeA = sizeof(float)*Md*Nd;
int memSizeB = sizeof(float)*Nd;
int memSizeC = sizeof(float)*Md;
error = cudaMemcpyAsync(Adevice,Ahost,memSizeA,cudaMemcpyHostToDevice,*stream); if (error != cudaSuccess){printf("cudaMemcpy A returned error code %d, line(%d)\n", error, __LINE__);exit(EXIT_FAILURE);}
error = cudaMemcpyAsync(Bdevice,Bhost,memSizeB,cudaMemcpyHostToDevice,*stream); if (error != cudaSuccess){printf("cudaMemcpy B returned error code %d, line(%d)\n", error, __LINE__);exit(EXIT_FAILURE);}
// setup execution parameters
dim3 threads(block_size,block_size);
dim3 grid(Nd/threads.x,Md/threads.y);
// inside CUBLAS
cublasHandle_t handle;
cublasStatus_t ret;
ret = cublasCreate(&handle); if (ret != CUBLAS_STATUS_SUCCESS){printf("cublasCreate returned error code %d, line(%d)\n", ret, __LINE__);exit(EXIT_FAILURE);}
const float alpha = 1.0f;
const float beta = 0.0f;
cublasSetStream(handle,*stream);
for (int i = 0; i < gpuInner; i++) {
ret = cublasSgemv(handle, CUBLAS_OP_N, Md, Nd, &alpha, Adevice, Md, Bdevice, 1, &beta, Cdevice, 1);
if (ret != CUBLAS_STATUS_SUCCESS) {
printf("cublasSgemm returned error code %d, line(%d)\n", ret, __LINE__);
exit(EXIT_FAILURE);
}
}
// done CUBLAS
// copy result back to host
error = cudaMemcpyAsync(Chost,Cdevice,memSizeC,cudaMemcpyDeviceToHost,*stream);
// printf("GPU Iter queued\n");
}
开发者ID:agearh,项目名称:dissertation,代码行数:33,代码来源:sgemv_hetero_blas.cpp
示例10: contractTT
/* Fully contracts two length-n tensor trains TT1 and TT2, summing over
 * the indA x indB combinations of their boundary indices, and prints
 * timing, throughput, and the scalar result before terminating the
 * process (cudaDeviceReset + exit(0) -- callers never regain control).
 * `size` bounds the intermediate tensor dimensions used for the two
 * scratch tensors.  NOTE(review): TT1[0], TT1[n-1], TT2[0], TT2[n-1]
 * are overwritten with prepared variants -- the caller's arrays are
 * mutated; confirm callers expect this.
 */
void contractTT(sTensorGPU *TT1, sTensorGPU *TT2, const int n, const int size)
{
	cublasHandle_t handle;
	cublasCreate(&handle);
	type result = 0;
	// scratch tensors reused by every contraction in the loop nest
	sTensorGPU temp1 = emptyTensor(size*size, 2);
	sTensorGPU temp2 = emptyTensor(size*size*2, 3);
	cudaEvent_t start;
	cudaEventCreate(&start);
	cudaEvent_t stop;
	cudaEventCreate(&stop);
	cudaEventRecord(start, NULL);
	int indA = TT1[0].size[0];
	int indB = TT2[0].size[0];
	// host copies of the boundary tensors so they can be re-prepared
	// for each (i, j) boundary-index pair
	sTensorCPU tt1start = copyToCPU(TT1[0]);
	sTensorCPU tt2start = copyToCPU(TT2[0]);
	sTensorCPU tt1end = copyToCPU(TT1[n - 1]);
	sTensorCPU tt2end = copyToCPU(TT2[n - 1]);
	for (int i = 0; i < indA; i++){
		TT1[0] = prepareTensorStart(tt1start, i);
		TT1[n - 1] = prepareTensorEnd(tt1end, i);
		for (int j = 0; j < indB; j++){
			TT2[0] = prepareTensorStart(tt2start, j);
			TT2[n - 1] = prepareTensorEnd(tt2end, j);
			contractTensor(handle, TT1[0], TT2[0], temp1);
			// fixed: loop variable renamed from `i`, which shadowed the
			// outer boundary index and invited subtle misreads
			for (int k = 1; k < n; k++){
				contractTensor(handle, temp1, TT1[k], temp2);
				contractTensor(handle, temp2, TT2[k], temp1, 2);
			}
			// the fully-contracted value is the single scalar in temp1
			type add = 0;
			cudaMemcpy(&add, temp1.deviceData, sizeof(type), cudaMemcpyDeviceToHost);
			result += add;
		}
	}
	cudaEventRecord(stop, NULL);
	cudaEventSynchronize(stop);
	float msecTotal = 0.0f;
	cudaEventElapsedTime(&msecTotal, start, stop);
	printf("Time: %.3fms\n", msecTotal);
	printf("Ops: %.0f\n", bops);
	double gigaFlops = (bops * 1.0e-9f) / (msecTotal / 1000.0f);
	printf("Perf= %.2f GFlop/s\n", gigaFlops);
	cudaEventDestroy(start); // fixed: events were previously leaked
	cudaEventDestroy(stop);
	cublasDestroy(handle);
	cudaDeviceReset();
	printf("%.5e \n", result);
	exit(0);
}
开发者ID:thomas-hoer,项目名称:cuTT,代码行数:58,代码来源:bigSizeTensors.cpp
示例11: cublasCreate
// Creates and returns a new cuBLAS handle.
// Throws CudaException if cublasCreate fails; the caller owns the handle
// and is responsible for cublasDestroy.
cublasHandle_t CudaUtil::cublasInit()
{
	cublasHandle_t handle;
	cublasStatus_t status = cublasCreate(&handle);
	if (status != CUBLAS_STATUS_SUCCESS) {
		// fixed: message previously misspelled the library as "CUBALS"
		throw CudaException("CUBLAS initialisation error");
	}
	return handle;
}
开发者ID:onedigit,项目名称:org.onedigit.cuda.cpp,代码行数:9,代码来源:CudaUtil.cpp
示例12: device
// Per-GPU implementation state: activates device `d` and builds
// kParallelism independent execution lanes, each consisting of a CUDA
// stream with one cuBLAS and one cuDNN handle bound to that stream.
GpuDevice::Impl::Impl(int d) : device(d) {
ActivateDevice();
for (size_t i = 0; i < kParallelism; ++i) {
CUDA_CALL(cudaStreamCreate(&stream[i]));
CUBLAS_CALL(cublasCreate(&cublas_handle[i]));
// Bind both library handles to this lane's stream so their work is
// ordered with respect to it rather than the default stream.
CUBLAS_CALL(cublasSetStream(cublas_handle[i], stream[i]));
CUDNN_CALL(cudnnCreate(&cudnn_handle[i]));
CUDNN_CALL(cudnnSetStream(cudnn_handle[i], stream[i]));
}
}
开发者ID:AI42,项目名称:minerva,代码行数:10,代码来源:device.cpp
示例13: initCublasHandle
// Creates a cuBLAS context in *handle.  On failure a message is printed
// to stdout and *handle is left invalid -- callers are not notified.
void initCublasHandle(cublasHandle_t* handle)
{
    if (cublasCreate(handle) != CUBLAS_STATUS_SUCCESS)
    {
        printf("CUBLAS INITIALIZATION FAILED");
    }
}
开发者ID:Tifuera,项目名称:linear_eq_symm_band_matrix,代码行数:10,代码来源:gpu_util.cpp
示例14: mode_
// Caffe singleton constructor: defaults to CPU mode / TRAIN phase, then
// eagerly creates the cuBLAS handle, a cuRAND generator, and an MKL VSL
// random stream.  Both RNGs use the fixed seed 1701, so runs are
// reproducible; any creation failure aborts via the *_CHECK macros.
Caffe::Caffe()
: mode_(Caffe::CPU), phase_(Caffe::TRAIN), cublas_handle_(NULL),
curand_generator_(NULL), vsl_stream_(NULL)
{
CUBLAS_CHECK(cublasCreate(&cublas_handle_));
//TODO: original caffe code has bug here!
CURAND_CHECK(curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT));
CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_generator_, 1701ULL));
VSL_CHECK(vslNewStream(&vsl_stream_, VSL_BRNG_MT19937, 1701));
}
开发者ID:xiaobinxu,项目名称:recaffe,代码行数:10,代码来源:common.cpp
示例15: init
/* Initializes the given cuBLAS handle.
 * Returns true on success; on failure prints a diagnostic and terminates
 * the process with a nonzero exit status.
 */
bool init(cublasHandle_t& handle)
{
    cublasStatus_t stat;
    stat = cublasCreate(&handle);
    if(stat != CUBLAS_STATUS_SUCCESS) {
        printf ("init: CUBLAS initialization failed\n");
        // fixed: was exit(0), which reported success to the shell and to
        // any wrapping scripts despite the initialization failure
        exit(EXIT_FAILURE);
    }
    return true;
}
开发者ID:Donkey-Sand,项目名称:CUDA-CNN,代码行数:11,代码来源:main.cpp
示例16: blas_handle
/* Returns the process-wide cuBLAS handle, creating it on first use.
 * Note: the lazy initialization is not guarded -- not thread-safe. */
cublasHandle_t blas_handle()
{
    static cublasHandle_t handle;
    static int initialized = 0;
    if (!initialized) {
        cublasCreate(&handle);
        initialized = 1;
    }
    return handle;
}
开发者ID:isuker,项目名称:darknet,代码行数:10,代码来源:cuda.c
示例17: blas_handle
/* Returns a lazily-created cuBLAS handle for the current CUDA device,
 * caching one handle per device id.  Initialization is not thread-safe.
 * NOTE(review): a device id >= MAX_DEVICES indexes out of bounds -- the
 * original code had the same unchecked limit as a hard-coded 16.
 */
cublasHandle_t blas_handle()
{
    enum { MAX_DEVICES = 16 };  /* was two duplicated magic 16s */
    static int init[MAX_DEVICES] = {0};
    static cublasHandle_t handle[MAX_DEVICES];
    int i = cuda_get_device();
    if(!init[i]) {
        cublasCreate(&handle[i]);
        init[i] = 1;
    }
    return handle[i];
}
开发者ID:KaiqiZhang,项目名称:darknet,代码行数:11,代码来源:cuda.c
示例18: checkCudaErrors
// Selects CUDA device 0, caches its id and properties in member state,
// creates the member cuBLAS handle, and prints the device's name and
// compute capability.  All CUDA failures abort via checkCudaErrors.
void
CudaInterface::initialize() {
    mDevID = 0;
    checkCudaErrors(cudaSetDevice(mDevID));
    checkCudaErrors(cudaGetDevice(&mDevID));
    checkCudaErrors(cudaGetDeviceProperties(&mDeviceProperty, mDevID));
    checkCudaErrors(cublasCreate(&mCublasHandle));
    printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", mDevID, mDeviceProperty.name, mDeviceProperty.major, mDeviceProperty.minor);
    // (removed: an unused local `block_size` derived from the compute
    //  capability -- it was computed but never read, triggering
    //  unused-variable warnings)
}
开发者ID:ethancaballero,项目名称:Multiplicative-Recursive-Neural-Net,代码行数:12,代码来源:host-device_interface.cpp
示例19: norm_gpu
void norm_gpu(double *x, double *norm, int N)
{
#pragma acc data present(x)
{
#pragma acc host_data use_device(x)
{
cublasHandle_t h;
cublasCreate(&h);
cublasDnrm2(h, N, x, 1, norm);
cublasDestroy(h);
}
}
}
开发者ID:xflying777,项目名称:OpenAcc,代码行数:13,代码来源:gmres_part.cpp
示例20: dot_gpu
void dot_gpu(double *x, double *y, double *result, int N)
{
#pragma acc data present(x, y)
{
#pragma acc host_data use_device(x, y)
{
cublasHandle_t h;
cublasCreate(&h);
cublasDdot(h, N, x, 1, y, 1, result);
cublasDestroy(h);
}
}
}
开发者ID:xflying777,项目名称:OpenAcc,代码行数:13,代码来源:gmres_part.cpp
注:本文中的cublasCreate函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论