本文整理汇总了C++中cusparseDestroy函数的典型用法代码示例。如果您正苦于以下问题:C++ cusparseDestroy函数的具体用法?C++ cusparseDestroy怎么用?C++ cusparseDestroy使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了cusparseDestroy函数的19个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: CUDA_CHECK
// Makes `device_id` the current CUDA device and recreates the singleton's
// cuBLAS, cuSPARSE and cuRAND objects after the switch.
//
// Returns immediately when `device_id` is already the current device.
// Every CUDA/cuBLAS/cuSPARSE/cuRAND call is wrapped in the corresponding
// *_CHECK macro (defined outside this block).
void Caffe::SetDevice(const int device_id) {
  int current_device;
  // Fixed: the address-of expression had been corrupted to "¤t_device".
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));

  // Release whatever library objects the singleton currently holds.
  if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
  if (Get().cusparse_descr_) CUSPARSE_CHECK(cusparseDestroyMatDescr(Get().cusparse_descr_));
  if (Get().cusparse_handle_) CUSPARSE_CHECK(cusparseDestroy(Get().cusparse_handle_));
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  }

  // Recreate the cuSPARSE handle and matrix descriptor.
  CUSPARSE_CHECK(cusparseCreate(&Get().cusparse_handle_));
  CUSPARSE_CHECK(cusparseCreateMatDescr(&Get().cusparse_descr_));
  // cusparseSetMatType(cusparse_descr_,CUSPARSE_MATRIX_TYPE_GENERAL);
  // cusparseSetMatIndexBase(cusparse_descr_,CUSPARSE_INDEX_BASE_ZERO);
  LOG(INFO)<<"set descr";

  // Recreate the cuBLAS handle and the cuRAND generator, then reseed the
  // generator with a value from cluster_seedgen().
  CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
      cluster_seedgen()));
}
开发者ID:ZhouYuSong,项目名称:caffe-pruned,代码行数:26,代码来源:common.cpp
示例2: cusparseDestroy
// Destructor: releases the cuSPARSE handle, matrix descriptor and HYB matrix,
// the SOR iteration data, and the buffers this object frees with cudaFree.
TxMatrixOptimizationDataCU::~TxMatrixOptimizationDataCU() {
  // cuSPARSE objects: destroy each one that is set, then clear the member.
  if (handle != 0) {
    cusparseDestroy(handle);
    handle = 0;
  }
  if (matDescr != 0) {
    cusparseDestroyMatDescr(matDescr);
    matDescr = 0;
  }
  if (localMatrix != 0) {
    cusparseDestroyHybMat(localMatrix);
    localMatrix = 0;
  }

  // SOR iteration data.
  if (gsContext != 0) {
    cugelusDestroySorIterationData(gsContext);
    gsContext = 0;
  }

  // These two buffers are freed only when non-null; the members are not reset.
  if (f2c != 0) {
    CHKCUDAERR(cudaFree(f2c));
  }
  if (workvector != 0) {
    CHKCUDAERR(cudaFree(workvector));
  }

#ifndef HPCG_NOMPI
  // Unless HPCG_NOMPI is defined, these buffers are freed unconditionally.
  CHKCUDAERR(cudaFree(elementsToSend));
  CHKCUDAERR(cudaFree(sendBuffer_d));
#endif
}
开发者ID:NobodyInAmerica,项目名称:libTxHPCG,代码行数:28,代码来源:TxMatrixOptimizationDataCU.cpp
示例3: cublasDestroy
// Destructor: destroys the cuBLAS and cuSPARSE handles that are set, then
// resets the current CUDA device.
cuda_running_configuration::~cuda_running_configuration()
{
    if (cublas_handle != 0) {
        cublasDestroy(cublas_handle);
    }

    if (cusparse_handle != 0) {
        cusparseDestroy(cusparse_handle);
    }

    // Return codes of the destroy/reset calls are not inspected.
    cudaDeviceReset();
}
开发者ID:yzxyzh,项目名称:nnForge,代码行数:8,代码来源:cuda_running_configuration.cpp
示例4:
// Destructor: destroys, in this order, the cuSPARSE matrix descriptor, the
// cuBLAS handle, the cuSPARSE handle and the cuRAND generator. Each object is
// destroyed only when it is set, and each call goes through its *_CHECK macro.
Caffe::~Caffe() {
  if (cusparse_descr_ != 0) {
    CUSPARSE_CHECK(cusparseDestroyMatDescr(cusparse_descr_));
  }
  if (cublas_handle_ != 0) {
    CUBLAS_CHECK(cublasDestroy(cublas_handle_));
  }
  if (cusparse_handle_ != 0) {
    CUSPARSE_CHECK(cusparseDestroy(cusparse_handle_));
  }
  if (curand_generator_ != 0) {
    CURAND_CHECK(curandDestroyGenerator(curand_generator_));
  }
}
开发者ID:ZhouYuSong,项目名称:caffe-pruned,代码行数:8,代码来源:common.cpp
示例5: magma_dapplycuicc_l
// Triangular solve with the preconditioner matrix via cuSPARSE.
//
// Creates a temporary cuSPARSE handle and a matrix descriptor configured as
// lower triangular, non-unit diagonal, zero-based, then calls
// cusparseDcsrsv_solve with precond->M and precond->cuinfoL, reading b.val
// and writing x->val.
//
// Every cuSPARSE failure is only reported with printf; execution continues
// and the function always returns MAGMA_SUCCESS.
magma_int_t
magma_dapplycuicc_l( magma_d_vector b, magma_d_vector *x,
                     magma_d_preconditioner *precond ){
    double unit = MAGMA_D_MAKE( 1.0, 0.0);

    cusparseHandle_t   handle;
    cusparseMatDescr_t lowerDescr;
    cusparseStatus_t   status;

    // --- temporary cuSPARSE context ---
    status = cusparseCreate(&handle);
    if (status != CUSPARSE_STATUS_SUCCESS) {
        printf("error in Handle.\n");
    }

    status = cusparseCreateMatDescr(&lowerDescr);
    if (status != CUSPARSE_STATUS_SUCCESS) {
        printf("error in MatrDescr.\n");
    }

    status = cusparseSetMatType(lowerDescr, CUSPARSE_MATRIX_TYPE_TRIANGULAR);
    if (status != CUSPARSE_STATUS_SUCCESS) {
        printf("error in MatrType.\n");
    }

    status = cusparseSetMatDiagType(lowerDescr, CUSPARSE_DIAG_TYPE_NON_UNIT);
    if (status != CUSPARSE_STATUS_SUCCESS) {
        printf("error in DiagType.\n");
    }

    status = cusparseSetMatFillMode(lowerDescr, CUSPARSE_FILL_MODE_LOWER);
    if (status != CUSPARSE_STATUS_SUCCESS) {
        printf("error in fillmode.\n");
    }

    status = cusparseSetMatIndexBase(lowerDescr, CUSPARSE_INDEX_BASE_ZERO);
    if (status != CUSPARSE_STATUS_SUCCESS) {
        printf("error in IndexBase.\n");
    }

    // --- solve ---
    status = cusparseDcsrsv_solve( handle,
                                   CUSPARSE_OPERATION_NON_TRANSPOSE,
                                   precond->M.num_rows, &unit,
                                   lowerDescr,
                                   precond->M.val,
                                   precond->M.row,
                                   precond->M.col,
                                   precond->cuinfoL,
                                   b.val,
                                   x->val );
    if (status != CUSPARSE_STATUS_SUCCESS) {
        printf("error in L triangular solve:%p.\n", precond->cuinfoL );
    }

    // --- teardown ---
    cusparseDestroyMatDescr( lowerDescr );
    cusparseDestroy( handle );
    magma_device_sync();

    return MAGMA_SUCCESS;
}
开发者ID:XapaJIaMnu,项目名称:magma,代码行数:58,代码来源:dcuilu.cpp
示例6: ssp_finalize_cuda
// Tears down the CUDA state owned by `cudaHandle`.
//
// A null `cudaHandle` is ignored. Otherwise the cuSPARSE handle and matrix
// descriptor are destroyed when set, the struct itself is released with
// free(), and the current CUDA device is reset.
void ssp_finalize_cuda(ssp_cuda *cudaHandle) {
    if (cudaHandle) {
        if (cudaHandle->cusparse_handle) {
            cusparseDestroy(cudaHandle->cusparse_handle);
        }
        if (cudaHandle->cusparse_matDescr) {
            cusparseDestroyMatDescr(cudaHandle->cusparse_matDescr);
        }

        free(cudaHandle);
        cudaDeviceReset();
    }
}
开发者ID:nefan,项目名称:ssparse,代码行数:14,代码来源:ssp_cuda.cpp
示例7: cudaFree
// Destructor: frees the temporary buffers, destroys the library handles,
// resets the current CUDA device and prints a confirmation line.
CUDAManager::~CUDAManager()
{
    // Only m_tempBuffer is guarded; m_tempRetBuffer is always passed to
    // cudaFree.
    if (m_tempBuffer != 0) {
        cudaFree(m_tempBuffer);
    }
    cudaFree(m_tempRetBuffer);

#ifdef USE_CUSPARSE
    if (cusparseHandle != 0) {
        cusparseDestroy(cusparseHandle);
    }
#endif

    if (cublasHandle != 0) {
        cublasDestroy(cublasHandle);
    }

    cudaDeviceReset();
    cout << "Cleaned up CUDA." << endl;
}
开发者ID:bsumirak,项目名称:ugcore,代码行数:13,代码来源:cuda_manager.cpp
示例8: THCudaShutdown
/* Releases everything `state` owns: RNG state, device properties, the p2p
 * access table, per-device cuBLAS/cuSPARSE handles and current streams, and
 * cached allocator memory. The device that was current on entry is restored
 * before returning. */
void THCudaShutdown(THCState* state)
{
  THCRandom_shutdown(state);

  free(state->rngState);
  free(state->deviceProperties);

  int numDevices = 0;
  int savedDevice = -1;
  THCudaCheck(cudaGetDevice(&savedDevice));
  THCudaCheck(cudaGetDeviceCount(&numDevices));

  /* p2p access table: one row per device, then the table itself */
  for (int d = 0; d < numDevices; ++d) {
    free(state->p2pAccessEnabled[d]);
  }
  free(state->p2pAccessEnabled);

  /* per-device resources; each device is made current before its handles
   * are destroyed */
  for (int d = 0; d < numDevices; ++d) {
    THCudaCheck(cudaSetDevice(d));
    THCCudaResourcesPerDevice* perDevice = state->resourcesPerDevice + d;

    /* user defined BLAS handles */
    for (int h = 0; h < perDevice->numBlasHandles; ++h) {
      THCublasCheck(cublasDestroy(perDevice->blasHandles[h]));
    }
    /* user defined sparse handles */
    for (int h = 0; h < perDevice->numSparseHandles; ++h) {
      THCusparseCheck(cusparseDestroy(perDevice->sparseHandles[h]));
    }
    free(perDevice->blasHandles);
    free(perDevice->sparseHandles);

    /* the stream stored in this device's thread-local slot, then the slot */
    THCStream* currentStream =
        (THCStream*)THCThreadLocal_get(state->currentStreams[d]);
    THCStream_free(currentStream);
    THCThreadLocal_free(state->currentStreams[d]);
  }
  free(state->resourcesPerDevice);

  /* allocator caches */
  if (state->cudaDeviceAllocator->emptyCache) {
    state->cudaDeviceAllocator->emptyCache(state->cudaDeviceAllocator->state);
  }
  if (state->cudaHostAllocator == &THCCachingHostAllocator) {
    THCCachingHostAllocator_emptyCache();
  }

  free(state->currentStreams);
  THCThreadLocal_free(state->currentPerDeviceBlasHandle);

  THCudaCheck(cudaSetDevice(savedDevice));
}
开发者ID:HustlehardInc,项目名称:pytorch,代码行数:48,代码来源:THCGeneral.cpp
示例9: magma_capplycumicc_l
// Performs a triangular solve with the preconditioner matrix via cuSPARSE.
//
// A temporary cuSPARSE handle and a matrix descriptor (triangular, non-unit
// diagonal, lower fill mode, zero-based indices) are created, then
// cusparseCcsrsm_solve is called with precond->M (dval/drow/dcol) and the
// analysis data in precond->cuinfoL, reading b.dval and writing x->dval.
//
// Parameters:
//   b       - right-hand side data (b.dval, b.num_rows, b.num_cols are read)
//   x       - output; x->dval receives the result
//   precond - supplies the matrix M and the analysis info cuinfoL
//   queue   - passed to cusparseSetStream for the temporary handle
// Returns `info`, which starts at 0.
// NOTE(review): CHECK_CUSPARSE is defined outside this block; it presumably
// sets `info` and jumps to `cleanup` on failure -- confirm against the macro.
extern "C" magma_int_t
magma_capplycumicc_l(
magma_c_matrix b,
magma_c_matrix *x,
magma_c_preconditioner *precond,
magma_queue_t queue )
{
magma_int_t info = 0;
// Both cuSPARSE objects start as NULL; the cleanup code below passes them to
// the destroy functions unconditionally.
cusparseHandle_t cusparseHandle=NULL;
cusparseMatDescr_t descrL=NULL;
magmaFloatComplex one = MAGMA_C_MAKE( 1.0, 0.0);
// Create the temporary cuSPARSE context and describe the matrix.
CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));
CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue ));
CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL ));
CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_NON_UNIT ));
CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER ));
CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO ));
// The fourth argument is the total entry count of b divided by the row count
// of M -- presumably the number of right-hand-side columns (TODO confirm).
// precond->M.num_rows is also passed as the leading dimension after both
// b.dval and x->dval.
CHECK_CUSPARSE( cusparseCcsrsm_solve( cusparseHandle,
CUSPARSE_OPERATION_NON_TRANSPOSE,
precond->M.num_rows,
b.num_rows*b.num_cols/precond->M.num_rows,
&one,
descrL,
precond->M.dval,
precond->M.drow,
precond->M.dcol,
precond->cuinfoL,
b.dval,
precond->M.num_rows,
x->dval,
precond->M.num_rows ));
magma_device_sync();
// Shared exit path: release the temporary cuSPARSE objects.
cleanup:
cusparseDestroyMatDescr( descrL );
cusparseDestroy( cusparseHandle );
return info;
}
开发者ID:cjy7117,项目名称:FT-MAGMA,代码行数:44,代码来源:ccumilu.cpp
示例10: main
//.........这里部分代码省略.........
cusparseMatDescr_t descr = 0;
cusparseStatus = cusparseCreateMatDescr(&descr);
checkCudaErrors(cusparseStatus);
cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL);
cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO);
// temp memory for CG
checkCudaErrors(cudaMallocManaged((void **)&r, N*sizeof(float)));
checkCudaErrors(cudaMallocManaged((void **)&p, N*sizeof(float)));
checkCudaErrors(cudaMallocManaged((void **)&Ax, N*sizeof(float)));
cudaDeviceSynchronize();
for (int i=0; i < N; i++)
{
r[i] = rhs[i];
}
alpha = 1.0;
alpham1 = -1.0;
beta = 0.0;
r0 = 0.;
cusparseScsrmv(cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE, N, N, nz, &alpha, descr, val, I, J, x, &beta, Ax);
cublasSaxpy(cublasHandle, N, &alpham1, Ax, 1, r, 1);
cublasStatus = cublasSdot(cublasHandle, N, r, 1, r, 1, &r1);
k = 1;
while (r1 > tol*tol && k <= max_iter)
{
if (k > 1)
{
b = r1 / r0;
cublasStatus = cublasSscal(cublasHandle, N, &b, p, 1);
cublasStatus = cublasSaxpy(cublasHandle, N, &alpha, r, 1, p, 1);
}
else
{
cublasStatus = cublasScopy(cublasHandle, N, r, 1, p, 1);
}
cusparseScsrmv(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, N, nz, &alpha, descr, val, I, J, p, &beta, Ax);
cublasStatus = cublasSdot(cublasHandle, N, p, 1, Ax, 1, &dot);
a = r1 / dot;
cublasStatus = cublasSaxpy(cublasHandle, N, &a, p, 1, x, 1);
na = -a;
cublasStatus = cublasSaxpy(cublasHandle, N, &na, Ax, 1, r, 1);
r0 = r1;
cublasStatus = cublasSdot(cublasHandle, N, r, 1, r, 1, &r1);
cudaThreadSynchronize();
printf("iteration = %3d, residual = %e\n", k, sqrt(r1));
k++;
}
printf("Final residual: %e\n",sqrt(r1));
fprintf(stdout,"&&&& uvm_cg test %s\n", (sqrt(r1) < tol) ? "PASSED" : "FAILED");
float rsum, diff, err = 0.0;
for (int i = 0; i < N; i++)
{
rsum = 0.0;
for (int j = I[i]; j < I[i+1]; j++)
{
rsum += val[j]*x[J[j]];
}
diff = fabs(rsum - rhs[i]);
if (diff > err)
{
err = diff;
}
}
cusparseDestroy(cusparseHandle);
cublasDestroy(cublasHandle);
cudaFree(I);
cudaFree(J);
cudaFree(val);
cudaFree(x);
cudaFree(r);
cudaFree(p);
cudaFree(Ax);
cudaDeviceReset();
printf("Test Summary: Error amount = %f, result = %s\n", err, (k <= max_iter) ? "SUCCESS" : "FAILURE");
exit((k <= max_iter) ? EXIT_SUCCESS : EXIT_FAILURE);
}
开发者ID:ziyuhe,项目名称:cuda_project,代码行数:101,代码来源:main.cpp
示例11: _tmain
//.........这里部分代码省略.........
deviceProp.multiProcessorCount, deviceProp.major, deviceProp.minor);
int version = (deviceProp.major * 0x10 + deviceProp.minor);
if (version < 0x11)
{
printf("Requires a minimum CUDA compute 1.1 capability\n");
cudaDeviceReset();
exit(EXIT_SUCCESS);
}
M = N = 8388608; //2 ^ 23
//M = N = 4194304; //2 ^ 22
//M = N = 2097152; //2 ^ 21
//M = N = 1048576; //2 ^ 20
//M = N = 524288; //2 ^ 19
nz = N * 8;
I = (int *)malloc(sizeof(int)*(N + 1));
J = (int *)malloc(sizeof(int)*nz);
val = (cuDoubleComplex *)malloc(sizeof(cuDoubleComplex)*nz);
genTridiag(I, J, val, N, nz);
x = (cuDoubleComplex*)malloc(sizeof(cuDoubleComplex)* N);
y = (cuDoubleComplex*)malloc(sizeof(cuDoubleComplex)* N);
//create an array for the answer array (Y) and set all of the answers to 0 for the test (could do random)
for (int i = 0; i < N; i++)
{
y[i] = make_cuDoubleComplex(0.0, 0.0);
}
//Get handle to the CUBLAS context
cublasHandle_t cublasHandle = 0;
cublasStatus_t cublasStatus;
cublasStatus = cublasCreate(&cublasHandle);
checkCudaErrors(cublasStatus);
//Get handle to the CUSPARSE context
cusparseHandle_t cusparseHandle = 0;
cusparseStatus_t cusparseStatus;
cusparseStatus = cusparseCreate(&cusparseHandle);
checkCudaErrors(cusparseStatus);
//Get handle to a CUSPARSE matrix descriptor
cusparseMatDescr_t descr = 0;
cusparseStatus = cusparseCreateMatDescr(&descr);
checkCudaErrors(cusparseStatus);
//Get handle to a matrix_solve_info object
cusparseSolveAnalysisInfo_t info = 0;
cusparseStatus = cusparseCreateSolveAnalysisInfo(&info);
checkCudaErrors(cusparseStatus);
cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL);
cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO);
duration_setup = (std::clock() - setup_clock) / (double)CLOCKS_PER_SEC;
printf("setup_time: %f\r\n", duration_setup);
std::clock_t start;
start = std::clock();
checkCudaErrors(cudaMalloc((void **)&d_x, N*sizeof(float)));
checkCudaErrors(cudaMalloc((void **)&d_y, N*sizeof(float)));
cudaMemcpy(d_x, x, N*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(d_y, y, N*sizeof(float), cudaMemcpyHostToDevice);
//Analyze the matrix. The info variable is needed to perform additional operations on the matrix
cusparseStatus = cusparseZcsrsv_analysis(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, nz, descr, val, J, I, info);
//Uses infor gathered from the matrix to solve the matrix.
cusparseStatus = cusparseZcsrsv_solve(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, 0, descr, val, J, I, info, d_x, d_y);
//Get the result back from the device
cudaMemcpy(x, d_x, N*sizeof(float), cudaMemcpyDeviceToHost);
cudaMemcpy(y, d_y, N*sizeof(float), cudaMemcpyDeviceToHost);
duration = (std::clock() - start) / (double)CLOCKS_PER_SEC;
printf("time ellapsed: %f", duration);
//free up memory
cusparseDestroy(cusparseHandle);
cublasDestroy(cublasHandle);
free(I);
free(J);
free(val);
free(x);
cudaFree(d_x);
cudaDeviceReset();
//Wait for user input so they can see the results
char* s = (char*)malloc(sizeof(char) * 8);
scanf(s);
exit(0);
}
开发者ID:davidhauck,项目名称:MatrixSolver,代码行数:101,代码来源:CudaTest.cpp
示例12: magma_d_spmv
//.........这里部分代码省略.........
//printf("done.\n");
}
else if ( A.storage_type == Magma_DENSE ) {
//printf("using DENSE kernel for SpMV: ");
magmablas_dgemv( MagmaNoTrans, A.num_rows, A.num_cols, alpha,
A.dval, A.num_rows, x.dval, 1, beta, y.dval,
1, queue );
//printf("done.\n");
}
else if ( A.storage_type == Magma_SPMVFUNCTION ) {
//printf("using DENSE kernel for SpMV: ");
CHECK( magma_dcustomspmv( alpha, x, beta, y, queue ));
//printf("done.\n");
}
else if ( A.storage_type == Magma_BCSR ) {
//printf("using CUSPARSE BCSR kernel for SpMV: ");
// CUSPARSE context //
cusparseDirection_t dirA = CUSPARSE_DIRECTION_ROW;
int mb = magma_ceildiv( A.num_rows, A.blocksize );
int nb = magma_ceildiv( A.num_cols, A.blocksize );
CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));
CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() ));
CHECK_CUSPARSE( cusparseCreateMatDescr( &descr ));
cusparseDbsrmv( cusparseHandle, dirA,
CUSPARSE_OPERATION_NON_TRANSPOSE, mb, nb, A.numblocks,
&alpha, descr, A.dval, A.drow, A.dcol, A.blocksize, x.dval,
&beta, y.dval );
}
else {
printf("error: format not supported.\n");
info = MAGMA_ERR_NOT_SUPPORTED;
}
}
else if ( A.num_cols < x.num_rows || x.num_cols > 1 ) {
magma_int_t num_vecs = x.num_rows / A.num_cols * x.num_cols;
if ( A.storage_type == Magma_CSR ) {
CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));
CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() ));
CHECK_CUSPARSE( cusparseCreateMatDescr( &descr ));
CHECK_CUSPARSE( cusparseSetMatType( descr, CUSPARSE_MATRIX_TYPE_GENERAL ));
CHECK_CUSPARSE( cusparseSetMatIndexBase( descr, CUSPARSE_INDEX_BASE_ZERO ));
if ( x.major == MagmaColMajor) {
cusparseDcsrmm(cusparseHandle,
CUSPARSE_OPERATION_NON_TRANSPOSE,
A.num_rows, num_vecs, A.num_cols, A.nnz,
&alpha, descr, A.dval, A.drow, A.dcol,
x.dval, A.num_cols, &beta, y.dval, A.num_cols);
} else if ( x.major == MagmaRowMajor) {
/*cusparseDcsrmm2(cusparseHandle,
CUSPARSE_OPERATION_NON_TRANSPOSE,
CUSPARSE_OPERATION_TRANSPOSE,
A.num_rows, num_vecs, A.num_cols, A.nnz,
&alpha, descr, A.dval, A.drow, A.dcol,
x.dval, A.num_cols, &beta, y.dval, A.num_cols);
*/
}
} else if ( A.storage_type == Magma_SELLP ) {
if ( x.major == MagmaRowMajor) {
CHECK( magma_dmgesellpmv( MagmaNoTrans, A.num_rows, A.num_cols,
num_vecs, A.blocksize, A.numblocks, A.alignment,
alpha, A.dval, A.dcol, A.drow, x.dval, beta, y.dval, queue ));
}
else if ( x.major == MagmaColMajor) {
// transpose first to row major
CHECK( magma_dvtranspose( x, &x2, queue ));
CHECK( magma_dmgesellpmv( MagmaNoTrans, A.num_rows, A.num_cols,
num_vecs, A.blocksize, A.numblocks, A.alignment,
alpha, A.dval, A.dcol, A.drow, x2.dval, beta, y.dval, queue ));
}
}
/*if ( A.storage_type == Magma_DENSE ) {
//printf("using DENSE kernel for SpMV: ");
magmablas_dmgemv( MagmaNoTrans, A.num_rows, A.num_cols,
num_vecs, alpha, A.dval, A.num_rows, x.dval, 1,
beta, y.dval, 1 );
//printf("done.\n");
}*/
else {
printf("error: format not supported.\n");
info = MAGMA_ERR_NOT_SUPPORTED;
}
}
}
// CPU case missing!
else {
printf("error: CPU not yet supported.\n");
info = MAGMA_ERR_NOT_SUPPORTED;
}
cleanup:
cusparseDestroyMatDescr( descr );
cusparseDestroy( cusparseHandle );
cusparseHandle = 0;
descr = 0;
magma_dmfree(&x2, queue );
return info;
}
开发者ID:xulunfan,项目名称:magma,代码行数:101,代码来源:magma_d_blaswrapper.cpp
示例13: magma_c_cucsrtranspose
// Transposes sparse matrix A into B using cuSPARSE's CSR-to-CSC conversion.
//
// Three cases:
//   * A is CSR on the device: A is copied into a temporary C, cusparseCcsr2csc
//     writes the converted data into C's arrays, C is copied to B and then
//     released; B's fill mode is set to the mirror of A's.
//   * A is CSR on the CPU: A is transferred to the device, transposed there
//     by a recursive call, and the result is transferred back.
//   * Any other storage type: A is converted to CSR, transposed by a
//     recursive call, and converted back to A's storage type.
//
// Parameters:
//   A     - input matrix
//   B     - output matrix (filled by magma_c_mtransfer / magma_c_mconvert)
//   queue - passed to cusparseSetStream and to every MAGMA helper
// Returns MAGMA_SUCCESS in all cases; cuSPARSE failures are only reported
// with printf.
extern "C" magma_int_t
magma_c_cucsrtranspose(
    magma_c_sparse_matrix A,
    magma_c_sparse_matrix *B,
    magma_queue_t queue )
{
    // for symmetric matrices: convert to csc using cusparse
    if( A.storage_type == Magma_CSR && A.memory_location == Magma_DEV ) {
        // C provides device arrays of A's size for the conversion output.
        magma_c_sparse_matrix C;
        magma_c_mtransfer( A, &C, Magma_DEV, Magma_DEV, queue );

        // CUSPARSE context //
        cusparseHandle_t handle;
        cusparseStatus_t cusparseStatus;
        cusparseStatus = cusparseCreate(&handle);
        if (cusparseStatus != 0) printf("error in Handle.\n");
        cusparseSetStream( handle, queue );

        // Each call's status is checked on its own; previously the status of
        // the descrA create/set calls was overwritten before being tested and
        // the status of the descrB set calls was dropped.
        cusparseMatDescr_t descrA;
        cusparseMatDescr_t descrB;
        cusparseStatus = cusparseCreateMatDescr(&descrA);
        if (cusparseStatus != 0) printf("error in MatrDescr.\n");
        cusparseStatus = cusparseCreateMatDescr(&descrB);
        if (cusparseStatus != 0) printf("error in MatrDescr.\n");

        cusparseStatus =
            cusparseSetMatType(descrA,CUSPARSE_MATRIX_TYPE_GENERAL);
        if (cusparseStatus != 0) printf("error in MatrType.\n");
        cusparseStatus =
            cusparseSetMatType(descrB,CUSPARSE_MATRIX_TYPE_GENERAL);
        if (cusparseStatus != 0) printf("error in MatrType.\n");

        cusparseStatus =
            cusparseSetMatIndexBase(descrA,CUSPARSE_INDEX_BASE_ZERO);
        if (cusparseStatus != 0) printf("error in IndexBase.\n");
        cusparseStatus =
            cusparseSetMatIndexBase(descrB,CUSPARSE_INDEX_BASE_ZERO);
        if (cusparseStatus != 0) printf("error in IndexBase.\n");

        // NOTE(review): A.num_rows is passed for both matrix dimensions, so
        // this path presumably assumes a square matrix -- confirm with callers.
        cusparseStatus =
            cusparseCcsr2csc( handle, A.num_rows, A.num_rows, A.nnz,
                              A.dval, A.drow, A.dcol, C.dval, C.dcol, C.drow,
                              CUSPARSE_ACTION_NUMERIC,
                              CUSPARSE_INDEX_BASE_ZERO);
        if (cusparseStatus != 0)
            printf("error in transpose: %d.\n", cusparseStatus);

        cusparseDestroyMatDescr( descrA );
        cusparseDestroyMatDescr( descrB );
        cusparseDestroy( handle );
        // end CUSPARSE context //

        magma_c_mtransfer( C, B, Magma_DEV, Magma_DEV, queue );
        // Fixed leak: the temporary C was never released. The other branches
        // free their temporaries the same way after transferring them out.
        magma_c_mfree( &C, queue );

        // Transposition swaps the lower and upper triangles.
        if( A.fill_mode == Magma_FULL ){
            B->fill_mode = Magma_FULL;
        }
        else if( A.fill_mode == Magma_LOWER ){
            B->fill_mode = Magma_UPPER;
        }
        else if ( A.fill_mode == Magma_UPPER ){
            B->fill_mode = Magma_LOWER;
        }
        return MAGMA_SUCCESS;
    }else if( A.storage_type == Magma_CSR && A.memory_location == Magma_CPU ){
        // CSR on the host: round-trip through the device.
        magma_c_sparse_matrix A_d, B_d;
        magma_c_mtransfer( A, &A_d, A.memory_location, Magma_DEV, queue );
        magma_c_cucsrtranspose( A_d, &B_d, queue );
        magma_c_mtransfer( B_d, B, Magma_DEV, A.memory_location, queue );
        magma_c_mfree( &A_d, queue );
        magma_c_mfree( &B_d, queue );
        return MAGMA_SUCCESS;
    }else {
        // Other storage types: round-trip through CSR.
        magma_c_sparse_matrix ACSR, BCSR;
        magma_c_mconvert( A, &ACSR, A.storage_type, Magma_CSR, queue );
        magma_c_cucsrtranspose( ACSR, &BCSR, queue );
        magma_c_mconvert( BCSR, B, Magma_CSR, A.storage_type, queue );
        magma_c_mfree( &ACSR, queue );
        magma_c_mfree( &BCSR, queue );
        return MAGMA_SUCCESS;
    }
}
开发者ID:liuxingrui4p,项目名称:magma-1,代码行数:91,代码来源:magma_cmtranspose.cpp
示例14: main
//.........这里部分代码省略.........
// convert to SELLP and copy to GPU
TESTING_CHECK( magma_smconvert( hA, &hA_SELLP, Magma_CSR, Magma_SELLP, queue ));
TESTING_CHECK( magma_smtransfer( hA_SELLP, &dA_SELLP, Magma_CPU, Magma_DEV, queue ));
magma_smfree(&hA_SELLP, queue );
magma_smfree( &dy, queue );
TESTING_CHECK( magma_svinit( &dy, Magma_DEV, dx.num_rows, dx.num_cols, c_zero, queue ));
// SpMV on GPU (SELLP)
start = magma_sync_wtime( queue );
for (j=0; j < 10; j++) {
TESTING_CHECK( magma_s_spmv( c_one, dA_SELLP, dx, c_zero, dy, queue ));
}
end = magma_sync_wtime( queue );
printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (SELLP).\n",
(end-start)/10, FLOPS*10.*n/(end-start) );
TESTING_CHECK( magma_smtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ));
res = 0.0;
for(magma_int_t k=0; k < hA.num_rows; k++ ) {
res=res + MAGMA_S_REAL(hcheck.val[k]) - MAGMA_S_REAL(hrefvec.val[k]);
}
printf("%% |x-y|_F = %8.2e\n", res);
if ( res < accuracy )
printf("%% tester spmm SELL-P: ok\n");
else
printf("%% tester spmm SELL-P: failed\n");
magma_smfree( &hcheck, queue );
magma_smfree(&dA_SELLP, queue );
// SpMV on GPU (CUSPARSE - CSR)
// CUSPARSE context //
magma_smfree( &dy, queue );
TESTING_CHECK( magma_svinit( &dy, Magma_DEV, dx.num_rows, dx.num_cols, c_zero, queue ));
//#ifdef PRECISION_d
start = magma_sync_wtime( queue );
TESTING_CHECK( cusparseCreate( &cusparseHandle ));
TESTING_CHECK( cusparseSetStream( cusparseHandle, magma_queue_get_cuda_stream(queue) ));
TESTING_CHECK( cusparseCreateMatDescr( &descr ));
TESTING_CHECK( cusparseSetMatType( descr, CUSPARSE_MATRIX_TYPE_GENERAL ));
TESTING_CHECK( cusparseSetMatIndexBase( descr, CUSPARSE_INDEX_BASE_ZERO ));
float alpha = c_one;
float beta = c_zero;
// copy matrix to GPU
TESTING_CHECK( magma_smtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue) );
for (j=0; j < 10; j++) {
cusparseScsrmm(cusparseHandle,
CUSPARSE_OPERATION_NON_TRANSPOSE,
dA.num_rows, n, dA.num_cols, dA.nnz,
&alpha, descr, dA.dval, dA.drow, dA.dcol,
dx.dval, dA.num_cols, &beta, dy.dval, dA.num_cols);
}
end = magma_sync_wtime( queue );
printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s (CSR).\n",
(end-start)/10, FLOPS*10*n/(end-start) );
TESTING_CHECK( magma_smtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue ));
res = 0.0;
for(magma_int_t k=0; k < hA.num_rows; k++ ) {
res = res + MAGMA_S_REAL(hcheck.val[k]) - MAGMA_S_REAL(hrefvec.val[k]);
}
printf("%% |x-y|_F = %8.2e\n", res);
if ( res < accuracy )
printf("%% tester spmm cuSPARSE: ok\n");
else
printf("%% tester spmm cuSPARSE: failed\n");
magma_smfree( &hcheck, queue );
cusparseDestroyMatDescr( descr );
cusparseDestroy( cusparseHandle );
descr = NULL;
cusparseHandle = NULL;
//#endif
printf("\n\n");
// free CPU memory
magma_smfree( &hA, queue );
magma_smfree( &hx, queue );
magma_smfree( &hy, queue );
magma_smfree( &hrefvec, queue );
// free GPU memory
magma_smfree( &dx, queue );
magma_smfree( &dy, queue );
magma_smfree( &dA, queue);
#ifdef MAGMA_WITH_MKL
magma_free_cpu( pntre );
#endif
i++;
}
magma_queue_destroy( queue );
TESTING_CHECK( magma_finalize() );
return info;
}
开发者ID:maxhutch,项目名称:magma,代码行数:101,代码来源:testing_sspmm.cpp
示例15: magma_cmtransposeconjugate
// Builds B from A by transposing with cuSPARSE's CSR-to-CSC conversion and
// then calling magma_cmconjugate on the result (presumably conjugating the
// values in place -- that helper is defined elsewhere).
//
// Three cases:
//   * A is CSR on the device: B's metadata is filled in with swapped
//     dimensions and mirrored fill mode, B's device arrays are allocated,
//     and cusparseCcsr2csc writes the converted data into them.
//   * A is on the CPU: A is transferred to the device, processed by a
//     recursive call, and the result is transferred back.
//   * Otherwise: A is converted to CSR, processed by a recursive call, and
//     converted back to A's storage type.
//
// Parameters:
//   A     - input matrix
//   B     - output matrix
//   queue - supplies the CUDA stream and is passed to every MAGMA helper
// Returns `info`, which starts at 0; when it is non-zero at exit, B is freed.
// NOTE(review): CHECK and CHECK_CUSPARSE are defined outside this block; they
// presumably set `info` and jump to `cleanup` on failure -- confirm.
extern "C" magma_int_t
magma_cmtransposeconjugate(
magma_c_matrix A,
magma_c_matrix *B,
magma_queue_t queue )
{
// for symmetric matrices: convert to csc using cusparse
magma_int_t info = 0;
// cuSPARSE objects start as NULL and are only created in the device-CSR
// branch; cleanup passes them to the destroy functions in every case.
cusparseHandle_t handle=NULL;
cusparseMatDescr_t descrA=NULL;
cusparseMatDescr_t descrB=NULL;
// Temporaries used by the recursive branches; freed in cleanup.
magma_c_matrix ACSR={Magma_CSR}, BCSR={Magma_CSR};
magma_c_matrix A_d={Magma_CSR}, B_d={Magma_CSR};
if( A.storage_type == Magma_CSR && A.memory_location == Magma_DEV ) {
// fill in information for B
B->storage_type = A.storage_type;
B->diagorder_type = A.diagorder_type;
B->memory_location = Magma_DEV;
B->num_rows = A.num_cols; // transposed
B->num_cols = A.num_rows; // transposed
B->nnz = A.nnz;
B->true_nnz = A.true_nnz;
// Transposition swaps the lower and upper triangles.
if ( A.fill_mode == MagmaFull ) {
B->fill_mode = MagmaFull;
}
else if ( A.fill_mode == MagmaLower ) {
B->fill_mode = MagmaUpper;
}
else if ( A.fill_mode == MagmaUpper ) {
B->fill_mode = MagmaLower;
}
// Cleared before allocation.
B->dval = NULL;
B->drow = NULL;
B->dcol = NULL;
// memory allocation
CHECK( magma_cmalloc( &B->dval, B->nnz ));
CHECK( magma_index_malloc( &B->drow, B->num_rows + 1 ));
CHECK( magma_index_malloc( &B->dcol, B->nnz ));
// Create the temporary cuSPARSE context and two general, zero-based
// descriptors.
CHECK_CUSPARSE( cusparseCreate( &handle ));
CHECK_CUSPARSE( cusparseSetStream( handle, queue->cuda_stream() ));
CHECK_CUSPARSE( cusparseCreateMatDescr( &descrA ));
CHECK_CUSPARSE( cusparseCreateMatDescr( &descrB ));
CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL ));
CHECK_CUSPARSE( cusparseSetMatType( descrB, CUSPARSE_MATRIX_TYPE_GENERAL ));
CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO ));
CHECK_CUSPARSE( cusparseSetMatIndexBase( descrB, CUSPARSE_INDEX_BASE_ZERO ));
// Note the swapped output arguments: B->dcol, then B->drow.
CHECK_CUSPARSE(
cusparseCcsr2csc( handle, A.num_rows, A.num_cols, A.nnz,
A.dval, A.drow, A.dcol, B->dval, B->dcol, B->drow,
CUSPARSE_ACTION_NUMERIC,
CUSPARSE_INDEX_BASE_ZERO) );
CHECK( magma_cmconjugate( B, queue ));
} else if ( A.memory_location == Magma_CPU ){
// Host input: round-trip through the device.
CHECK( magma_cmtransfer( A, &A_d, A.memory_location, Magma_DEV, queue ));
CHECK( magma_cmtransposeconjugate( A_d, &B_d, queue ));
CHECK( magma_cmtransfer( B_d, B, Magma_DEV, A.memory_location, queue ));
} else {
// Device input in a non-CSR storage type: round-trip through CSR.
CHECK( magma_cmconvert( A, &ACSR, A.storage_type, Magma_CSR, queue ));
CHECK( magma_cmtransposeconjugate( ACSR, &BCSR, queue ));
CHECK( magma_cmconvert( BCSR, B, Magma_CSR, A.storage_type, queue ));
}
// Shared exit path: release cuSPARSE objects and all temporaries.
cleanup:
cusparseDestroyMatDescr( descrA );
cusparseDestroyMatDescr( descrB );
cusparseDestroy( handle );
magma_cmfree( &A_d, queue );
magma_cmfree( &B_d, queue );
magma_cmfree( &ACSR, queue );
magma_cmfree( &BCSR, queue );
// On error, the output is released as well.
if( info != 0 ){
magma_cmfree( B, queue );
}
return info;
}
开发者ID:xulunfan,项目名称:magma,代码行数:79,代码来源:magma_cmtranspose.cpp
示例16: magma_ccustomicsetup
// Fills `precond` from a matrix read from a file and runs cuSPARSE
// triangular-solve analysis on it.
//
// Steps, in order: read the matrix at a hard-coded path into hA; transfer it
// to the device as precond->M; copy M to precond->L and transpose L into
// precond->U; build precond->d / precond->d2 from L and U via
// magma_cjacobisetup_diagscal; initialize precond->work1 / work2; create the
// analysis infos precond->cuinfoL and precond->cuinfoU with
// cusparseCcsrsv_analysis.
//
// Parameters:
//   A, b    - not referenced in this function body
//   precond - receives M, L, U, d, d2, work1, work2, cuinfoL, cuinfoU
//   queue   - passed to the MAGMA helpers
// Returns `info`, which starts at 0.
// NOTE(review): CHECK and CHECK_CUSPARSE are defined outside this block; they
// presumably set `info` and jump to `cleanup` on failure -- confirm.
magma_int_t
magma_ccustomicsetup(
magma_c_matrix A,
magma_c_matrix b,
magma_c_preconditioner *precond,
magma_queue_t queue )
{
magma_int_t info = 0;
// cuSPARSE objects start as NULL; cleanup destroys them unconditionally.
cusparseHandle_t cusparseHandle=NULL;
cusparseMatDescr_t descrL=NULL;
cusparseMatDescr_t descrU=NULL;
magma_c_matrix hA={Magma_CSR};
// The input file is a hard-coded absolute path.
char preconditionermatrix[255];
snprintf( preconditionermatrix, sizeof(preconditionermatrix),
"/Users/hanzt0114cl306/work/matrices/ani/ani7_crop_ichol.mtx" );
CHECK( magma_c_csr_mtx( &hA, preconditionermatrix , queue) );
// for CUSPARSE
CHECK( magma_cmtransfer( hA, &precond->M, Magma_CPU, Magma_DEV , queue ));
// copy the matrix to precond->L and (transposed) to precond->U
CHECK( magma_cmtransfer(precond->M, &(precond->L), Magma_DEV, Magma_DEV, queue ));
CHECK( magma_cmtranspose( precond->L, &(precond->U), queue ));
// extract the diagonal of L into precond->d
CHECK( magma_cjacobisetup_diagscal( precond->L, &precond->d, queue ));
CHECK( magma_cvinit( &precond->work1, Magma_DEV, hA.num_rows, 1, MAGMA_C_ZERO, queue ));
// extract the diagonal of U into precond->d2
CHECK( magma_cjacobisetup_diagscal( precond->U, &precond->d2, queue ));
CHECK( magma_cvinit( &precond->work2, Magma_DEV, hA.num_rows, 1, MAGMA_C_ZERO, queue ));
// Create the cuSPARSE handle and the descriptor for the first analysis:
// triangular, non-unit diagonal, zero-based, lower fill mode.
CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));
CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL ));
CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_NON_UNIT ));
CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO ));
CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER ));
// Analysis of precond->M without transposition; the result is kept in
// precond->cuinfoL.
CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL ));
CHECK_CUSPARSE( cusparseCcsrsv_analysis( cusparseHandle,
CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows,
precond->M.nnz, descrL,
precond->M.val, precond->M.row, precond->M.col, precond->cuinfoL ));
// Second descriptor: configured the same way as descrL, including the
// lower fill mode.
CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU ));
CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT ));
CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO ));
CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_LOWER ));
// Analysis of the same matrix with CUSPARSE_OPERATION_TRANSPOSE; the result
// is kept in precond->cuinfoU.
CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU ));
CHECK_CUSPARSE( cusparseCcsrsv_analysis( cusparseHandle,
CUSPARSE_OPERATION_TRANSPOSE, precond->M.num_rows,
precond->M.nnz, descrU,
precond->M.val, precond->M.row, precond->M.col, precond->cuinfoU ));
// Shared exit path: release the local cuSPARSE objects and the host matrix.
// The analysis infos stored in precond are not released here.
cleanup:
cusparseDestroy( cusparseHandle );
cusparseDestroyMatDescr( descrL );
cusparseDestroyMatDescr( descrU );
cusparseHandle=NULL;
descrL=NULL;
descrU=NULL;
magma_cmfree( &hA, queue );
return info;
}
开发者ID:maxhutch,项目名称:magma,代码行数:74,代码来源:ccustomic.cpp
示例17: main
//.........这里部分代码省略.........
// Back Substitution
cusparseStatus = cusparseScsrsv_solve(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, &floatone, descrU,
d_valsILU0, d_row, d_col, info_u, d_y, d_zm1);
checkCudaErrors(cusparseStatus);
k++;
if (k == 1)
{
cublasScopy(cublasHandle, N, d_zm1, 1, d_p, 1);
}
else
{
cublasSdot(cublasHandle, N, d_r, 1, d_zm1, 1, &numerator);
cublasSdot(cublasHandle, N, d_rm2, 1, d_zm2, 1, &denominator);
beta = numerator/denominator;
cublasSscal(cublasHandle, N, &beta, d_p, 1);
cublasSaxpy(cublasHandle, N, &floatone, d_zm1, 1, d_p, 1) ;
}
cusparseScsrmv(cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE, N, N, nzILU0, &floatone, descrU, d_val, d_row, d_col, d_p, &floatzero, d_omega);
cublasSdot(cublasHandle, N, d_r, 1, d_zm1, 1, &numerator);
cublasSdot(cublasHandle, N, d_p, 1, d_omega, 1, &denominator);
alpha = numerator / denominator;
cublasSaxpy(cublasHandle, N, &alpha, d_p, 1, d_x, 1);
cublasScopy(cublasHandle, N, d_r, 1, d_rm2, 1);
cublasScopy(cublasHandle, N, d_zm1, 1, d_zm2, 1);
nalpha = -alpha;
cublasSaxpy(cublasHandle, N, &nalpha, d_omega, 1, d_r, 1);
cublasSdot(cublasHandle, N, d_r, 1, d_r, 1, &r1);
}
printf(" iteration = %3d, residual = %e \n", k, sqrt(r1));
cudaMemcpy(x, d_x, N*sizeof(float), cudaMemcpyDeviceToHost);
/* check result */
err = 0.0;
for (int i = 0; i < N; i++)
{
rsum = 0.0;
for (int j = I[i]; j < I[i+1]; j++)
{
rsum += val[j]*x[J[j]];
}
diff = fabs(rsum - rhs[i]);
if (diff > err)
{
err = diff;
}
}
printf(" Convergence Test: %s \n", (k <= max_iter) ? "OK" : "FAIL");
nErrors += (k > max_iter) ? 1 : 0;
qaerr2 = err;
/* Destroy parameters */
cusparseDestroySolveAnalysisInfo(infoA);
cusparseDestroySolveAnalysisInfo(info_u);
/* Destroy contexts */
cusparseDestroy(cusparseHandle);
cublasDestroy(cublasHandle);
/* Free device memory */
free(I);
free(J);
free(val);
free(x);
free(rhs);
free(valsILU0);
cudaFree(d_col);
cudaFree(d_row);
cudaFree(d_val);
cudaFree(d_x);
cudaFree(d_y);
cudaFree(d_r);
cudaFree(d_p);
cudaFree(d_omega);
cudaFree(d_valsILU0);
cudaFree(d_zm1);
cudaFree(d_zm2);
cudaFree(d_rm2);
// cudaDeviceReset causes the driver to clean up all state. While
// not mandatory in normal operation, it is good practice. It is also
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
cudaDeviceReset();
printf(" Test Summary:\n");
printf(" Counted total of %d errors\n", nErrors);
printf(" qaerr1 = %f qaerr2 = %f\n\n", fabs(qaerr1), fabs(qaerr2));
exit((nErrors == 0 &&fabs(qaerr1)<1e-5 && fabs(qaerr2) < 1e-5 ? EXIT_SUCCESS : EXIT_FAILURE));
}
开发者ID:drolfe00,项目名称:CUDAVerificationkernels,代码行数:101,代码来源:main.cpp
示例18: magma_zcuspaxpy
//.........这里部分代码省略.........
C.memory_location = A.memory_location;
magma_int_t stat_dev = 0;
C.val = NULL;
C.col = NULL;
C.row = NULL;
C.rowidx = NULL;
C.blockinfo = NULL;
C.diag = NULL;
C.dval = NULL;
C.dcol = NULL;
C.drow = NULL;
C.drowidx = NULL;
C.ddiag = NULL;
// CUSPARSE context //
cusparseHandle_t handle;
cusparseStatus_t cusparseStatus;
cusparseStatus = cusparseCreate(&handle);
cusparseSetStream( handle, queue );
if (cusparseStatus != 0) printf("error in Handle.\n");
cusparseMatDescr_t descrA;
cusparseMatDescr_t descrB;
cusparseMatDescr_t descrC;
cusparseStatus = cusparseCreateMatDescr(&descrA);
cusparseStatus = cusparseCreateMatDescr(&descrB);
cusparseStatus = cusparseCreateMatDescr(&descrC);
if (cusparseStatus != 0) printf("error in MatrDescr.\n");
cusparseStatus =
cusparseSetMatType(descrA,CUSPARSE_MATRIX_TYPE_GENERAL);
cusparseSetMatType(descrB,CUSPARSE_MATRIX_TYPE_GENERAL);
cusparseSetMatType(descrC,CUSPARSE_MATRIX_TYPE_GENERAL);
if (cusparseStatus != 0) printf("error in MatrType.\n");
cusparseStatus =
cusparseSetMatIndexBase(descrA,CUSPARSE_INDEX_BASE_ZERO);
cusparseSetMatIndexBase(descrB,CUSPARSE_INDEX_BASE_ZERO);
cusparseSetMatIndexBase(descrC,CUSPARSE_INDEX_BASE_ZERO);
if (cusparseStatus != 0) printf("error in IndexBase.\n");
// multiply A and B on the device
magma_int_t baseC;
// nnzTotalDevHostPtr points to host memory
magma_index_t *nnzTotalDevHostPtr = (magma_index_t*) &C.nnz;
cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST);
stat_dev += magma_index_malloc( &C.drow, (A.num_rows + 1) );
cusparseXcsrgeamNnz(handle,A.num_rows, A.num_cols,
descrA, A.nnz, A.drow, A.dcol,
descrB, B.nnz, B.drow, B.dcol,
descrC, C.row, nnzTotalDevHostPtr);
if (NULL != nnzTotalDevHostPtr) {
C.nnz = *nnzTotalDevHostPtr
|
请发表评论