本文整理汇总了C++中cudaGetDevice函数的典型用法代码示例。如果您正苦于以下问题：C++ cudaGetDevice函数的具体用法？C++ cudaGetDevice怎么用？C++ cudaGetDevice使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了cudaGetDevice函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: SyncedMemory::to_gpu
// Internal helper that makes sure the data is available on the GPU.
//  - head_ == UNINITIALIZED: allocate device memory directly and zero it.
//  - head_ == HEAD_AT_CPU:   allocate device memory if needed, then copy the
//                            CPU buffer to the GPU.
//  - head_ == HEAD_AT_GPU / SYNCED: nothing to do.
// In CPU_ONLY builds the body reduces to the NO_GPU macro.
inline void SyncedMemory::to_gpu() {
#ifndef CPU_ONLY
  if (head_ == UNINITIALIZED) {
    // Remember which device the allocation is made on.
    CUDA_CHECK(cudaGetDevice(&gpu_device_));
    // Allocate the device buffer and fill it with zeros.
    CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
    caffe_gpu_memset(size_, 0, gpu_ptr_);
    own_gpu_data_ = true;
    head_ = HEAD_AT_GPU;
  } else if (head_ == HEAD_AT_CPU) {
    // Lazily create the device buffer the first time it is needed.
    if (!gpu_ptr_) {
      CUDA_CHECK(cudaGetDevice(&gpu_device_));
      CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
      own_gpu_data_ = true;
    }
    // Upload the CPU contents; afterwards both copies are marked in sync.
    caffe_gpu_memcpy(size_, cpu_ptr_, gpu_ptr_);
    head_ = SYNCED;
  }
  // HEAD_AT_GPU and SYNCED: the data is already on the GPU.
#else
  NO_GPU;
#endif
}
开发者ID:runaway,项目名称:studycaffe,代码行数:34,代码来源:syncedmem.cpp
示例2: ~cuda_pattern_data
/**
 * Releases every device buffer held by this object.
 *
 * The buffers are freed with device_id selected as the current device; if a
 * different device is current on entry, the destructor switches to device_id
 * first (the previous device is not restored). All pointers are reset to 0
 * after being freed.
 */
~cuda_pattern_data()
{
    int current_id;
    cuda_assert( cudaGetDevice(&current_id) );
    if ( current_id != device_id ) cuda_assert( cudaSetDevice( device_id ) );

    if ( ar ) cuda_assert( cudaFree(ar) );
    if ( dim ) cuda_assert( cudaFree(dim) );
    if ( I_diff ) cuda_assert( cudaFree(I_diff) );
    if ( I_exp ) cuda_assert( cudaFree(I_exp) );
    // Guard on I_zigmoid itself (the original tested I_exp here, which could
    // leak I_zigmoid whenever I_exp was null).
    if ( I_zigmoid ) cuda_assert( cudaFree(I_zigmoid) );
    if ( diag ) cuda_assert( cudaFree(diag) );
    if ( ug ) cuda_assert( cudaFree(ug) );
    if ( cache ) cuda_assert( cudaFree(cache) );
    if ( beams ) cuda_assert( cudaFree(beams) );
    if ( kt_factor ) cuda_assert( cudaFree(kt_factor) );
    if ( gvec ) cuda_assert( cudaFree(gvec) );
    if ( tilt ) cuda_assert( cudaFree(tilt) );

    // Null out every freed pointer, including beams and kt_factor which the
    // original left dangling.
    ar = 0;
    dim = 0;
    I_diff = 0;
    I_exp = 0;
    I_zigmoid = 0;
    diag = 0;
    ug = 0;
    cache = 0;
    beams = 0;
    kt_factor = 0;
    gvec = 0;
    tilt = 0;
}
开发者ID:fengwang,项目名称:larbed-refinement,代码行数:30,代码来源:cuda_rotated_pattern_data.hpp
示例3: THCState_getCurrentDeviceProperties
/*
 * Returns a pointer to the entry of state->deviceProperties that belongs to
 * the CUDA device that is current for the calling thread.
 * The cudaGetDevice call is checked with THCudaCheck.
 */
struct cudaDeviceProp* THCState_getCurrentDeviceProperties(THCState* state)
{
  int device = -1;
  struct cudaDeviceProp* props;

  THCudaCheck(cudaGetDevice(&device));
  props = state->deviceProperties + device;
  return props;
}
开发者ID:ASAPPinc,项目名称:cutorch,代码行数:7,代码来源:THCGeneral.c
示例4: Engine::DeviceQuery
void Engine::DeviceQuery() {
cudaDeviceProp prop;
int device;
if (cudaSuccess != cudaGetDevice(&device)) {
printf("No cuda device present.\n");
return;
}
CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
LOG(INFO) << "Device id: " << device;
LOG(INFO) << "Major revision number: " << prop.major;
LOG(INFO) << "Minor revision number: " << prop.minor;
LOG(INFO) << "Name: " << prop.name;
LOG(INFO) << "Total global memory: " << prop.totalGlobalMem;
LOG(INFO) << "Total shared memory per block: " << prop.sharedMemPerBlock;
LOG(INFO) << "Total registers per block: " << prop.regsPerBlock;
LOG(INFO) << "Warp size: " << prop.warpSize;
LOG(INFO) << "Maximum memory pitch: " << prop.memPitch;
LOG(INFO) << "Maximum threads per block: " << prop.maxThreadsPerBlock;
LOG(INFO) << "Maximum dimension of block: "
<< prop.maxThreadsDim[0] << ", " << prop.maxThreadsDim[1] << ", "
<< prop.maxThreadsDim[2];
LOG(INFO) << "Maximum dimension of grid: "
<< prop.maxGridSize[0] << ", " << prop.maxGridSize[1] << ", "
<< prop.maxGridSize[2];
LOG(INFO) << "Clock rate: " << prop.clockRate;
LOG(INFO) << "Total constant memory: " << prop.totalConstMem;
LOG(INFO) << "Texture alignment: " << prop.textureAlignment;
LOG(INFO) << "Concurrent copy and execution: "
<< (prop.deviceOverlap ? "Yes" : "No");
LOG(INFO) << "Number of multiprocessors: " << prop.multiProcessorCount;
LOG(INFO) << "Kernel execution timeout: "
<< (prop.kernelExecTimeoutEnabled ? "Yes" : "No");
return;
}
开发者ID:airxcy,项目名称:ITF_Tracker,代码行数:34,代码来源:common.cpp
示例5: THCudaMemGetInfoCached
/*
 * Queries free/total device memory and adjusts the free amount by what the
 * device allocator reports as cached.
 *
 * freeBytes    - out: free bytes from cudaMemGetInfo plus the cached bytes
 *                reported by the allocator's cacheInfo callback (if any).
 * totalBytes   - out: total bytes as reported by cudaMemGetInfo.
 * largestBlock - out: set to 0 first; on success initialised to the raw free
 *                byte count and then handed to cacheInfo, which may update it.
 *
 * Returns the first failing CUDA error, or cudaSuccess.
 */
cudaError_t THCudaMemGetInfoCached(THCState *state, size_t* freeBytes, size_t* totalBytes, size_t* largestBlock)
{
  THCDeviceAllocator* devAllocator = state->cudaDeviceAllocator;
  size_t bytesInCache = 0;
  int curDevice;
  cudaError_t status;

  *largestBlock = 0;

  /* Ask CUDA first, then find out which device the numbers belong to. */
  status = cudaMemGetInfo(freeBytes, totalBytes);
  if (status == cudaSuccess)
    status = cudaGetDevice(&curDevice);
  if (status != cudaSuccess)
    return status;

  /* Optimistic guess: the whole free area is one block. */
  *largestBlock = *freeBytes;

  if (devAllocator->cacheInfo != NULL)
    devAllocator->cacheInfo(devAllocator->state, curDevice, &bytesInCache, largestBlock);

  /* Memory held in the allocator cache is counted as free as well. */
  *freeBytes += bytesInCache;
  return cudaSuccess;
}
开发者ID:HustlehardInc,项目名称:pytorch,代码行数:26,代码来源:THCGeneral.cpp
示例6: Caffe::DeviceQuery
// Prints the properties of the current CUDA device to stdout.
// If no current device can be obtained, prints a short notice and returns.
//
// The size_t members of cudaDeviceProp (totalGlobalMem, sharedMemPerBlock,
// memPitch, totalConstMem, textureAlignment) are printed with %zu; the
// previous %lu was only correct where unsigned long and size_t coincide.
void Caffe::DeviceQuery() {
  cudaDeviceProp prop;
  int device;
  if (cudaSuccess != cudaGetDevice(&device)) {
    printf("No cuda device present.\n");
    return;
  }
  CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
  printf("Device id: %d\n", device);
  printf("Major revision number: %d\n", prop.major);
  printf("Minor revision number: %d\n", prop.minor);
  printf("Name: %s\n", prop.name);
  printf("Total global memory: %zu\n", prop.totalGlobalMem);
  printf("Total shared memory per block: %zu\n", prop.sharedMemPerBlock);
  printf("Total registers per block: %d\n", prop.regsPerBlock);
  printf("Warp size: %d\n", prop.warpSize);
  printf("Maximum memory pitch: %zu\n", prop.memPitch);
  printf("Maximum threads per block: %d\n", prop.maxThreadsPerBlock);
  printf("Maximum dimension of block: %d, %d, %d\n",
         prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
  printf("Maximum dimension of grid: %d, %d, %d\n",
         prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
  printf("Clock rate: %d\n", prop.clockRate);
  printf("Total constant memory: %zu\n", prop.totalConstMem);
  printf("Texture alignment: %zu\n", prop.textureAlignment);
  printf("Concurrent copy and execution: %s\n",
         (prop.deviceOverlap ? "Yes" : "No"));
  printf("Number of multiprocessors: %d\n", prop.multiProcessorCount);
  printf("Kernel execution timeout: %s\n",
         (prop.kernelExecTimeoutEnabled ? "Yes" : "No"));
  return;
}
开发者ID:FuchenUSTC,项目名称:caffe-c3d,代码行数:32,代码来源:common.cpp
示例7: GpuDeviceInformationDialog::setupGpuDeviceTabPages
void GpuDeviceInformationDialog::setupGpuDeviceTabPages()
{
int numDevs = 0;
cudaGetDeviceCount(&numDevs);
this->setWindowTitle(QString("GPU Device Information (") + QString::number(numDevs) + QString(" devices found)"));
for(int i = 0; i < numDevs; i++)
{
cudaDeviceProp devProp;
cudaGetDeviceProperties(&devProp, i);
QWidget* deviceTabPage = new GpuDeviceInformationDialogTabPage(devProp, i);
this->tabWidget->addTab(deviceTabPage, devProp.name);
connect(deviceTabPage, SIGNAL(setMainComputeDevice(int)), this, SLOT(on_setMainComputeDevice(int)));
connect(this, SIGNAL(hasChangedMainComputeDevice(int)), deviceTabPage, SLOT(on_hasChangedMainComputeDevice(int)));
}
int currentComputeDevice;
cudaGetDevice(¤tComputeDevice);
emit hasChangedMainComputeDevice(currentComputeDevice);
}
开发者ID:apartridge,项目名称:GpuRayTracer,代码行数:27,代码来源:GpuDeviceInformationDialog.cpp
示例8: GetDevice
// Exported wrapper that returns the id of the current CUDA device.
// The status of cudaGetDevice is not inspected; the local starts at 0, so 0
// is returned if the call leaves it untouched.
__declspec(dllexport) int __stdcall GetDevice()
{
    int currentDeviceId = 0;

    cudaGetDevice(&currentDeviceId);

    return currentDeviceId;
}
开发者ID:dtegunov,项目名称:warp,代码行数:7,代码来源:Device.cpp
示例9: rcrackiThreadEntryPoint
// Start processing of jobs.
//
// In GPU builds, when a GPU was requested (gpu != 0) and the current CUDA
// device can be queried, the CUDA buffer count and chain size are chosen from
// the device's major compute capability; otherwise cudaDevId is set to -1.
// After that, the thread either runs the false-alarm check (CheckAlarmO or
// CheckAlarm) or the pre-calculation.
//
// The runtime API calls return cudaError_t, so their results are compared
// against cudaSuccess (not the driver API constant CUDA_SUCCESS).
void rcrackiThread::rcrackiThreadEntryPoint()
{
#if GPU
    if (gpu != 0 && cudaGetDevice(&cudaDevId) == cudaSuccess) {
        // Defaults, also used when the device properties cannot be read.
        cudaBuffCount = 0x2000;
        cudaChainSize = 100;

        cudaDeviceProp deviceProp;
        if (cudaGetDeviceProperties(&deviceProp, cudaDevId) == cudaSuccess) {
            switch (deviceProp.major) {
            case 1:
                break;  // keep the defaults
            case 2:
                cudaBuffCount = 0x4000;
                cudaChainSize = 200;
                break;
            }
        }
        cudaBuffCount = rcuda::GetChainsBufferSize(cudaBuffCount);
    }
    else
#endif
    {
        // No usable GPU (or non-GPU build). The braces make the statement
        // well-formed whether or not the preprocessor keeps the 'else' above.
        cudaDevId = -1;
    }

    if (falseAlarmChecker) {
        if (falseAlarmCheckerO) {
            CheckAlarmO();
        }
        else {
            CheckAlarm();
        }
    }
    else {
        PreCalculate();
    }
}
开发者ID:ChunHungLiu,项目名称:ctf-writeup,代码行数:36,代码来源:rcrackiThread.cpp
示例10: TryToAddSingleFitStream
// Tries to add one SingleFitStream (built from q) to the stream manager that
// vpsM points to.
//
// Returns false only when creating the stream throws a cudaException and the
// manager holds no streams at all, in which case the message printed tells the
// caller to fall back to the CPU. Returns true in every other case, including
// builds without ION_COMPILE_CUDA where the function does nothing.
bool TryToAddSingleFitStream(void * vpsM, WorkerInfoQueue* q){
#ifdef ION_COMPILE_CUDA
    cudaStreamManager * manager = static_cast<cudaStreamManager *>(vpsM);

    int deviceId = 0;
    cudaGetDevice( &deviceId );

    // Exception handling allows a fallback to the CPU fit if not a single
    // stream could be created.
    try {
        SingleFitStream * newStream = new SingleFitStream(q);
        int unitIndex = manager->addStreamUnit( newStream );
        std::cout << "CUDA: Device " << deviceId << " Single Fit stream " << unitIndex << " created " << std::endl;
        manager->printMemoryUsage();
    }
    catch (cudaException& e) {
        std::cout << e.what() << std::endl;

        if (!(manager->getNumStreams() > 0)) {
            std::cout << "CUDA: Device " << deviceId << " no Single Fit streams could be created >>>>>>>>>>>>>>>>> FALLING BACK TO CPU!" << std::endl;
            return false;
        }

        // Some streams already exist: report the limit and carry on.
        std::cout << "CUDA: Device " << deviceId << " could not create more than " << manager->getNumStreams() << " Single Fit streams" << std::endl;
        manager->printMemoryUsage();
    }
#endif
    return true;
}
开发者ID:bdiegel,项目名称:TS,代码行数:28,代码来源:cudaWrapper.cpp
示例11: getCurrentDeviceProperties
// Returns the properties of the current CUDA device, as provided by
// getDeviceProperties(). The result of cudaGetDevice is passed to checkCuda
// together with a message naming the call that was made (the message
// previously named cudaGetDeviceCount, which this function never calls).
const cudaDeviceProp& getCurrentDeviceProperties() {
  int device = 0;
  auto err = cudaGetDevice(&device);
  checkCuda(err, std::string("CUDA ERROR: cudaGetDevice "));
  return getDeviceProperties(device);
}
开发者ID:JohnJPS,项目名称:fbcuda,代码行数:7,代码来源:CachedDeviceProperties.cpp
示例12: GpuSurfDetectorInternal::GpuSurfDetectorInternal
// Constructs the detector: verifies that a CUDA device is present, reads the
// properties of the current device into m_deviceProp, requires compute
// capability 1.1 or newer, and prepares the m_maxmin buffer.
GpuSurfDetectorInternal::GpuSurfDetectorInternal(GpuSurfConfiguration config) :
    m_initialized(false),
    m_config(config)
{
    // Zero-initialised so that a failing cudaGetDeviceCount is reported by the
    // assertion below instead of reading an indeterminate value.
    int deviceCount = 0;
    int device = 0;
    cudaError_t err;
    cudaGetDeviceCount(&deviceCount);
    ASRL_ASSERT_GT(deviceCount,0,"There are no CUDA capable devices present");
    err = cudaGetDevice(&device);
    ASRL_ASSERT_EQ(err,cudaSuccess, "Unable to get the CUDA device: " << cudaGetErrorString(err));
    //std::cout << "Found device " << device << std::endl;
    err = cudaGetDeviceProperties(&m_deviceProp,device);
    ASRL_ASSERT_EQ(err,cudaSuccess, "Unable to get the CUDA device properties: " << cudaGetErrorString(err));
    // Some more checking...
    // Compare major.minor as one number: checking minor >= 1 on its own would
    // wrongly reject devices such as 2.0 or 3.0.
    const int computeCapability = m_deviceProp.major * 10 + m_deviceProp.minor;
    ASRL_ASSERT_GE(computeCapability,11,"Minimum compute capability 1.1 is necessary");
    m_maxmin.init(ASRL_SURF_MAX_CANDIDATES,false);
    m_maxmin.memset(0);
}
开发者ID:dongmingdmdm,项目名称:camnavi,代码行数:25,代码来源:GpuSurfDetectorInternal.cpp
示例13: CUDADevicesService::getMaximumTexture2DHeight
// Returns maxTexture2D[1] (the second 2D-texture dimension limit) of the
// current CUDA device.
//
// The properties struct lives on the stack; the previous heap allocation was
// never freed and leaked on every call. It is value-initialised, so a failed
// query yields 0, as before.
int CUDADevicesService::getMaximumTexture2DHeight() {
    int device = 0;
    cudaGetDevice(&device);

    cudaDeviceProp devProperties = cudaDeviceProp();
    cudaGetDeviceProperties(&devProperties, device);

    return devProperties.maxTexture2D[1];
}
开发者ID:christiantinauer,项目名称:relaxometry,代码行数:7,代码来源:CUDADevicesService.cpp
示例14: cutorch_streamWaitFor
/*
   Usage:
   cutorch.streamWaitFor(waiterStream, {waitForStream1, ..., waitForStreamN})
   for streams on the current device. Creates a one-way barrier where
   waiterStream waits for waitForStream1-N to reach the current point.

   Raises a Lua error if the temporary event array cannot be allocated.
*/
static int cutorch_streamWaitFor(lua_State *L)
{
  THCState *state = cutorch_getstate(L);

  int curDev = -1;
  THCudaCheck(cudaGetDevice(&curDev));

  /* Check that the waiting stream is in bounds; this will error out if not */
  int waitingId = (int) luaL_checknumber(L, 1);
  cudaStream_t streamWaiting =
    THCState_getDeviceStream(state, curDev, waitingId);

  /* Validate the streams that we are waiting on */
  int streams = checkAndCountListOfStreams(L, state, 2, curDev);

  if (streams < 1) {
    /* nothing to synchronize */
    return 0;
  }

  /* One-way dependency; streamWaiting will wait for the list of streams to
     wait on to complete execution of pending scheduled kernels/events */
  cudaEvent_t * events = (cudaEvent_t*)malloc(sizeof(cudaEvent_t) * streams);
  if (events == NULL) {
    return luaL_error(L, "cutorch.streamWaitFor: out of memory");
  }

  /* Only touch the events that were actually created, as the sibling
     cutorch.streamBarrier does. */
  int eventsCreated = createSingleDeviceEvents(L, state, 2, curDev, events);

  /* Then, wait on them */
  for (int i = 0; i < eventsCreated; i++) {
    THCudaCheck(cudaStreamWaitEvent(streamWaiting, events[i], 0));
    THCudaCheck(cudaEventDestroy(events[i]));
  }

  free(events);
  return 0;
}
开发者ID:ASAPPinc,项目名称:cutorch,代码行数:37,代码来源:init.c
示例15: gpu_print_properties
void gpu_print_properties(FILE* out){
int device = -1;
gpu_safe( cudaGetDevice(&device) );
cudaDeviceProp prop;
gpu_safe( cudaGetDeviceProperties(&prop, device) );
int MiB = 1024 * 1024;
int kiB = 1024;
fprintf(out, " Device number: %d\n", device);
fprintf(out, " Device name: %s\n", prop.name);
fprintf(out, " Global Memory: %d MiB\n", (int)(prop.totalGlobalMem/MiB));
fprintf(out, " Shared Memory: %d kiB/block\n", (int)(prop.sharedMemPerBlock/kiB));
fprintf(out, " Constant memory: %d kiB\n", (int)(prop.totalConstMem/kiB));
fprintf(out, " Registers: %d per block\n", (int)(prop.regsPerBlock/kiB));
fprintf(out, " Warp size: %d threads\n", (int)(prop.warpSize));
//fprintf(out, " Max memory pitch: %d bytes\n", (int)(prop.memPitch));
fprintf(out, " Texture alignment: %d bytes\n", (int)(prop.textureAlignment));
fprintf(out, " Max threads/block: %d\n", prop.maxThreadsPerBlock);
fprintf(out, " Max block size: %d x %d x %d threads\n", prop.maxThreadsDim[X], prop.maxThreadsDim[Y], prop.maxThreadsDim[Z]);
fprintf(out, " Max grid size: %d x %d x %d blocks\n", prop.maxGridSize[X], prop.maxGridSize[Y], prop.maxGridSize[Z]);
fprintf(out, "Compute capability: %d.%d\n", prop.major, prop.minor);
fprintf(out, " Clock rate: %d MHz\n", prop.clockRate/1000);
fprintf(out, " Multiprocessors: %d\n", prop.multiProcessorCount);
fprintf(out, " Timeout enabled: %d\n", prop.kernelExecTimeoutEnabled);
fprintf(out, " Compute mode: %d\n", prop.computeMode);
fprintf(out, " Device overlap: %d\n", prop.deviceOverlap);
fprintf(out, "Concurrent kernels: %d\n", prop.concurrentKernels);
fprintf(out, " Integrated: %d\n", prop.integrated);
fprintf(out, " Can map host mem: %d\n", prop.canMapHostMemory);
}
开发者ID:LStoleriu,项目名称:hotspin,代码行数:33,代码来源:gpu_properties.cpp
示例16: cutorch_streamBarrier
/*
   Usage:
   cutorch.streamBarrier({stream1, stream2, ..., streamN})
   applies to streams for the current device. Creates a N-way barrier
   to synchronize all of the streams given.

   Raises a Lua error if the temporary event array cannot be allocated.
*/
static int cutorch_streamBarrier(lua_State *L)
{
  THCState *state = cutorch_getstate(L);

  int curDev = -1;
  THCudaCheck(cudaGetDevice(&curDev));

  int streams = checkAndCountListOfStreams(L, state, 1, curDev);

  if (streams < 2) {
    /* nothing to synchronize together */
    return 0;
  }

  /* Multi-way dependency (barrier); all streams must complete execution
     of pending scheduled kernels/events */
  cudaEvent_t * events = (cudaEvent_t*)malloc(sizeof(cudaEvent_t) * streams);
  if (events == NULL) {
    return luaL_error(L, "cutorch.streamBarrier: out of memory");
  }

  /* First, create an event and record them for all streams */
  int eventsCreated = createSingleDeviceEvents(L, state, 1, curDev, events);

  /* Then, wait on the event. Each stream is actually waiting on itself here
     too, but that's harmless and isn't worth weeding out. */
  waitSingleDeviceEvents(L, state, 1, curDev, events, eventsCreated);

  for (int i = 0; i < eventsCreated; i++)
    THCudaCheck(cudaEventDestroy(events[i]));

  free(events);
  return 0;
}
开发者ID:ASAPPinc,项目名称:cutorch,代码行数:34,代码来源:init.c
示例17: cudppMoveToFrontTransform
/**
 * @brief Performs the Move-to-Front Transform
 *
 * Performs a parallel move-to-front transform on 1,048,576 elements.
 * The MTF uses a scan-based algorithm described in our paper "Parallel
 * Lossless Data Compression on the GPU". (See the \ref references bibliography).
 *
 * - Currently, the MTF can only be performed on 1,048,576 (uchar) elements.
 * - The transformed string is written to \a d_out.
 * - Requires a device of compute capability 2.0 or greater; if the current
 *   device cannot be queried it is treated as unsupported.
 *
 * @param[in] planHandle Handle to plan for MTF
 * @param[in] d_in Input data
 * @param[out] d_out Output data
 * @param[in] numElements Number of elements
 * @returns CUDPPResult indicating success or error condition
 *
 * @see cudppPlan, CUDPPConfiguration, CUDPPAlgorithm
 */
CUDPP_DLL
CUDPPResult cudppMoveToFrontTransform(CUDPPHandle planHandle,
                                      unsigned char *d_in,
                                      unsigned char *d_out,
                                      size_t numElements)
{
    // first check: is this device >= 2.0? if not, return error
    int dev;
    if (cudaGetDevice(&dev) != cudaSuccess) {
        // Without a device id the capability cannot be verified.
        return CUDPP_ERROR_ILLEGAL_CONFIGURATION;
    }

    cudaDeviceProp devProps;
    if (cudaGetDeviceProperties(&devProps, dev) != cudaSuccess) {
        // devProps would be uninitialised; do not read devProps.major.
        return CUDPP_ERROR_ILLEGAL_CONFIGURATION;
    }

    if((int)devProps.major < 2) {
        // Only supported on devices with compute
        // capability 2.0 or greater
        return CUDPP_ERROR_ILLEGAL_CONFIGURATION;
    }

    CUDPPMtfPlan * plan =
        (CUDPPMtfPlan *) getPlanPtrFromHandle<CUDPPMtfPlan>(planHandle);

    if(plan != NULL)
    {
        if (plan->m_config.algorithm != CUDPP_MTF)
            return CUDPP_ERROR_INVALID_PLAN;
        if (plan->m_config.datatype != CUDPP_UCHAR)
            return CUDPP_ERROR_ILLEGAL_CONFIGURATION;

        cudppMtfDispatch(d_in, d_out, numElements, plan);
        return CUDPP_SUCCESS;
    }
    else
        return CUDPP_ERROR_INVALID_HANDLE;
}
开发者ID:UIKit0,项目名称:cudpp,代码行数:54,代码来源:cudpp.cpp
示例18: getDevCapability
/**
 * Returns the compute capability of the selected GPU.
 *
 * Both CUDA queries are wrapped in cutilSafeCall, so a failing cudaGetDevice
 * is reported instead of passing an indeterminate device id on to
 * cudaGetDeviceProperties.
 *
 * @return the compute capability in the integer format (210 means
 * version 2.1)
 */
int getDevCapability() {
    cudaDeviceProp devProp;
    int dev = 0;
    cutilSafeCall(cudaGetDevice(&dev));
    cutilSafeCall(cudaGetDeviceProperties(&devProp, dev));
    return devProp.major*100+devProp.minor*10;
}
开发者ID:edanssandes,项目名称:MASA-CUDAlign,代码行数:12,代码来源:cuda_util.cpp
示例19: P2PSync::on_gradients_ready
// Combines gradients once they are ready: first adds each child's
// parent_grads_ buffer into this node's diff_, one child per queue_ entry;
// then either copies diff_ into parent_grads_ and pushes this node onto the
// parent's queue (when parent_ is set), or scales diff_ by
// 1 / Caffe::solver_count() (no parent).
// `timer` is restarted before each step and the elapsed milliseconds are
// appended to `timing`. Compiled to an empty body when CPU_ONLY is defined.
// NOTE(review): the `template <typename Dtype>` line that must precede this
// definition is not part of this excerpt.
void P2PSync<Dtype>::on_gradients_ready(Timer* timer, ostringstream* timing) {
#ifndef CPU_ONLY
#ifdef DEBUG
// Debug-only: the current CUDA device must be the one configured for solver_.
int device;
CUDA_CHECK(cudaGetDevice(&device));
CHECK(device == solver_->param().device_id());
#endif
// Sum children gradients as they appear in the queue
for (int i = 0; i < children_.size(); ++i) {
timer->Start();
// One queue entry is consumed per child; presumably pop() blocks until a
// child has pushed itself - confirm against the queue implementation.
P2PSync<Dtype> *child = queue_.pop();
Dtype* src = child->parent_grads_;
Dtype* dst = diff_;
#ifdef DEBUG
// Debug-only: the popped entry must be one of our children, and both buffers
// must be reported as belonging to this device.
bool ok = false;
for (int j = 0; j < children_.size(); ++j) {
if (child == children_[j]) {
ok = true;
}
}
CHECK(ok);
cudaPointerAttributes attributes;
CUDA_CHECK(cudaPointerGetAttributes(&attributes, src));
CHECK(attributes.device == device);
CUDA_CHECK(cudaPointerGetAttributes(&attributes, dst));
CHECK(attributes.device == device);
#endif
// Accumulate in place (dst is both an input and the output argument).
caffe_gpu_add(size_, src, dst, dst);
*timing << " add_grad: " << timer->MilliSeconds();
}
// Send gradients to parent
if (parent_) {
timer->Start();
Dtype* src = diff_;
Dtype* dst = parent_grads_;
#ifdef DEBUG
// Debug-only: src must be on this device, dst on the parent's device.
cudaPointerAttributes attributes;
CUDA_CHECK(cudaPointerGetAttributes(&attributes, src));
CHECK(attributes.device == device);
CUDA_CHECK(cudaPointerGetAttributes(&attributes, dst));
CHECK(attributes.device == parent_->solver_->param().device_id());
#endif
// The copy is issued asynchronously on the default stream and waited for
// before this node is pushed onto the parent's queue.
CUDA_CHECK(cudaMemcpyAsync(dst, src, size_ * sizeof(Dtype), //
cudaMemcpyDeviceToDevice, cudaStreamDefault));
CUDA_CHECK(cudaStreamSynchronize(cudaStreamDefault));
parent_->queue_.push(this);
*timing << " send_grad: " << timer->MilliSeconds();
} else {
// Loss functions divide gradients by the batch size, so to compensate
// for split batch, the root solver divides by number of solvers.
caffe_gpu_scal(size_, Dtype(1.0 / Caffe::solver_count()), diff_);
}
#endif
}
开发者ID:bbshocking,项目名称:caffe,代码行数:60,代码来源:parallel.cpp
示例20: checkDeviceMeetComputeSpec
void checkDeviceMeetComputeSpec(int argc, char **argv)
{
int device = 0;
cudaGetDevice(&device);
if (checkCUDAProfile(device, MIN_RUNTIME_VERSION, MIN_COMPUTE_VERSION))
{
fprintf(stderr,"\nCUDA Capable Device %d, meets minimum required specs.\n", device);
}
else
{
fprintf(stderr, "\nNo configuration with minimum compute capabilities found. Exiting...\n");
fprintf(stderr, "This sample requires:\n");
fprintf(stderr, "\tCUDA Compute Capability >= %d.%d is required\n", MIN_COMPUTE_VERSION/16, MIN_COMPUTE_VERSION%16);
fprintf(stderr, "\tCUDA Runtime Version >= %d.%d is required\n", MIN_RUNTIME_VERSION/1000, (MIN_RUNTIME_VERSION%100)/10);
// cudaDeviceReset causes the driver to clean up all state. While
// not mandatory in normal operation, it is good practice. It is also
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
cudaDeviceReset();
exit(EXIT_SUCCESS);
}
}
开发者ID:ajperalt,项目名称:nvidia-cuda-7.0-samples,代码行数:25,代码来源:volumeFiltering.cpp
注:本文中的cudaGetDevice函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论