This article collects typical usage examples of the C++ cudaStreamDestroy function. If you have been wondering how to use cudaStreamDestroy in C++, what it does, or what real calls to it look like, the curated examples below should help.
Twenty code examples of cudaStreamDestroy follow, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better C++ code samples.
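Before the examples, a minimal self-contained sketch of the usual lifecycle may help frame them: create a stream, queue asynchronous work on it, synchronize, then destroy it. This is illustrative only; the 1 MiB buffer size is arbitrary and the terse error handling is a placeholder.
#include <cuda_runtime.h>

int main() {
    cudaStream_t stream;
    if (cudaStreamCreate(&stream) != cudaSuccess) return 1;

    void *buf = nullptr;
    if (cudaMalloc(&buf, 1 << 20) != cudaSuccess) return 1;

    // Queue asynchronous work on the stream.
    cudaMemsetAsync(buf, 0, 1 << 20, stream);

    // Drain the stream before tearing anything down.
    cudaStreamSynchronize(stream);

    cudaFree(buf);
    // cudaStreamDestroy releases the stream's resources; synchronizing
    // first keeps the teardown explicit and easy to reason about.
    cudaStreamDestroy(stream);
    return 0;
}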
Example 1: CopySegment
void CopySegment(int a, int b)
{
    void *deva_buff = nullptr, *devb_buff = nullptr;
    void *deva_buff2 = nullptr, *devb_buff2 = nullptr;
    cudaStream_t a_stream, b_stream;

    // Allocate buffers and a non-blocking stream on each device
    CUDA_CHECK(cudaSetDevice(a));
    CUDA_CHECK(cudaMalloc(&deva_buff, FLAGS_size));
    CUDA_CHECK(cudaMalloc(&deva_buff2, FLAGS_size));
    CUDA_CHECK(cudaStreamCreateWithFlags(&a_stream, cudaStreamNonBlocking));
    CUDA_CHECK(cudaSetDevice(b));
    CUDA_CHECK(cudaMalloc(&devb_buff, FLAGS_size));
    CUDA_CHECK(cudaMalloc(&devb_buff2, FLAGS_size));
    CUDA_CHECK(cudaStreamCreateWithFlags(&b_stream, cudaStreamNonBlocking));

    // Synchronize both devices before copying
    CUDA_CHECK(cudaSetDevice(a));
    CUDA_CHECK(cudaDeviceSynchronize());
    CUDA_CHECK(cudaSetDevice(b));
    CUDA_CHECK(cudaDeviceSynchronize());

    // Exchange data in both directions at once (full duplex)
    auto t1 = std::chrono::high_resolution_clock::now();
    for (uint64_t i = 0; i < FLAGS_repetitions; ++i)
    {
        CUDA_CHECK(cudaMemcpyPeerAsync(devb_buff, b, deva_buff, a,
                                       FLAGS_size, b_stream));
        CUDA_CHECK(cudaMemcpyPeerAsync(deva_buff2, a, devb_buff2, b,
                                       FLAGS_size, a_stream));
    }
    CUDA_CHECK(cudaSetDevice(a));
    CUDA_CHECK(cudaDeviceSynchronize());
    CUDA_CHECK(cudaSetDevice(b));
    CUDA_CHECK(cudaDeviceSynchronize());
    auto t2 = std::chrono::high_resolution_clock::now();

    double mstime = std::chrono::duration_cast<std::chrono::microseconds>(
                        t2 - t1).count() / 1000.0 / FLAGS_repetitions;
    // MiB/s = [bytes / (1024^2)] / [ms / 1000]
    double MiBps = (FLAGS_size / 1024.0 / 1024.0) / (mstime / 1000.0);
    printf("%.2lf MiB/s (%lf ms)\n", MiBps, mstime);

    // Free buffers and destroy each stream on its owning device
    CUDA_CHECK(cudaSetDevice(a));
    CUDA_CHECK(cudaFree(deva_buff));
    CUDA_CHECK(cudaFree(deva_buff2));
    CUDA_CHECK(cudaStreamDestroy(a_stream));
    CUDA_CHECK(cudaSetDevice(b));
    CUDA_CHECK(cudaFree(devb_buff));
    CUDA_CHECK(cudaFree(devb_buff2));
    CUDA_CHECK(cudaStreamDestroy(b_stream));
}
Developer ID: tbennun, Project: mgbench, Lines: 57, Source: fullduplex.cpp
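Note that Example 1 (and several examples below) relies on an error-checking macro such as CUDA_CHECK that the snippet itself does not define. A minimal stand-in, assuming the usual print-and-abort behavior rather than any particular project's actual definition, could look like this:
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

// Hypothetical stand-in for the CUDA_CHECK macro used above: evaluate the
// call, and abort with a diagnostic if it did not return cudaSuccess.
#define CUDA_CHECK(call)                                              \
    do {                                                              \
        cudaError_t err_ = (call);                                    \
        if (err_ != cudaSuccess) {                                    \
            fprintf(stderr, "CUDA error at %s:%d: %s\n",              \
                    __FILE__, __LINE__, cudaGetErrorString(err_));    \
            exit(EXIT_FAILURE);                                       \
        }                                                             \
    } while (0)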
Example 2: CAFFE1_CUDA_CHECK
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
#ifndef CPU_ONLY
  cudaStream_t stream;
  cudaStream_t stream2;
  if (Caffe::mode() == Caffe::GPU) {
    CAFFE1_CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
    if (untransformed_top_)
      CAFFE1_CUDA_CHECK(cudaStreamCreateWithFlags(&stream2, cudaStreamNonBlocking));
  }
#endif
  try {
    while (!must_stop()) {
      Batch<Dtype>* batch = prefetch_free_.pop();
      Batch<Dtype>* batch_untransformed = NULL;
      if (untransformed_top_) {
        batch_untransformed = prefetch_free_untransformed_.pop();
        load_batch_and_untransformed_batch(batch, batch_untransformed);
      } else {
        load_batch(batch);
      }
#ifndef CPU_ONLY
      if (Caffe::mode() == Caffe::GPU) {
        batch->data_.data().get()->async_gpu_push(stream);
        CAFFE1_CUDA_CHECK(cudaStreamSynchronize(stream));
        if (untransformed_top_) {
          batch_untransformed->data_.data().get()->async_gpu_push(stream2);
          CAFFE1_CUDA_CHECK(cudaStreamSynchronize(stream2));
        }
      }
#endif
      prefetch_full_.push(batch);
      if (untransformed_top_)
        prefetch_full_untransformed_.push(batch_untransformed);
    }
  } catch (boost::thread_interrupted&) {
    // Interrupted exception is expected on shutdown
  }
#ifndef CPU_ONLY
  if (Caffe::mode() == Caffe::GPU) {
    CAFFE1_CUDA_CHECK(cudaStreamDestroy(stream));
    if (untransformed_top_)
      CAFFE1_CUDA_CHECK(cudaStreamDestroy(stream2));
  }
#endif
}
Developer ID: beniz, Project: caffe, Lines: 49, Source: base_data_layer.cpp
Example 3: cudnnDestroyTensorDescriptor
template <typename Dtype>
CuDNNConvolutionLayer<Dtype>::~CuDNNConvolutionLayer() {
  // Check that handles have been set up before destroying.
  if (!handles_setup_) { return; }

  for (int_tp i = 0; i < bottom_descs_.size(); i++) {
    cudnnDestroyTensorDescriptor(bottom_descs_[i]);
    cudnnDestroyTensorDescriptor(top_descs_[i]);
    cudnnDestroyConvolutionDescriptor(conv_descs_[i]);
  }
  if (this->bias_term_) {
    cudnnDestroyTensorDescriptor(bias_desc_);
  }
  cudnnDestroyFilterDescriptor(filter_desc_);

  for (int_tp g = 0; g < this->group_ * CUDNN_STREAMS_PER_GROUP; g++) {
    cudaStreamDestroy(stream_[g]);
    cudnnDestroy(handle_[g]);
  }

  cudaFree(workspaceData);

  delete [] stream_;
  delete [] handle_;
  delete [] fwd_algo_;
  delete [] bwd_filter_algo_;
  delete [] bwd_data_algo_;
  delete [] workspace_fwd_sizes_;
  delete [] workspace_bwd_data_sizes_;
  delete [] workspace_bwd_filter_sizes_;
}
Developer ID: codeaudit, Project: caffe, Lines: 29, Source: cudnn_conv_layer.cpp
Example 4: TEST_P
TEST_P(MemcpyAsync, D2DTransfers) {
    const size_t param = GetParam();
    const size_t alloc = 1 << param;
    cudaError_t ret;

    void *d1, *d2;
    ret = cudaMalloc(&d1, alloc);
    ASSERT_EQ(cudaSuccess, ret);
    ret = cudaMalloc(&d2, alloc);
    ASSERT_EQ(cudaSuccess, ret);

    cudaStream_t stream;
    ret = cudaStreamCreate(&stream);
    ASSERT_EQ(cudaSuccess, ret);

    ret = cudaMemcpyAsync(d2, d1, alloc, cudaMemcpyDeviceToDevice, stream);
    ASSERT_EQ(cudaSuccess, ret);
    ret = cudaStreamSynchronize(stream);
    ASSERT_EQ(cudaSuccess, ret);

    ret = cudaFree(d1);
    ASSERT_EQ(cudaSuccess, ret);
    ret = cudaFree(d2);
    ASSERT_EQ(cudaSuccess, ret);
    ret = cudaStreamDestroy(stream);
    ASSERT_EQ(cudaSuccess, ret);
}
Developer ID: ckennelly, Project: panoptes, Lines: 31, Source: vtest_memcpyasync.cpp
Example 5: CUDA_CHECK
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
#ifndef CPU_ONLY
  cudaStream_t stream;  // CUDA stream, created non-blocking below
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
  }
#endif
  try {
    while (!must_stop()) {  // Loop, loading batches of data
      Batch<Dtype>* batch = prefetch_free_.pop();  // Grab a free batch
      load_batch(batch);  // Load a batch of data
#ifndef CPU_ONLY
      if (Caffe::mode() == Caffe::GPU) {
        batch->data_.data().get()->async_gpu_push(stream);
        if (this->output_labels_) {
          batch->label_.data().get()->async_gpu_push(stream);
        }
        CUDA_CHECK(cudaStreamSynchronize(stream));  // Wait for the GPU pushes to finish
      }
#endif
      prefetch_full_.push(batch);  // Push onto the queue of loaded batches
    }
  } catch (boost::thread_interrupted&) {  // Catch the interrupt and exit the while loop
    // Interrupted exception is expected on shutdown
  }
#ifndef CPU_ONLY
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaStreamDestroy(stream));  // Destroy the CUDA stream
  }
#endif
}
Developer ID: huanyii, Project: caffe_read, Lines: 32, Source: base_data_layer.cpp
Example 6: TEST
/**
 * CUDA 4 introduced the cudaMemcpyDefault direction for cudaMemcpy.
 */
TEST(MemcpyAsync, CheckDefaultDirection) {
    cudaError_t ret;
    cudaStream_t stream;
    ret = cudaStreamCreate(&stream);
    ASSERT_EQ(cudaSuccess, ret);

    int a1 = 0;
    int a2 = 0;
    int * b;
    ret = cudaMalloc((void**) &b, sizeof(*b));
    ASSERT_EQ(cudaSuccess, ret);

    ret = cudaMemcpyAsync(&a1, &a2, sizeof(a1), cudaMemcpyDefault, stream);
    EXPECT_EQ(cudaSuccess, ret);
    ret = cudaMemcpyAsync(&a1, b, sizeof(a1), cudaMemcpyDefault, stream);
    EXPECT_EQ(cudaSuccess, ret);
    ret = cudaMemcpyAsync(b, &a1, sizeof(a1), cudaMemcpyDefault, stream);
    EXPECT_EQ(cudaSuccess, ret);
    ret = cudaMemcpyAsync(b, b, sizeof(a1), cudaMemcpyDefault, stream);
    EXPECT_EQ(cudaSuccess, ret);

    ret = cudaStreamSynchronize(stream);
    EXPECT_EQ(cudaSuccess, ret);

    ret = cudaFree(b);
    ASSERT_EQ(cudaSuccess, ret);
    ret = cudaStreamDestroy(stream);
    EXPECT_EQ(cudaSuccess, ret);
}
Developer ID: ckennelly, Project: panoptes, Lines: 37, Source: vtest_memcpyasync.cpp
Example 7: TEST
TEST(StreamQuery, InvalidStream) {
    ::testing::FLAGS_gtest_death_test_style = "threadsafe";
    cudaError_t ret;
    cudaStream_t stream;

    /* The CUDA 5.0 driver no longer segfaults. */
    int driver;
    ret = cudaDriverGetVersion(&driver);
    ASSERT_EQ(cudaSuccess, ret);

    ret = cudaStreamCreate(&stream);
    ASSERT_EQ(cudaSuccess, ret);
    ret = cudaStreamDestroy(stream);
    ASSERT_EQ(cudaSuccess, ret);

    if (driver >= 5000) {
        ret = cudaStreamQuery(stream);
        EXPECT_EQ(cudaErrorUnknown, ret);
    } else {
        EXPECT_EXIT({
            cudaStreamQuery(stream); },
            ::testing::KilledBySignal(SIGSEGV), "");
    }
}
Developer ID: ckennelly, Project: panoptes, Lines: 25, Source: test_streamquery.cpp
Example 8: THCudaShutdown
void THCudaShutdown(THCState* state)
{
  THCRandom_shutdown(state);
  THCudaBlas_shutdown(state);
  free(state->blasState);
  free(state->rngState);
  free(state->deviceProperties);

  int prevDev = -1;
  THCudaCheck(cudaGetDevice(&prevDev));

  for (int dev = 0; dev < state->numDevices; ++dev) {
    THCudaCheck(cudaSetDevice(dev));
    /* Free Torch-defined streams (0 is the default stream) */
    for (int stream = 1; stream <= state->numUserStreams; ++stream) {
      THCudaCheck(cudaStreamDestroy(state->streamsPerDevice[dev][stream]));
    }
    free(state->streamsPerDevice[dev]);
  }
  free(state->streamsPerDevice);

  THCudaCheck(cudaSetDevice(prevDev));
}
Developer ID: noa, Project: cutorch, Lines: 25, Source: THCGeneral.c
Example 9: TEST
TEST(EventRecord, RecordAfterDestroy) {
    ::testing::FLAGS_gtest_death_test_style = "threadsafe";
    cudaError_t ret;
    cudaEvent_t event;
    cudaStream_t stream;

    ret = cudaEventCreate(&event);
    ASSERT_EQ(cudaSuccess, ret);
    ret = cudaEventDestroy(event);
    EXPECT_EQ(cudaSuccess, ret);

    ret = cudaStreamCreate(&stream);
    ASSERT_EQ(cudaSuccess, ret);

#if CUDART_VERSION >= 5000
    ret = cudaEventRecord(event);
    EXPECT_EQ(cudaErrorUnknown, ret);
#else
    EXPECT_EXIT(
        cudaEventRecord(event, stream),
        ::testing::KilledBySignal(SIGSEGV), "");
#endif

    ret = cudaStreamDestroy(stream);
    EXPECT_EQ(cudaSuccess, ret);
}
Developer ID: ckennelly, Project: panoptes, Lines: 28, Source: test_eventrecord.cpp
Example 10: cudaEventDestroy
__host__ __device__
~future()
{
  if(valid())
  {
#if __BULK_HAS_CUDART__
    // swallow errors
    cudaError_t e = cudaEventDestroy(m_event);

#if __BULK_HAS_PRINTF__
    if(e)
    {
      printf("CUDA error after cudaEventDestroy in future dtor: %s", cudaGetErrorString(e));
    } // end if
#endif // __BULK_HAS_PRINTF__

    if(m_owns_stream)
    {
      e = cudaStreamDestroy(m_stream);

#if __BULK_HAS_PRINTF__
      if(e)
      {
        printf("CUDA error after cudaStreamDestroy in future dtor: %s", cudaGetErrorString(e));
      } // end if
#endif // __BULK_HAS_PRINTF__
    } // end if
#endif
  } // end if
} // end ~future()
Developer ID: 0x0all, Project: thrust, Lines: 30, Source: future.hpp
Example 11: CUDA_CHECK
template <typename Dtype>
void BasePrefetchingLabelmapDataLayer<Dtype>::InternalThreadEntry() {
#ifndef CPU_ONLY
  cudaStream_t stream;
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
  }
#endif
  try {
    while (!must_stop()) {
      LabelmapBatch<Dtype>* batch = prefetch_free_.pop();
      load_batch(batch);
#ifndef CPU_ONLY
      if (Caffe::mode() == Caffe::GPU) {
        batch->data_.data().get()->async_gpu_push(stream);
        CUDA_CHECK(cudaStreamSynchronize(stream));
      }
#endif
      prefetch_full_.push(batch);
    }
  } catch (boost::thread_interrupted&) {
    // Interrupted exception is expected on shutdown
  }
#ifndef CPU_ONLY
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaStreamDestroy(stream));
  }
#endif
}
Developer ID: AndrewChiyz, Project: hed, Lines: 29, Source: base_data_layer.cpp
Example 12: cudaGetDevice
CudaStream::~CudaStream() {
  int current_device;  // Just to check CUDA status:
  cudaError_t status = cudaGetDevice(&current_device);
  // Prevents a deadlock while Caffe is shutting down.
  if (status != cudaErrorCudartUnloading) {
    CUDA_CHECK(cudaStreamDestroy(stream_));
  }
}
Developer ID: Caffe-MPI, Project: Caffe-MPI.github.io, Lines: 8, Source: common.cpp
Example 13: ActivateDevice
GpuDevice::Impl::~Impl() {
  ActivateDevice();
  for (size_t i = 0; i < kParallelism; ++i) {
    CUDNN_CALL(cudnnDestroy(cudnn_handle[i]));
    CUBLAS_CALL(cublasDestroy(cublas_handle[i]));
    CUDA_CALL(cudaStreamDestroy(stream[i]));
  }
}
Developer ID: AI42, Project: minerva, Lines: 8, Source: device.cpp
Example 14: CUDA_CHECK
template <typename Dtype>
NCCL<Dtype>::~NCCL() {
  if (solver_->param().layer_wise_reduce()) {
    CUDA_CHECK(cudaStreamDestroy(stream_));
  }
  if (comm_) {
    ncclCommDestroy(comm_);
  }
}
Developer ID: 20337112, Project: caffe, Lines: 8, Source: parallel.cpp
Example 15: cudaStreamDestroy
template <typename Dtype>
void BilateralFilterLayer<Dtype>::cudastream_free() {
#ifndef CPU_ONLY
  if (stream_ != NULL) {
    cudaStreamDestroy(*stream_);
    delete [] stream_;
    stream_ = NULL;
  }
#endif
}
Developer ID: jasonbunk, Project: caffe, Lines: 9, Source: bilateral_filter_layer.cpp
Example 16: get_gpu
JNIEXPORT jdouble JNICALL Java_org_apache_spark_mllib_classification_LogisticRegressionNative_predictPoint
  (JNIEnv *env, jobject obj, jdoubleArray data, jdoubleArray weights, jdouble intercept) {
  // the kernel is written to take multiple data sets and produce a set of results, but we're going
  // to run it as multiple parallel kernels, each producing a single result instead
  double *d_dataBuffer, *d_weightsBuffer, *d_score;
  int dataCount, dataLen, whichGPU;
  jdouble h_score, *h_dataBuffer, *h_weightsBuffer;
  cudaStream_t stream;

  // select a GPU for *this* specific dataset
  whichGPU = get_gpu();
  checkCudaErrors(cudaSetDevice(whichGPU));
  checkCudaErrors(cudaStreamCreate(&stream));

  // get a pointer to the raw input data, pinning it in memory
  dataCount = env->GetArrayLength(data);
  dataLen = dataCount * sizeof(double);
  assert(dataCount == env->GetArrayLength(weights));
  h_dataBuffer = (jdouble*) env->GetPrimitiveArrayCritical(data, 0);
  h_weightsBuffer = (jdouble*) env->GetPrimitiveArrayCritical(weights, 0);

  // copy input data to GPU memory
  // TODO: It may be better to access host memory directly, skipping the copy. Investigate.
  checkCudaErrors(mallocBest((void**)&d_dataBuffer, dataLen));
  checkCudaErrors(mallocBest((void**)&d_weightsBuffer, dataLen));
  checkCudaErrors(cudaMemcpyAsync(d_dataBuffer, h_dataBuffer, dataLen, cudaMemcpyHostToDevice, stream));
  checkCudaErrors(cudaMemcpyAsync(d_weightsBuffer, h_weightsBuffer, dataLen, cudaMemcpyHostToDevice, stream));

  // synchronize before unpinning, and also because there is a device-device transfer in predictKernelDevice
  checkCudaErrors(cudaStreamSynchronize(stream));

  // un-pin the host arrays, as we're done with them
  env->ReleasePrimitiveArrayCritical(data, h_dataBuffer, 0);
  env->ReleasePrimitiveArrayCritical(weights, h_weightsBuffer, 0);

  // allocate storage for the result
  checkCudaErrors(mallocBest((void**)&d_score, sizeof(double)));

  // run the kernel to produce a result
  predictKernelDevice(d_dataBuffer, d_weightsBuffer, intercept, d_score, 1, dataCount, stream);
  checkCudaErrors(cudaStreamSynchronize(stream));

  // copy the result back to the host
  checkCudaErrors(cudaMemcpyAsync(&h_score, d_score, sizeof(double), cudaMemcpyDeviceToHost, stream));
  checkCudaErrors(cudaStreamSynchronize(stream));

  // free the GPU buffers and destroy the stream
  checkCudaErrors(freeBest(d_dataBuffer));
  checkCudaErrors(freeBest(d_weightsBuffer));
  checkCudaErrors(freeBest(d_score));
  checkCudaErrors(cudaStreamDestroy(stream));

  return h_score;
}
Developer ID: IBMSparkGPU, Project: CUDA-MLlib, Lines: 56, Source: LogisticRegressionNative.cpp
Example 17: CUDA_CALL
GpuDevice::~GpuDevice() {
  CUDA_CALL(cudaSetDevice(device_));
  pool_.WaitForAllFinished();
  for (size_t i = 0; i < kParallelism; ++i) {
    CUDNN_CALL(cudnnDestroy(cudnn_handle_[i]));
    CUBLAS_CALL(cublasDestroy(cublas_handle_[i]));
    CUDA_CALL(cudaStreamDestroy(stream_[i]));
  }
  delete data_store_;
}
Developer ID: Exlsunshine, Project: minerva, Lines: 10, Source: device.cpp
Example 18: stopCache
bool ControlCubeCache::stopCache()
{
  if (cudaSuccess != cudaStreamDestroy(_stream))
  {
    std::cerr << "Control Cube Cache, cuda stream destroy error: "
              << cudaGetErrorString(cudaGetLastError()) << std::endl;
    return false;
  }
  return stopWork();
}
Developer ID: carlosduelo, Project: eqMivtRefactor, Lines: 10, Source: controlCubeCache.cpp
Example 19: THCStream_free
void THCStream_free(THCStream* self)
{
  if (!self || !self->stream) {
    return;
  }
  if (THAtomicDecrementRef(&self->refcount)) {
    THCudaCheckWarn(cudaStreamDestroy(self->stream));
    free(self);
  }
}
Developer ID: Jsmilemsj, Project: pytorch, Lines: 10, Source: THCStream.cpp
Example 20: cudaDestroyTextureObject
SingleParticle2dx::Methods::CUDAProjectionMethod::~CUDAProjectionMethod ()
{
  cudaDestroyTextureObject(m_texObj);
  cudaFreeArray(m_cuArray);
  cudaStreamDestroy(m_stream);

  delete[] m_matrix;
  delete m_t;

  free(res_data_h);
  cudaFree(res_data_d);
}
Developer ID: C-CINA, Project: 2dx, Lines: 11, Source: CudaProjectionMethod.cpp
Note: The cudaStreamDestroy examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors, and any use or redistribution should follow the corresponding project's license. Do not reproduce without permission.