本文整理汇总了C++中cuMemAlloc函数的典型用法代码示例。如果您正苦于以下问题:C++ cuMemAlloc函数的具体用法?C++ cuMemAlloc怎么用?C++ cuMemAlloc使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了cuMemAlloc函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: DeallocateResources
/*
 * Allocate the host and device buffers needed for a mining run of
 * `numb` blocks x `numt` threads.
 *
 * Host side:   m_in (one cuda_in), m_out (numb*numt cuda_out).
 * Device side: m_devin, m_devout (same sizes).
 *
 * Returns true only if every allocation succeeded.  A failed device
 * allocation leaves the corresponding member zeroed so later frees are
 * safe.  Callers must treat a false return as "resources unusable".
 */
const bool CUDARunner::AllocateResources(const int numb, const int numt)
{
	bool allocated=true;
	CUresult rval;

	// Release any buffers from a previous configuration first.
	DeallocateResources();

	m_in=(cuda_in *)malloc(sizeof(cuda_in));
	m_out=(cuda_out *)malloc(numb*numt*sizeof(cuda_out));
	// BUGFIX: the host allocations were never checked; a NULL here used
	// to be reported as success and crash later.
	if(m_in==NULL || m_out==NULL)
	{
		printf("Error allocating host memory\n");
		allocated=false;
	}

	rval=cuMemAlloc(&m_devin,sizeof(cuda_in));
	if(rval!=CUDA_SUCCESS)
	{
		printf("Error %d allocating CUDA memory\n",rval);
		m_devin=0;
		allocated=false;
	}

	rval=cuMemAlloc(&m_devout,numb*numt*sizeof(cuda_out));
	if(rval!=CUDA_SUCCESS)
	{
		printf("Error %d allocating CUDA memory\n",rval);
		m_devout=0;
		allocated=false;
	}

	// BUGFIX: only report success when everything was allocated; the
	// original printed "Done allocating" even after a failure above.
	if(allocated)
		printf("Done allocating CUDA resources for (%d,%d)\n",numb,numt);

	return allocated;
}
开发者ID:1989gaurav,项目名称:rpcminer-mod,代码行数:27,代码来源:bitcoinminercuda.cpp
示例2: gpu_transpose_naive
void gpu_transpose_naive(int *dest, const int *src, int height, int width) {
assert((width & (width - 1)) == 0); // TODO
assert((height & (height - 1)) == 0);
cuda->set_default_module("transpose.ptx");
CUfunction transpose_kernel = cuda->get_kernel("transpose_naive");
int grid_dim_x = width / BLOCK_DIM_X;
int grid_dim_y = height / BLOCK_DIM_Y;
CUdeviceptr device_src;
CUdeviceptr device_dest;
cuMemAlloc(&device_src, width*height*sizeof(int));
cuMemAlloc(&device_dest, width*height*sizeof(int));
cuMemcpyHtoD(device_src, src, width*height*sizeof(int));
void *args[] = {&device_dest, &device_src, &height, &width};
cuda->launch_kernel_2d_sync(transpose_kernel,
grid_dim_x, grid_dim_y,
BLOCK_DIM_X, BLOCK_DIM_Y,
args);
cuMemcpyDtoH(dest, device_dest, width*height*sizeof(int));
cuMemFree(device_src);
cuMemFree(device_dest);
cuda->ctx_synchronize();
}
开发者ID:Larhard,项目名称:Objective-Cuda,代码行数:27,代码来源:transpose.cpp
示例3: get_dev_mem
/*
* get device memory
*/
void
get_dev_mem(void){
res = cuMemAlloc(&x_dev, N * sizeof(double));
if(res != CUDA_SUCCESS){
printf("cuMemAlloc(x) failed: res = %s\n", conv(res));
exit(1);
}
res = cuMemAlloc(&v_dev, N * sizeof(double));
if(res != CUDA_SUCCESS){
printf("cuMemAlloc(v) failed: res = %s\n", conv(res));
exit(1);
}
res = cuMemAlloc(&error_dev, sizeof(int));
if(res != CUDA_SUCCESS){
printf("cuMemAlloc(error) failed: res = %s\n", conv(res));
exit(1);
}
res = cuMemAlloc(&s_time_dev, sizeof(double));
if(res != CUDA_SUCCESS){
printf("cuMemAlloc(s_time) failed: res = %s\n", conv(res));
exit(1);
}
}
开发者ID:CPFL,项目名称:gtraffic,代码行数:31,代码来源:ov.c
示例4: gpu_transpose_with_shared_mem
void gpu_transpose_with_shared_mem(int *dest, const int *src, int height, int width) {
assert((width & (width - 1)) == 0); // TODO
assert((height & (height - 1)) == 0);
cuda->set_default_module(CUDA_PTX_PREFIX"transpose.cu.ptx");
CUfunction transpose_kernel = cuda->get_kernel("transpose_with_shared_mem");
int grid_dim_x = width / TILE_DIM;
int grid_dim_y = height / TILE_DIM;
CUdeviceptr device_src;
CUdeviceptr device_dest;
cuMemAlloc(&device_src, width*height*sizeof(int));
cuMemAlloc(&device_dest, width*height*sizeof(int));
cuMemcpyHtoD(device_src, src, width*height*sizeof(int));
void *args[] = {&device_dest, &device_src};
cuda->launch_kernel_2d_sync(transpose_kernel,
grid_dim_x, grid_dim_y,
TILE_DIM, 2,
args);
cuMemcpyDtoH(dest, device_dest, width*height*sizeof(int));
cuMemFree(device_src);
cuMemFree(device_dest);
cuda->ctx_synchronize();
}
开发者ID:Larhard,项目名称:Objective-Cuda,代码行数:27,代码来源:transpose.cpp
示例5: cuda_over_map
/*
 * Grace CUDA primitive: loads a PTX module named by the last argument,
 * uploads argcv[0] CudaFloatArray arguments, launches the kernel
 * elementwise over the shortest input, and returns a fresh
 * CudaFloatArray holding the result.
 *
 * NOTE(review): `error` is assigned but never checked after cuDeviceGet
 * and cuCtxCreate; the context and module are never released (leaked on
 * every call); d_A and d_B are declared but unused — confirm intent.
 */
Object cuda_over_map(Object self, int nparts, int *argcv,
        Object *argv, int flags) {
    CUresult error;
    cuInit(0);  // NOTE(review): re-initialises the driver API on every call
    int deviceCount = 0;
    error = cuDeviceGetCount(&deviceCount);
    if (deviceCount == 0) {
        raiseError("No CUDA devices found");
    }
    CUdevice cuDevice;
    CUcontext cuContext;
    CUmodule cuModule;
    CUfunction cuFunc;
    error = cuDeviceGet(&cuDevice, 0);
    error = cuCtxCreate(&cuContext, 0, cuDevice);
    CUdeviceptr d_A;
    CUdeviceptr d_B;
    CUdeviceptr d_res;
    // argv[argcv[0]] holds the PTX file path; argv[0..argcv[0]-1] the arrays.
    errcheck(cuModuleLoad(&cuModule, grcstring(argv[argcv[0]])));
    CUdeviceptr dps[argcv[0]];
    // Kernel argument layout: [0]=result, [1..argcv[0]]=inputs, [last]=size.
    void *args[argcv[0]+2];
    // Track the smallest input length so the kernel never reads past any array.
    // Allocations use the running minimum, so earlier buffers may be slightly
    // larger than the final `size` — wasteful but safe.
    int size = INT_MAX;
    for (int i=0; i<argcv[0]; i++) {
        struct CudaFloatArray *a = (struct CudaFloatArray *)argv[i];
        if (a->size < size)
            size = a->size;
        errcheck(cuMemAlloc(&dps[i], size * sizeof(float)));
        // NOTE(review): assumes `data` is an inline array member, so &a->data
        // is the payload address — confirm against CudaFloatArray's definition.
        errcheck(cuMemcpyHtoD(dps[i], &a->data, size * sizeof(float)));
        args[i+1] = &dps[i];
    }
    struct CudaFloatArray *r =
        (struct CudaFloatArray *)(alloc_CudaFloatArray(size));
    int fsize = sizeof(float) * size;
    errcheck(cuMemAlloc(&d_res, fsize));
    errcheck(cuMemcpyHtoD(d_res, &r->data, fsize));
    args[0] = &d_res;
    args[argcv[0]+1] = &size;
    int threadsPerBlock = 256;
    // Ceil-divide so a partial block covers the tail.
    int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
    // Derive the kernel symbol: "block" + module path with its "_cuda/"
    // prefix and file extension stripped.
    char name[256];
    strcpy(name, "block");
    strcat(name, grcstring(argv[argcv[0]]) + strlen("_cuda/"));
    for (int i=0; name[i] != 0; i++)
        if (name[i] == '.') {
            name[i] = 0;
            break;
        }
    errcheck(cuModuleGetFunction(&cuFunc, cuModule, name));
    errcheck(cuLaunchKernel(cuFunc, blocksPerGrid, 1, 1,
                threadsPerBlock, 1, 1,
                0,
                NULL, args, NULL));
    // Synchronous copy-back doubles as the kernel-completion wait.
    errcheck(cuMemcpyDtoH(&r->data, d_res, fsize));
    cuMemFree(d_res);
    for (int i=0; i<argcv[0]; i++)
        cuMemFree(dps[i]);
    return (Object)r;
}
开发者ID:mwh,项目名称:grace-cuda,代码行数:59,代码来源:cuda.c
示例6: main
/**
 * This measures the overhead in launching a kernel function on each GPU in the
 * system.
 *
 * It does this by executing a small kernel (copying 1 value in global memory) a
 * very large number of times and taking the average execution time. This
 * program uses the CUDA driver API.
 */
int main() {
  CU_ERROR_CHECK(cuInit(0));

  int deviceCount;
  CU_ERROR_CHECK(cuDeviceGetCount(&deviceCount));

  const float hostValue = 5.0f;

  for (int d = 0; d < deviceCount; d++) {
    CUdevice dev;
    CU_ERROR_CHECK(cuDeviceGet(&dev, d));

    CUcontext ctx;
    CU_ERROR_CHECK(cuCtxCreate(&ctx, 0, dev));

    // One float in, one float out — the kernel simply copies it.
    CUdeviceptr devIn, devOut;
    CU_ERROR_CHECK(cuMemAlloc(&devIn, sizeof(float)));
    CU_ERROR_CHECK(cuMemAlloc(&devOut, sizeof(float)));
    CU_ERROR_CHECK(cuMemcpyHtoD(devIn, &hostValue, sizeof(float)));

    CUmodule mod;
    CU_ERROR_CHECK(cuModuleLoadData(&mod, imageBytes));
    CUfunction fn;
    CU_ERROR_CHECK(cuModuleGetFunction(&fn, mod, "kernel"));

    void * params[] = { &devIn, &devOut };

    // Time ITERATIONS back-to-back launches with CUDA events and average.
    CUevent evStart, evStop;
    CU_ERROR_CHECK(cuEventCreate(&evStart, 0));
    CU_ERROR_CHECK(cuEventCreate(&evStop, 0));

    CU_ERROR_CHECK(cuEventRecord(evStart, 0));
    for (int i = 0; i < ITERATIONS; i++)
      CU_ERROR_CHECK(cuLaunchKernel(fn, 1, 1, 1, 1, 1, 1, 0, 0, params, NULL));
    CU_ERROR_CHECK(cuEventRecord(evStop, 0));
    CU_ERROR_CHECK(cuEventSynchronize(evStop));

    float elapsed;
    CU_ERROR_CHECK(cuEventElapsedTime(&elapsed, evStart, evStop));

    CU_ERROR_CHECK(cuEventDestroy(evStart));
    CU_ERROR_CHECK(cuEventDestroy(evStop));
    CU_ERROR_CHECK(cuMemFree(devIn));
    CU_ERROR_CHECK(cuMemFree(devOut));

    fprintf(stdout, "Device %d: %fms\n", d, (elapsed / (double)ITERATIONS));

    CU_ERROR_CHECK(cuModuleUnload(mod));
    CU_ERROR_CHECK(cuCtxDestroy(ctx));
  }
  return 0;
}
开发者ID:garymacindoe,项目名称:cuda-cholesky,代码行数:64,代码来源:kernel-test.c
示例7: TestSAXPY
/*
 * Drive the "saxpy" kernel from saxpy.ptx on chDevice: fill a pinned host
 * buffer with random floats, compute out = alpha * in on the GPU, and
 * verify every element on the host (tolerance 1e-5).
 *
 * Returns CUDA_SUCCESS on success, CUDA_ERROR_NOT_FOUND when the module
 * is missing, CUDA_ERROR_UNKNOWN on a numeric mismatch.
 *
 * NOTE(review): cleanup correctness depends on CUDA_CHECK assigning
 * `status` and jumping to Error on failure — confirm the macro; the
 * dptr/host pointers are zero-initialised so the unconditional frees at
 * Error are safe even on early failure.
 */
CUresult
TestSAXPY( chCUDADevice *chDevice, size_t N, float alpha )
{
    CUresult status;
    CUdeviceptr dptrOut = 0;
    CUdeviceptr dptrIn = 0;
    float *hostOut = 0;
    float *hostIn = 0;

    CUDA_CHECK( cuCtxPushCurrent( chDevice->context() ) );

    // Device buffers; output is cleared so stale data can't pass the check.
    CUDA_CHECK( cuMemAlloc( &dptrOut, N*sizeof(float) ) );
    CUDA_CHECK( cuMemsetD32( dptrOut, 0, N ) );
    CUDA_CHECK( cuMemAlloc( &dptrIn, N*sizeof(float) ) );
    // Pinned host buffers enable the async copies below.
    CUDA_CHECK( cuMemHostAlloc( (void **) &hostOut, N*sizeof(float), 0 ) );
    CUDA_CHECK( cuMemHostAlloc( (void **) &hostIn, N*sizeof(float), 0 ) );

    for ( size_t i = 0; i < N; i++ ) {
        hostIn[i] = (float) rand() / (float) RAND_MAX;
    }

    CUDA_CHECK( cuMemcpyHtoDAsync( dptrIn, hostIn, N*sizeof(float ), NULL ) );

    {
        CUmodule moduleSAXPY;
        CUfunction kernelSAXPY;
        void *params[] = { &dptrOut, &dptrIn, &N, &alpha };

        moduleSAXPY = chDevice->module( "saxpy.ptx" );
        if ( ! moduleSAXPY ) {
            status = CUDA_ERROR_NOT_FOUND;
            goto Error;
        }
        CUDA_CHECK( cuModuleGetFunction( &kernelSAXPY, moduleSAXPY, "saxpy" ) );
        // Fixed launch shape: 1500 blocks x 512 threads on the null stream.
        CUDA_CHECK( cuLaunchKernel( kernelSAXPY, 1500, 1, 1, 512, 1, 1, 0, NULL, params, NULL ) );
    }

    CUDA_CHECK( cuMemcpyDtoHAsync( hostOut, dptrOut, N*sizeof(float), NULL ) );
    // Wait for the async copy before reading hostOut on the CPU.
    CUDA_CHECK( cuCtxSynchronize() );

    for ( size_t i = 0; i < N; i++ ) {
        if ( fabsf( hostOut[i] - alpha*hostIn[i] ) > 1e-5f ) {
            status = CUDA_ERROR_UNKNOWN;
            goto Error;
        }
    }
    status = CUDA_SUCCESS;
    printf( "Well it worked!\n" );
Error:
    // Shared cleanup path; all handles are either valid or still zero.
    cuCtxPopCurrent( NULL );
    cuMemFreeHost( hostOut );
    cuMemFreeHost( hostIn );
    cuMemFree( dptrOut );
    cuMemFree( dptrIn );
    return status;
}
开发者ID:AnilVarmaBiruduraju,项目名称:cudahandbook,代码行数:56,代码来源:saxpyDrv.cpp
示例8: try_init
/*
 * Allocate `len` bytes of device memory and return it as an opaque
 * host-side pointer (the CUdeviceptr smuggled through void*).
 * A zero-length request returns NULL.  The new memory is zero-filled.
 * Allocation failure is fatal: the attempt is logged and the process
 * terminates.
 */
void *swanMalloc( size_t len ) {
	void *ptr;
	CUdeviceptr dptr;
	CUresult err;
	try_init();
	if( len == 0 ) {
		//	printf("SWAN: WARNING - swnaMalloc() called with 0\n");
		return NULL;
	}
	err = cuMemAlloc( &dptr, len );
	ptr = (void*)dptr;
	if ( err != CUDA_SUCCESS ) {
		printf("Attempted to allocate %lu bytes (%lu already allocated)\n", len, state.bytes_allocated );
		// BUGFIX: abort() used to precede error(), making the error()
		// call unreachable dead code.  Report through the library's
		// error channel first; abort() still guarantees termination if
		// error() returns.
		error("swanMalloc failed\n" );
		abort();
	}
	state.bytes_allocated += len;
	// MJH likes his memory clean
	swanMemset( ptr, 0, len );
	return ptr;
}
开发者ID:liyuming1978,项目名称:swan,代码行数:26,代码来源:swanlib_nv.c
示例9: halide_dev_malloc
// Ensure `buf` has a backing device allocation, allocating one with
// cuMemAlloc when buf->dev is still zero.  The device handle is stored
// back into buf->dev; an existing allocation is left untouched.
WEAK void halide_dev_malloc(void *user_context, buffer_t* buf) {
    if (buf->dev) {
        // This buffer already has a device allocation
        return;
    }

    size_t size = __buf_size(user_context, buf);

    #ifdef DEBUG
    halide_printf(user_context, "dev_malloc allocating buffer of %zd bytes, "
    "extents: %zdx%zdx%zdx%zd strides: %zdx%zdx%zdx%zd (%d bytes per element)\n",
    size, buf->extent[0], buf->extent[1], buf->extent[2], buf->extent[3],
    buf->stride[0], buf->stride[1], buf->stride[2], buf->stride[3],
    buf->elem_size);
    #endif

    CUdeviceptr dev_ptr;
    TIME_CALL( cuMemAlloc(&dev_ptr, size), "dev_malloc");
    buf->dev = (uint64_t)dev_ptr;
    halide_assert(user_context, buf->dev);

    #ifdef DEBUG
    halide_assert(user_context, halide_validate_dev_pointer(user_context, buf));
    #endif
}
开发者ID:EEmmanuel7,项目名称:Halide,代码行数:26,代码来源:cuda.cpp
示例10: get_read_ptr_cuda
// Return a device pointer on device `devid` holding this buffer's
// contents, uploading `read_byte_size` bytes from the host copy when the
// device copy is stale.  The device allocation itself is always the full
// `byte_size` and is created lazily on first use.
CUdeviceptr get_read_ptr_cuda(ComputeEnv *env,int devid, size_t read_byte_size) {
    // Fast path: device copy already valid, nothing to transfer.
    if (cuda_valid_list[devid]) {
        return cuda_ptr_list[devid];
    }
    if (host_valid == false) {
        /* neither host nor device data is valid — internal invariant
         * broken, so there is nothing sensible to return */
        abort();
        return 0;
    }
    CUDADev *dev = &env->cuda_dev_list[devid];
    // Make this device's context current for the alloc/copy below.
    cuCtxPushCurrent(dev->context);
    if (cuda_ptr_list[devid] == 0) {
        CUresult err;
        err = cuMemAlloc(&cuda_ptr_list[devid], byte_size);
        if (err != CUDA_SUCCESS) {
            abort();
        }
    }
    //double t0 = getsec();
    cuMemcpyHtoD(cuda_ptr_list[devid], host_ptr, read_byte_size);
    //double t1 = getsec();
    //env->transfer_wait = t1-t0;
    // Mark the device copy fresh; host remains the canonical source.
    cuda_valid_list[devid] = true;
    CUcontext old;
    cuCtxPopCurrent(&old);
    return cuda_ptr_list[devid];
}
开发者ID:vitaliylag,项目名称:waifu2x-converter-cpp,代码行数:34,代码来源:Buffer.hpp
示例11: main
int main(){
init_test();
const std::string source =
".version 4.2\n"
".target sm_20\n"
".address_size 64\n"
".visible .entry kernel(.param .u64 kernel_param_0) {\n"
".reg .s32 %r<2>;\n"
".reg .s64 %rd<3>;\n"
"bra BB1_2;\n"
"ld.param.u64 %rd1, [kernel_param_0];\n"
"cvta.to.global.u64 %rd2, %rd1;\n"
"mov.u32 %r1, 5;\n"
"st.global.u32 [%rd2], %r1;\n"
"BB1_2: ret;\n"
"}\n";
CUmodule modId = 0;
CUfunction funcHandle = 0;
cu_assert(cuModuleLoadData(&modId, source.c_str()));
cu_assert(cuModuleGetFunction(&funcHandle, modId, "kernel"));
CUdeviceptr devValue;
int hostValue = 10;
cu_assert(cuMemAlloc(&devValue, sizeof(int)));
cu_assert(cuMemcpyHtoD(devValue, &hostValue, sizeof(hostValue)));
void * params[] = {&devValue};
cu_assert(cuLaunchKernel(funcHandle, 1,1,1, 1,1,1, 0,0, params, nullptr));
cu_assert(cuMemcpyDtoH(&hostValue, devValue, sizeof(hostValue)));
assert(hostValue == 10);
std::cout << hostValue << "\n";
cu_assert(cuMemFree(devValue));
cu_assert(cuModuleUnload(modId));
return 0;
}
开发者ID:sebsgit,项目名称:gemu,代码行数:33,代码来源:basic_branch.cpp
示例12: sararfftnd_one_complex_to_real
/*
 * Execute the plan's complex-to-real FFT in place on `h_data`.
 *
 * Stages the host buffer into a temporary device allocation, runs
 * cufftExecC2R with input and output aliased to the same buffer, and
 * copies the result back.  Any failure is fatal: the step is reported
 * and the process exits with a distinct code (90-94) identifying it.
 */
void sararfftnd_one_complex_to_real(
	sararfftnd_plan plan, sarafft_complex *h_data
) {
	CUdeviceptr d_data;
	size_t planSize = getPlanSize( plan );
	if ( CUDA_SUCCESS != cuMemAlloc( &d_data, planSize ) ) {
		printf( "cuMemAlloc failed for plansize %li!\n", planSize );
		fflush ( stdout );
		exit( 90 );
	}
	if ( CUDA_SUCCESS != cuMemcpyHtoD( d_data, h_data, planSize ) ) {
		printf( "cuMemcpyHtoD failed!\n" );
		fflush ( stdout );
		exit( 91 );
	}
	if ( CUFFT_SUCCESS != cufftExecC2R( plan, ( cufftComplex* )d_data, ( cufftReal* )d_data ) ) {
		// BUGFIX: this message previously said "cufftExecR2C failed!"
		// although the call being checked is cufftExecC2R.
		printf( "cufftExecC2R failed!\n" );
		fflush ( stdout );
		exit( 92 );
	}
	if ( CUDA_SUCCESS != cuMemcpyDtoH( h_data, d_data, planSize ) ) {
		printf( "cuMemcpyDtoH failed!\n" );
		fflush ( stdout );
		exit( 93 );
	}
	if ( CUDA_SUCCESS != cuMemFree( d_data ) ) {
		printf( "cuMemFree failed!\n" );
		fflush ( stdout );
		exit( 94 );
	}
}
开发者ID:sara-nl,项目名称:Omnimatch,代码行数:31,代码来源:saracufft.cpp
示例13: prealloc
// Ensure backing storage exists for this buffer: always a 64-byte
// aligned host allocation, plus a device-side allocation matching the
// processor `env` targets (OpenCL buffer or CUDA device memory).
// Returns false on any allocation failure, true otherwise.  Existing
// allocations are reused, so repeated calls are cheap.
bool prealloc(ComputeEnv *env) {
    int devid;
    // The host copy is unconditional; it is the canonical data store.
    if (host_ptr == nullptr) {
        host_ptr = _mm_malloc(byte_size, 64);
        if (host_ptr == nullptr) {
            return false;
        }
    }
    switch (env->target_processor.type) {
    case W2XCONV_PROC_HOST:
        // CPU-only processing needs nothing beyond host_ptr.
        break;
    case W2XCONV_PROC_OPENCL:
        devid = env->target_processor.devid;
        if (cl_ptr_list[devid] == nullptr) {
            cl_int err;
            OpenCLDev *dev = &env->cl_dev_list[devid];
            cl_ptr_list[devid] = clCreateBuffer(dev->context,
                                                CL_MEM_READ_WRITE,
                                                byte_size, nullptr, &err);
            if (cl_ptr_list[devid] == nullptr) {
                return false;
            }
            /* touch memory to force allocation */
            // clCreateBuffer can succeed lazily; writing one byte makes the
            // driver commit real memory so failure surfaces here, not later.
            char data = 0;
            err = clEnqueueWriteBuffer(dev->queue, cl_ptr_list[devid],
                                       CL_TRUE, 0, 1, &data, 0, nullptr, nullptr);
            if (err != CL_SUCCESS) {
                clReleaseMemObject(cl_ptr_list[devid]);
                cl_ptr_list[devid] = nullptr;
                return false;
            }
        }
        break;
    case W2XCONV_PROC_CUDA:
        devid = env->target_processor.devid;
        if (cuda_ptr_list[devid] == 0) {
            CUresult err;
            CUDADev *dev = &env->cuda_dev_list[devid];
            // Allocation must happen with this device's context current.
            cuCtxPushCurrent(dev->context);
            err = cuMemAlloc(&cuda_ptr_list[devid], byte_size);
            CUcontext old;
            cuCtxPopCurrent(&old);
            if (err != CUDA_SUCCESS) {
                return false;
            }
        }
        break;
    }
    return true;
}
开发者ID:vitaliylag,项目名称:waifu2x-converter-cpp,代码行数:59,代码来源:Buffer.hpp
示例14: mem_alloc
// Allocate mem.memory_size() bytes of CUDA device memory for `mem` and
// store the raw handle in mem.device_pointer.  The `type` parameter is
// accepted but unused here.
// NOTE(review): cuda_assert appears to be a statement-like macro that
// supplies its own trailing semicolon — confirm before reformatting.
void mem_alloc(device_memory& mem, MemoryType type)
{
    cuda_push_context();  // bind this device's context to the calling thread
    CUdeviceptr device_pointer;
    cuda_assert(cuMemAlloc(&device_pointer, mem.memory_size()))
    mem.device_pointer = (device_ptr)device_pointer;
    cuda_pop_context();   // restore the previously current context
}
开发者ID:vanangamudi,项目名称:blender-main,代码行数:8,代码来源:device_cuda.cpp
示例15: mem
// Allocate `size` bytes of device memory in this context and transfer
// ownership of the wrapper to *ppMem via swap (no extra refcount churn).
// Returns CUDA_SUCCESS on success.
// NOTE(review): HANDLE_RESULT() presumably returns the failing CUresult
// early, leaving *ppMem untouched — confirm the macro's definition.
CUresult CuContext::ByteAlloc(size_t size, DeviceMemPtr* ppMem) {
    DeviceMemPtr mem(new CuDeviceMem);
    CUresult result = cuMemAlloc(&mem->_deviceptr, size);
    HANDLE_RESULT();
    // Record bookkeeping only after the allocation succeeded.
    mem->_size = size;
    mem->_context = this;
    ppMem->swap(mem);
    return CUDA_SUCCESS;
}
开发者ID:CindyYang85,项目名称:mgpu,代码行数:10,代码来源:cucpp.cpp
示例16: device_vector
/// Allocates memory buffer on the device associated with the given queue.
/// A zero-length request allocates nothing and leaves `buffer` empty.
device_vector(const command_queue &q, size_t n) : n(n) {
    if (n) {
        // cuMemAlloc operates on the current context, so activate the
        // queue's context first.
        q.context().set_current();
        CUdeviceptr ptr;
        cuda_check( cuMemAlloc(&ptr, n * sizeof(T)) );
        // CUdeviceptr is an integer handle; squeeze it through size_t into
        // a char* so the smart-pointer `buffer` can carry it.  The custom
        // deleter releases the device allocation on destruction.
        buffer.reset(reinterpret_cast<char*>(static_cast<size_t>(ptr)), detail::deleter() );
    }
}
开发者ID:azuredsky,项目名称:vexcl,代码行数:11,代码来源:device_vector.hpp
示例17: FAIL
/*
 * gpudata allocator for the CUDA backend: validates `flags`, creates the
 * per-buffer completion event, allocates `size` bytes of device memory,
 * and (for GA_BUFFER_INIT) uploads the caller's initial data.
 * On failure, FAIL() presumably stores the error code through `ret` and
 * returns NULL — confirm against the macro.
 * NOTE(review): this excerpt is truncated — the tail of the function
 * (after the GA_BUFFER_INIT error branch) is missing from the listing.
 */
static gpudata *cuda_alloc(void *c, size_t size, void *data, int flags,
                           int *ret) {
    gpudata *res;
    cuda_context *ctx = (cuda_context *)c;
    int fl = CU_EVENT_DISABLE_TIMING;
    // Initial data is mandatory when INIT is requested.
    if ((flags & GA_BUFFER_INIT) && data == NULL) FAIL(NULL, GA_VALUE_ERROR);
    // READ_ONLY and WRITE_ONLY are mutually exclusive.
    if ((flags & (GA_BUFFER_READ_ONLY|GA_BUFFER_WRITE_ONLY)) ==
        (GA_BUFFER_READ_ONLY|GA_BUFFER_WRITE_ONLY)) FAIL(NULL, GA_VALUE_ERROR);
    /* TODO: figure out how to make this work */
    if (flags & GA_BUFFER_HOST) FAIL(NULL, GA_DEVSUP_ERROR);
    res = malloc(sizeof(*res));
    if (res == NULL) FAIL(NULL, GA_SYS_ERROR);
    res->refcnt = 1;
    res->sz = size;
    res->flags = flags & (GA_BUFFER_READ_ONLY|GA_BUFFER_WRITE_ONLY);
    cuda_enter(ctx);
    if (ctx->err != CUDA_SUCCESS) {
        free(res);
        FAIL(NULL, GA_IMPL_ERROR);
    }
    // Blocking-sync events avoid spinning when multiple host threads share
    // the context.
    if (ctx->flags & GA_CTX_MULTI_THREAD)
        fl |= CU_EVENT_BLOCKING_SYNC;
    ctx->err = cuEventCreate(&res->ev, fl);
    if (ctx->err != CUDA_SUCCESS) {
        free(res);
        cuda_exit(ctx);
        FAIL(NULL, GA_IMPL_ERROR);
    }
    // cuMemAlloc rejects a zero size, so round up to one byte.
    if (size == 0) size = 1;
    ctx->err = cuMemAlloc(&res->ptr, size);
    if (ctx->err != CUDA_SUCCESS) {
        cuEventDestroy(res->ev);
        free(res);
        cuda_exit(ctx);
        FAIL(NULL, GA_IMPL_ERROR);
    }
    res->ctx = ctx;
    ctx->refcnt++;
    if (flags & GA_BUFFER_INIT) {
        ctx->err = cuMemcpyHtoD(res->ptr, data, size);
        if (ctx->err != CUDA_SUCCESS) {
            cuda_free(res);
            FAIL(NULL, GA_IMPL_ERROR)
    /* NOTE(review): the source listing ends here mid-function. */
开发者ID:gburachas,项目名称:libgpuarray,代码行数:54,代码来源:gpuarray_buffer_cuda.c
示例18: halide_dev_malloc
// Ensure `buf` has a device allocation, creating one with cuMemAlloc if
// buf->dev is zero.  Returns 0 on success (including when an allocation
// already exists) or the failing CUresult otherwise.
WEAK int halide_dev_malloc(void *user_context, buffer_t *buf) {
    DEBUG_PRINTF( user_context, "CUDA: halide_dev_malloc (user_context: %p, buf: %p)\n", user_context, buf );
    // RAII-style context scope; construction failure aborts early.
    CudaContext ctx(user_context);
    if (ctx.error != CUDA_SUCCESS) {
        return ctx.error;
    }
    size_t size = _buf_size(user_context, buf);
    if (buf->dev) {
        // This buffer already has a device allocation
        halide_assert(user_context, halide_validate_dev_pointer(user_context, buf, size));
        return 0;
    }
    // Negative strides are not representable in a single flat allocation.
    halide_assert(user_context, buf->stride[0] >= 0 && buf->stride[1] >= 0 &&
                  buf->stride[2] >= 0 && buf->stride[3] >= 0);
    DEBUG_PRINTF(user_context, "    allocating buffer of %lld bytes, "
                 "extents: %lldx%lldx%lldx%lld strides: %lldx%lldx%lldx%lld (%d bytes per element)\n",
                 (long long)size,
                 (long long)buf->extent[0], (long long)buf->extent[1],
                 (long long)buf->extent[2], (long long)buf->extent[3],
                 (long long)buf->stride[0], (long long)buf->stride[1],
                 (long long)buf->stride[2], (long long)buf->stride[3],
                 buf->elem_size);
    #ifdef DEBUG
    uint64_t t_before = halide_current_time_ns(user_context);
    #endif
    CUdeviceptr p;
    DEBUG_PRINTF( user_context, "    cuMemAlloc %lld -> ", size );
    CUresult err = cuMemAlloc(&p, size);
    if (err != CUDA_SUCCESS) {
        // Completes the "cuMemAlloc ... -> " debug line with the error name.
        DEBUG_PRINTF( user_context, "%s\n", _get_error_name(err));
        halide_error_varargs(user_context, "CUDA: cuMemAlloc failed (%s)",
                             _get_error_name(err));
        return err;
    } else {
        DEBUG_PRINTF( user_context, "%p\n", p );
    }
    halide_assert(user_context, p);
    buf->dev = (uint64_t)p;
    #ifdef DEBUG
    uint64_t t_after = halide_current_time_ns(user_context);
    halide_printf(user_context, "    Time: %f ms\n", (t_after - t_before) / 1.0e6);
    #endif
    return 0;
}
开发者ID:bnascimento,项目名称:Halide,代码行数:52,代码来源:cuda.cpp
示例19: nvptx_alloc
static void *
nvptx_alloc (size_t s)
{
CUdeviceptr d;
CUresult r;
r = cuMemAlloc (&d, s);
if (r == CUDA_ERROR_OUT_OF_MEMORY)
return 0;
if (r != CUDA_SUCCESS)
GOMP_PLUGIN_fatal ("cuMemAlloc error: %s", cuda_error (r));
return (void *)d;
}
开发者ID:chinabin,项目名称:gcc-tiny,代码行数:13,代码来源:plugin-nvptx.c
示例20: VCL_CU_SAFE_CALL
// Allocate the buffer's device storage: plain device memory by default,
// or managed (unified) memory when the buffer is host-accessible.
void Buffer::allocate()
{
	if (hostAccess() != BufferAccess::Unified)
	{
		// Device-only storage.
		VCL_CU_SAFE_CALL(cuMemAlloc(&_devicePtr, size()));
	}
	else
	{
		// Unified storage visible to both host and device.
		VCL_CU_SAFE_CALL(cuMemAllocManaged(&_devicePtr, size(), CU_MEM_ATTACH_GLOBAL));
	}
}
开发者ID:bfierz,项目名称:vcl,代码行数:13,代码来源:buffer.cpp
注:本文中的cuMemAlloc函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论