本文整理汇总了C++中cutilSafeCall函数的典型用法代码示例。如果您正苦于以下问题:C++ cutilSafeCall函数的具体用法?C++ cutilSafeCall怎么用?C++ cutilSafeCall使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了cutilSafeCall函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: AllocateResources
/**
 * Executes one processing step on the device and reports the best nonce.
 *
 * The host input block is uploaded, cuda_process_helper is invoked, and
 * m_numb*m_numt result records are downloaded. Among the records whose
 * m_bestnonce is non-zero, the one with the smallest m_bestg wins.
 *
 * @return the winning nonce passed through CryptoPP::ByteReverse (the
 *         byte-reverse of 0 when no record carried a non-zero nonce).
 */
const unsigned long CUDARunner::RunStep()
{
    // Allocate lazily: any missing host or device buffer triggers allocation.
    if(m_in==0 || m_out==0 || m_devin==0 || m_devout==0)
    {
        AllocateResources(m_numb,m_numt);
    }

    // Upload input, run the step, download every result record.
    cutilSafeCall(cudaMemcpy(m_devin,m_in,sizeof(cuda_in),cudaMemcpyHostToDevice));
    cuda_process_helper(m_devin,m_devout,GetStepIterations(),GetStepBitShift(),m_numb,m_numt);
    cutilSafeCall(cudaMemcpy(m_out,m_devout,m_numb*m_numt*sizeof(cuda_out),cudaMemcpyDeviceToHost));

    unsigned int winningNonce=0;
    unsigned int lowestG=~0;   // all bits set: any reported m_bestg below this wins
    for(int idx=0; idx<m_numb*m_numt; ++idx)
    {
        // A zero nonce marks a record without a candidate.
        if(m_out[idx].m_bestnonce==0)
        {
            continue;
        }
        if(m_out[idx].m_bestg<lowestG)
        {
            winningNonce=m_out[idx].m_bestnonce;
            lowestG=m_out[idx].m_bestg;
        }
    }

    return CryptoPP::ByteReverse(winningNonce);
}
开发者ID:chancn,项目名称:bitcoin-pool,代码行数:28,代码来源:bitcoinminercuda.cpp
示例2: benchmark
/**
 * Times `iterations` runs of gaussianFilterRGBA and prints the elapsed time
 * and the throughput in Mpixels/sec.
 *
 * A scratch output image of width*height unsigned ints is allocated on the
 * device for the duration of the benchmark and released before returning.
 *
 * @param iterations number of timed filter invocations (one extra untimed
 *                   warm-up run is always performed first).
 */
void
benchmark(int iterations)
{
    // allocate memory for result; compute the byte count in size_t so large
    // images do not wrap a 32-bit intermediate
    unsigned int *d_result;
    size_t size = (size_t)width * height * sizeof(unsigned int);
    cutilSafeCall( cudaMalloc( (void**) &d_result, size));

    // warm-up
    gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads);
    cutilSafeCall( cudaThreadSynchronize() );

    cutilCheckError( cutStartTimer( timer));

    // execute the kernel
    for(int i=0; i<iterations; i++) {
        gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads);
    }

    // wait for all queued work so the timer covers the actual execution
    cutilSafeCall( cudaThreadSynchronize() );
    cutilCheckError( cutStopTimer( timer));

    // check if kernel execution generated an error
    cutilCheckMsg("Kernel execution failed");

    // Read the stopped timer once and reuse the value for both reports.
    const float elapsedMs = cutGetTimerValue( timer);
    printf("Processing time: %f (ms)\n", elapsedMs);

    // Accumulate the pixel count in double: width*height*iterations easily
    // exceeds the range of 32-bit integer arithmetic.
    const double totalPixels = (double)width * (double)height * (double)iterations;
    printf("%.2f Mpixels/sec\n", (totalPixels / (elapsedMs / 1000.0)) / 1e6);

    cutilSafeCall(cudaFree(d_result));
}
开发者ID:AnkurAnandapu,项目名称:ocelot-fork,代码行数:30,代码来源:recursiveGaussian-host.cpp
示例3: rayIntervalSplatting
/**
 * Renders the ray-cast view for the given camera data.
 *
 * Steps: run ray-interval splatting, map its min/max results into m_data,
 * call renderCS, optionally compute normals from m_data.d_depth4 (only when
 * m_params.m_useGradients is false), then unmap the splatting resources.
 * When detailed timings are enabled, the device is synchronized before and
 * after the rendering work and the elapsed time is added to TimingLog.
 */
void CUDARayCastSDF::render(const HashData& hashData, const HashParams& hashParams, const DepthCameraData& cameraData, const mat4f& lastRigidTransform)
{
    rayIntervalSplatting(hashData, hashParams, cameraData, lastRigidTransform);
    m_data.d_rayIntervalSplatMinArray = m_rayIntervalSplatting.mapMinToCuda();
    m_data.d_rayIntervalSplatMaxArray = m_rayIntervalSplatting.mapMaxToCuda();

    // Begin the timed region: drain pending device work first so it is not
    // attributed to the ray cast.
    if (GlobalAppState::getInstance().s_timingsDetailledEnabled)
    {
        cutilSafeCall(cudaDeviceSynchronize());
        m_timer.start();
    }

    renderCS(hashData, m_data, cameraData, m_params);

    //convertToCameraSpace(cameraData);
    if (!m_params.m_useGradients)
    {
        computeNormals(m_data.d_normals, m_data.d_depth4, m_params.m_width, m_params.m_height);
    }

    m_rayIntervalSplatting.unmapCuda();

    // End the timed region and record the measurement.
    if (GlobalAppState::getInstance().s_timingsDetailledEnabled)
    {
        cutilSafeCall(cudaDeviceSynchronize());
        m_timer.stop();
        TimingLog::totalTimeRayCast += m_timer.getElapsedTimeMS();
        TimingLog::countTimeRayCast++;
    }
}
开发者ID:ZaneYang,项目名称:VoxelHashing,代码行数:32,代码来源:CUDARayCastSDF.cpp
示例4: linearbackwardeuler_init
/*
 * Allocates the solver's persistent matrix storage and stores it in
 * props->mem.
 *
 * The storage size depends on the linear solver selected in the options:
 *   LSOLVER_DENSE  : PARALLEL_MODELS * statesize * statesize elements
 *   LSOLVER_BANDED : PARALLEL_MODELS * statesize * bandwidth elements,
 *                    where bandwidth = upperhalfbw + lowerhalfbw + 1
 * Each element is a CDATAFORMAT. With TARGET_GPU defined the storage is
 * device memory (cudaMalloc); otherwise it is host memory (malloc).
 *
 * Returns 0 on success, 1 for an unknown solver type or when the host
 * allocation fails.
 */
__HOST__
int linearbackwardeuler_init(solver_props *props){
  solver_mem *mem;
  linearbackwardeuler_opts *opts = (linearbackwardeuler_opts*)&props->opts;
  unsigned int bandwidth = opts->upperhalfbw + opts->lowerhalfbw + 1;
  size_t matrix_bytes;

  /* Size the matrix once, in size_t, so both targets share the same logic
     and the product cannot wrap a narrower intermediate type. */
  switch(opts->lsolver){
  case LSOLVER_DENSE:
    matrix_bytes = ((size_t)(PARALLEL_MODELS)) * props->statesize * props->statesize * sizeof(CDATAFORMAT);
    break;
  case LSOLVER_BANDED:
    matrix_bytes = ((size_t)(PARALLEL_MODELS)) * props->statesize * bandwidth * sizeof(CDATAFORMAT);
    break;
  default:
    return 1;
  }

#if defined TARGET_GPU
  // Allocates GPU global memory for solver's persistent data
  cutilSafeCall(cudaMalloc((void **)&mem, matrix_bytes));
#else // CPU and OPENMP targets
  mem = (solver_mem *)malloc(matrix_bytes);
  /* malloc(0) may legitimately return a null pointer; only a failed
     non-empty request is reported as an error. */
  if(!mem && matrix_bytes != 0){
    return 1;
  }
#endif

  props->mem = mem; /* The matrix */
  return 0;
}
开发者ID:joshuaecook,项目名称:simengine,代码行数:35,代码来源:linearbackwardeuler.c
示例5: SOLVER
// Releases all storage owned by a bogacki_shampine solver instance.
// The function name is produced by the SOLVER macro (defined outside this view).
// mem: the solver memory structure to release; with TARGET_GPU defined it is
//      treated as a device pointer, otherwise as a host pointer.
void SOLVER(bogacki_shampine, free, TARGET, SIMENGINE_STORAGE, bogacki_shampine_mem *mem) {
#if defined TARGET_GPU
// Host-side copy of the device structure: its fields hold the pointers
// that must be handed to cudaFree below.
bogacki_shampine_mem tmem;
cutilSafeCall(cudaMemcpy(&tmem, mem, sizeof(bogacki_shampine_mem), cudaMemcpyDeviceToHost));
// NOTE(review): presumably releases the device-side props created at init time - confirm against GPU_ENTRY(free_props, ...).
GPU_ENTRY(free_props, SIMENGINE_STORAGE, tmem.props);
// Free every buffer referenced by the structure, then the structure itself.
cutilSafeCall(cudaFree(tmem.k1));
cutilSafeCall(cudaFree(tmem.k2));
cutilSafeCall(cudaFree(tmem.k3));
cutilSafeCall(cudaFree(tmem.k4));
cutilSafeCall(cudaFree(tmem.temp));
cutilSafeCall(cudaFree(tmem.next_states));
cutilSafeCall(cudaFree(tmem.z_next_states));
cutilSafeCall(cudaFree(tmem.cur_timestep));
cutilSafeCall(cudaFree(mem));
GPU_ENTRY(exit, SIMENGINE_STORAGE);
#else // Used for CPU and OPENMP targets
// Host build: the structure and its buffers are released with free().
// Note that mem->props is not released on this path.
free(mem->k1);
free(mem->k2);
free(mem->k3);
free(mem->k4);
free(mem->temp);
free(mem->next_states);
free(mem->z_next_states);
free(mem->cur_timestep);
free(mem);
#endif
}
开发者ID:joshuaecook,项目名称:simengine,代码行数:33,代码来源:bogacki_shampine.c
示例6: checkResultCuda
////////////////////////////////////////////////////////////////////////////////
//! Check if the result is correct or write data to file for external
//! regression testing
//!
//! The buffer object is unregistered from CUDA while it is mapped through
//! OpenGL and registered again before returning.
//!
//! @param argc  command line argument count
//! @param argv  command line arguments; the "regression" flag triggers the
//!              dump of the buffer to ./data/regression.dat
//! @param vbo   OpenGL buffer object holding the result data
////////////////////////////////////////////////////////////////////////////////
void checkResultCuda(int argc, char** argv, const GLuint& vbo)
{
    cutilSafeCall(cudaGLUnregisterBufferObject(vbo));

    // map buffer object
    glBindBuffer(GL_ARRAY_BUFFER_ARB, vbo );
    float* data = (float*) glMapBuffer(GL_ARRAY_BUFFER, GL_READ_ONLY);

    if(! data) {
        // glMapBuffer returns a null pointer on failure; do not read from it
        // and do not try to unmap a buffer that was never mapped.
        fprintf(stderr, "Map buffer failed.\n");
        fflush(stderr);
    } else {
        // check result
        if(cutCheckCmdLineFlag(argc, (const char**) argv, "regression")) {
            // write file for regression test
            cutilCheckError(cutWriteFilef("./data/regression.dat",
                                          data, mesh_width * mesh_height * 3, 0.0));
        }

        // unmap GL buffer object
        if(! glUnmapBuffer(GL_ARRAY_BUFFER)) {
            fprintf(stderr, "Unmap buffer failed.\n");
            fflush(stderr);
        }
    }

    // Hand the buffer back to CUDA regardless of the outcome above.
    cutilSafeCall(cudaGLRegisterBufferObject(vbo));

    CUT_CHECK_ERROR_GL();
}
开发者ID:AnkurAnandapu,项目名称:ocelot-fork,代码行数:29,代码来源:simpleGL.cpp
示例7: test
/**
 * Compares sat_scan against the reference sat_scan_gold on one input.
 *
 * `setup` is called twice so that each implementation receives its own
 * freshly prepared device buffer; the buffer is released with cudaFree
 * after each run. Both results are then compared element by element.
 *
 * @param setup callback that produces the device input pointer and its
 *              width and height
 * @return true when every pair of elements differs by less than 0.1,
 *         false at the first element that does not (NaN counts as a
 *         mismatch); the offending values are printed.
 */
bool test(int (*setup)(float ** dev_ptr_ptr, unsigned int * w_ptr, unsigned int * h_ptr)) {
    float * dev;
    unsigned int width;
    unsigned int height;

    // Reference result.
    setup(&dev, &width, &height);
    float * val = sat_scan_gold<float>(dev,width, height);
    cutilSafeCall(cudaFree(dev));

    // Result under test, computed from a fresh copy of the input.
    setup(&dev, &width, &height);
    float * yours = sat_scan<float, kind>(dev,width, height);
    cutilSafeCall(cudaFree(dev));

    const float EPSILON = 0.1f;
    bool isItGood = true;

    for (unsigned int i = 0; i < width * height; i++) {
        // Take the absolute difference by hand: an unqualified abs() may
        // resolve to the integer overload and truncate the difference,
        // hiding every error smaller than 1.
        float diff = val[i] - yours[i];
        if (diff < 0.0f) {
            diff = -diff;
        }
        // Written as !(diff < EPSILON) so that a NaN difference fails too.
        if (!(diff < EPSILON)) {
            printf("V: %f Y: %f at %u\n", val[i], yours[i], i);
            isItGood = false;
            break;
        }
    }

    free(val);
    free(yours);
    return isItGood;
}
开发者ID:eclipse0922,项目名称:cbench-cis565s11,代码行数:35,代码来源:sat_scan.cpp
示例8: GPU_ENTRY
// Creates the memory structure for a forwardeuler solver instance.
// The function name is produced by the SOLVER macro (defined outside this view).
//
// props: solver properties; statesize and num_models determine the size of
//        the k1 buffer (statesize * num_models CDATAFORMAT elements).
//
// Returns a pointer to the new structure. With TARGET_GPU defined this is a
// device pointer; otherwise it is a host pointer, or NULL when a host
// allocation fails.
forwardeuler_mem *SOLVER(forwardeuler, init, TARGET, SIMENGINE_STORAGE, solver_props *props) {
#if defined TARGET_GPU
  GPU_ENTRY(init, SIMENGINE_STORAGE);

  // Temporary CPU copies of GPU datastructures
  forwardeuler_mem tmem;
  // GPU datastructures
  forwardeuler_mem *dmem;

  // Allocate GPU space for mem and pointer fields of mem (other than props)
  cutilSafeCall(cudaMalloc((void**)&dmem, sizeof(forwardeuler_mem)));
  tmem.props = GPU_ENTRY(init_props, SIMENGINE_STORAGE, props);
  cutilSafeCall(cudaMalloc((void**)&tmem.k1, props->statesize*props->num_models*sizeof(CDATAFORMAT)));

  // Copy mem structure to GPU
  cutilSafeCall(cudaMemcpy(dmem, &tmem, sizeof(forwardeuler_mem), cudaMemcpyHostToDevice));

  return dmem;

#else // Used for CPU and OPENMP targets

  forwardeuler_mem *mem = (forwardeuler_mem*)malloc(sizeof(forwardeuler_mem));
  // Do not dereference a failed allocation.
  if(!mem){
    return NULL;
  }

  mem->props = props;
  size_t k1_bytes = props->statesize*props->num_models*sizeof(CDATAFORMAT);
  mem->k1 = (CDATAFORMAT*)malloc(k1_bytes);
  // malloc(0) may return a null pointer without it being an error; only a
  // failed non-empty request aborts the initialization.
  if(!mem->k1 && k1_bytes != 0){
    free(mem);
    return NULL;
  }

  return mem;

#endif // defined TARGET_GPU
}
开发者ID:joshuaecook,项目名称:simengine,代码行数:30,代码来源:forwardeuler.c
示例9: load_tables_as_textures
/**
 * One-time preparation of the terrain generator.
 *
 * Resets the table and Perlin destination members, loads the lookup tables
 * through load_tables_as_textures, initializes the Perlin data, allocates
 * the five per-voxel device buffers shared through CUDABlock, and finally
 * calls Init(4, 3, 4).
 */
void CUDAMarcher::PrepareTerrain()
{
    // Start from a known state.
    mCudaEdgeTable = 0;
    mCudaTriTable = 0;
    mCudaVertTable = 0;
    mCudaPerlinDst1 = 0;
    mCudaPerlinDst2 = 0;
    mCudaPerlinDst3 = 0;

    //-- Load tables
    load_tables_as_textures( &mCudaEdgeTable, &mCudaTriTable, &mCudaVertTable );

    //-- Create and load perlin data
    host_PerlinInitialize(0);
    host_InitPerlinData(PERLIN_DATA_RANK, PERLIN_DATA_SIZE);

    // Every voxel buffer holds one uint per cell of a cubic block.
    unsigned int voxelBufferBytes = sizeof(uint) * MARCHING_BLOCK_SIZE*MARCHING_BLOCK_SIZE*MARCHING_BLOCK_SIZE;
    cutilSafeCall(cudaMalloc((void**) &CUDABlock::cuda_voxelVerts,        voxelBufferBytes));
    cutilSafeCall(cudaMalloc((void**) &CUDABlock::cuda_voxelVertsScan,    voxelBufferBytes));
    cutilSafeCall(cudaMalloc((void**) &CUDABlock::cuda_voxelOccupied,     voxelBufferBytes));
    cutilSafeCall(cudaMalloc((void**) &CUDABlock::cuda_voxelOccupiedScan, voxelBufferBytes));
    cutilSafeCall(cudaMalloc((void**) &CUDABlock::cuda_compVoxelArray,    voxelBufferBytes));

    //Init blocks
    Init(4, 3, 4);
}
开发者ID:zeroZshadow,项目名称:agt6-cuda,代码行数:26,代码来源:CUDAMarcher.cpp
示例10: allocCuda0
/**
 * Allocates a zero-filled buffer in GPU memory.
 *
 * @param size number of bytes to allocate and clear.
 * @return the pointer to the GPU allocated memory.
 */
void* allocCuda0(int size) {
    void* devPtr;

    cutilSafeCall( cudaMalloc((void**) &devPtr, size));
    cutilSafeCall( cudaMemset(devPtr, 0, size));

    if (DEBUG) {
        printf("allocCuda(%d): %p\n", size, devPtr);
    }

    return devPtr;
}
开发者ID:edanssandes,项目名称:MASA-CUDAlign,代码行数:13,代码来源:cuda_util.cpp
示例11: raytrace
/**
 * Renders one frame with the CUDA ray tracer and uploads it to the display
 * texture.
 *
 * The pixel buffer object is mapped into CUDA, raytraceImage writes the
 * frame into it, the frame counter kept on the device is incremented, and
 * the PBO contents are then transferred to the `framebuffer` texture.
 */
void raytrace()
{
    uint* imagedata;
    cutilSafeCall(cudaGLMapBufferObject((void**)&imagedata, pbo));

    // Camera and light descriptions passed to the device code.
    float3 A, B, C;
    camera.getImagePlane(A, B, C);
    dev_camera d_cam(camera.getPosition(), A, B, C, aperture, focal);
    dev_light d_light(light.getPosition(), light.getColor(), 4096);

    //need to change here.
    float3 minAABB, maxAABB;
    world.getAABB(minAABB, maxAABB);
    sceneInfo scene(world.getNumTriangles(), world.getNumSpheres(), world.getNumBoxes(), minAABB, maxAABB);

    //TODO: add control for clear buffer here.
    //change here for the many object case
    raytraceImage(imagedata, dev_lastframe_ptr, dev_num_layers, r_width, r_height, moved, d_cam, d_light, scene);

    //for showing the real frame rate
    // dev_num_layers is allocated and uploaded with sizeof(int) in
    // uploadToGPU, so both directions use the same size here (the read
    // previously used sizeof(float)). Both copies are now error-checked like
    // the other CUDA calls in this function.
    cutilSafeCall(cudaMemcpy(&frame_num, dev_num_layers, sizeof(int), cudaMemcpyDeviceToHost));
    frame_num++;
    cutilSafeCall(cudaMemcpy(dev_num_layers, &frame_num, sizeof(int), cudaMemcpyHostToDevice));

    cutilSafeCall(cudaGLUnmapBufferObject(pbo));

    //download texture from pbo
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
    glBindTexture(GL_TEXTURE_2D, framebuffer);
    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, r_width, r_height, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);

    CUT_CHECK_ERROR_GL();
}
开发者ID:duxing,项目名称:GPUFinal,代码行数:30,代码来源:main.cpp
示例12: allocCudaSeq
/**
 * Uploads a sequence to the GPU, followed by a run of padding characters.
 *
 * The device buffer has len + padding_len bytes: the first len bytes are a
 * copy of `data`, the remaining padding_len bytes are set to padding_char.
 *
 * @param data the sequence data string.
 * @param len the length of the sequence.
 * @param padding_len extra padding length.
 * @param padding_char character to be used as padding.
 *
 * @return the pointer to the GPU memory allocated for the sequence.
 */
unsigned char* allocCudaSeq(const char* data, const int len, const int padding_len, const char padding_char) {
    unsigned char* devSeq = (unsigned char*)allocCuda0(len+padding_len);

    if (DEBUG) {
        printf("allocCudaSeq(%p, %d, %d, %d): %p\n", data, len, padding_len, padding_char, devSeq);
    }

    // Sequence first, padding immediately after it.
    cutilSafeCall( cudaMemcpy(devSeq, data, len, cudaMemcpyHostToDevice));
    cutilSafeCall( cudaMemset(devSeq+len, padding_char, padding_len) );

    return devSeq;
}
开发者ID:edanssandes,项目名称:MASA-CUDAlign,代码行数:17,代码来源:cuda_util.cpp
示例13: cleanup
/**
 * Releases the resources held by the application: the timer, the host and
 * device images, the textures, the CUDA registration of the pixel buffer,
 * the OpenGL buffer/texture/program objects and the two helper objects.
 */
void cleanup()
{
    cutilCheckError( cutDeleteTimer( timer));

    // Host image, then device images.
    if (h_img) {
        cutFree(h_img);
    }
    cutilSafeCall(cudaFree(d_img));
    cutilSafeCall(cudaFree(d_temp));

    // Refer to boxFilter_kernel.cu for implementation
    freeTextures();

    //DEPRECATED: cutilSafeCall(cudaGLUnregisterBufferObject(pbo));
    cudaGraphicsUnregisterResource(cuda_pbo_resource);

    // OpenGL objects.
    glDeleteBuffersARB(1, &pbo);
    glDeleteTextures(1, &texid);
    glDeleteProgramsARB(1, &shader);

    // Deleting a null pointer is a no-op, so no guard is needed here.
    delete g_CheckRender;
    g_CheckRender = NULL;

    delete g_FrameBufferObject;
    g_FrameBufferObject = NULL;
}
开发者ID:yyzreal,项目名称:gpuocelot,代码行数:26,代码来源:boxFilter.cpp
示例14: uploadToGPU
void uploadToGPU()
{//upload scene to GPU
std::vector<float4> triangles;
std::vector<float4> spheres;
std::vector<float4> boxes;
world.updateToArray(triangles, spheres, boxes);
size_t triangle_size = triangles.size() * sizeof(float4);
size_t sphere_size = spheres.size() * sizeof(float4);
size_t boxes_size = boxes.size() * sizeof(float4);
//merge into one scene
std::vector<float4> sceneObj;
sceneObj.insert(sceneObj.end(), triangles.begin(), triangles.end());
sceneObj.insert(sceneObj.end(), spheres.begin(), spheres.end());
sceneObj.insert(sceneObj.end(), boxes.begin(), boxes.end());
//if the scene is dynamic. this function might has to be updated per frame.
//in that case should avoid mallocing every frame.
cutilSafeCall(cudaMalloc((void**)&dev_scene_pointer, triangle_size + sphere_size + boxes_size));
cudaMemcpy(dev_scene_pointer, &sceneObj[0], triangle_size + sphere_size + boxes_size, cudaMemcpyHostToDevice);//&triangles[0]
bindTexture(dev_scene_pointer, triangles.size()/4, spheres.size()/3, boxes.size()/4);//change the denominator for more information to bind
//add a device framebuffer for last frame.
std::vector<float3> clean(r_width * r_height, make_float3(0.0));
//float zero = 0;
cutilSafeCall(cudaMalloc((void**)&dev_lastframe_ptr, r_width * r_height * sizeof(float3)));
cudaMemcpy(dev_lastframe_ptr, &clean[0], r_width * r_height * sizeof(float3), cudaMemcpyHostToDevice);
cutilSafeCall(cudaMalloc((void**)&dev_num_layers, sizeof(int)));
cudaMemcpy(dev_num_layers, &frame_num, sizeof(int), cudaMemcpyHostToDevice);
}
开发者ID:duxing,项目名称:GPUFinal,代码行数:31,代码来源:main.cpp
示例15: runBenchmark
/**
 * Times `iterations` simulation updates with CUDA events and prints the
 * elapsed time, the interaction rate and the GFLOP/s figure.
 *
 * @param iterations number of timed calls to nbody->update (one additional
 *                   untimed call is made first).
 */
void runBenchmark(int iterations)
{
    // once without timing to prime the GPU
    nbody->update(activeParams.m_timestep);

    cutilSafeCall(cudaEventRecord(startEvent, 0));

    for (int i = 0; i < iterations; ++i)
    {
        nbody->update(activeParams.m_timestep);
    }

    cutilSafeCall(cudaEventRecord(stopEvent, 0));
    // Wait for the stop event; checked like the surrounding calls so that a
    // failure is reported here rather than at a later, unrelated call.
    cutilSafeCall(cudaEventSynchronize(stopEvent));

    float milliseconds = 0;
    cutilSafeCall( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent));

    double interactionsPerSecond = 0;
    double gflops = 0;
    computePerfStats(interactionsPerSecond, gflops, milliseconds, iterations);

    printf("%d bodies, total time for %d iterations: %0.3f ms\n",
           numBodies, iterations, milliseconds);
    printf("= %0.3f billion interactions per second\n", interactionsPerSecond);
    printf("= %0.3f GFLOP/s at %d flops per interaction\n", gflops, 20);
}
开发者ID:AnkurAnandapu,项目名称:ocelot-fork,代码行数:26,代码来源:nbody.cpp
示例16: heun_free
/*
 * Releases the memory structure of a heun solver (props->mem) together
 * with its base, temp and predictor buffers.
 *
 * With TARGET_GPU defined the structure lives in device memory: it is first
 * copied to the host so that the buffer pointers stored in it can be passed
 * to cudaFree. Otherwise everything is released with free().
 *
 * Always returns 0.
 */
__HOST__
int heun_free(solver_props *props){
#if defined TARGET_GPU
  heun_mem *dmem = (heun_mem*)props->mem;

  /* Host-side copy of the device structure. */
  heun_mem tmem;
  cutilSafeCall(cudaMemcpy(&tmem, dmem, sizeof(heun_mem), cudaMemcpyDeviceToHost));

  cutilSafeCall(cudaFree(tmem.base));
  cutilSafeCall(cudaFree(tmem.temp));
  cutilSafeCall(cudaFree(tmem.predictor));
  cutilSafeCall(cudaFree(dmem));

#else // Used for CPU and OPENMP targets

  heun_mem *hmem = (heun_mem*)props->mem;

  free(hmem->temp);
  free(hmem->base);
  free(hmem->predictor);
  free(hmem);

#endif // defined TARGET_GPU

  return 0;
}
开发者ID:joshuaecook,项目名称:simengine,代码行数:25,代码来源:heun.c
示例17: free
/**
 * Releases the host and device buffers owned by the runner (input, output
 * and metahash) and resets each pointer to 0 so the call can be repeated.
 */
void RemoteCUDARunner::DeallocateResources()
{
    // Host buffers: free() ignores a null pointer, so no guard is required.
    free(m_in);
    m_in=0;

    // Device buffers are only released when they were actually allocated.
    if(m_devin!=0)
    {
        cutilSafeCall(cudaFree(m_devin));
        m_devin=0;
    }

    free(m_out);
    m_out=0;

    if(m_devout!=0)
    {
        cutilSafeCall(cudaFree(m_devout));
        m_devout=0;
    }

    free(m_metahash);
    m_metahash=0;

    if(m_devmetahash!=0)
    {
        cutilSafeCall(cudaFree(m_devmetahash));
        m_devmetahash=0;
    }
}
开发者ID:joelthelion,项目名称:bitcoin-pool,代码行数:33,代码来源:bitcoinminercuda.cpp
示例18: mvReductArraysToHost
/*
 * Copies reduct_bytes bytes from the device buffer OP_reduct_d to the host
 * buffer OP_reduct_h and then synchronizes with the device.
 */
void
mvReductArraysToHost ( int reduct_bytes )
{
  /* device -> host */
  cutilSafeCall ( cudaMemcpy ( OP_reduct_h,
                               OP_reduct_d,
                               reduct_bytes,
                               cudaMemcpyDeviceToHost ) );

  /* NOTE(review): cudaThreadSynchronize is deprecated in current CUDA
     toolkits; kept because the toolkit version in use is not visible here. */
  cutilSafeCall ( cudaThreadSynchronize ( ) );
}
开发者ID:doru1004,项目名称:OP2-Common,代码行数:7,代码来源:op_cuda_rt_support.c
示例19: mvConstArraysToDevice
/*
 * Copies consts_bytes bytes from the host buffer OP_consts_h to the device
 * buffer OP_consts_d and then synchronizes with the device.
 */
void
mvConstArraysToDevice ( int consts_bytes )
{
  /* host -> device */
  cutilSafeCall ( cudaMemcpy ( OP_consts_d,
                               OP_consts_h,
                               consts_bytes,
                               cudaMemcpyHostToDevice ) );

  /* NOTE(review): cudaThreadSynchronize is deprecated in current CUDA
     toolkits; kept because the toolkit version in use is not visible here. */
  cutilSafeCall ( cudaThreadSynchronize ( ) );
}
开发者ID:doru1004,项目名称:OP2-Common,代码行数:7,代码来源:op_cuda_rt_support.c
示例20: initPixelBuffer
/**
 * (Re)creates the pixel buffer object and the display texture.
 *
 * If a PBO already exists, it is unregistered from CUDA and deleted along
 * with the old texture. A new PBO of width*height RGBA bytes is then
 * created and registered with CUDA, and a matching GL_RGBA8 texture with
 * nearest-neighbour filtering is created for display.
 */
void initPixelBuffer()
{
    if (pbo) {
        // unregister this buffer object from CUDA C
        cutilSafeCall(cudaGraphicsUnregisterResource(cuda_pbo_resource));

        // delete old buffer
        glDeleteBuffersARB(1, &pbo);
        glDeleteTextures(1, &tex);
    }

    // create pixel buffer object for display
    glGenBuffersARB(1, &pbo);
    glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
    glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, width*height*sizeof(GLubyte)*4, 0, GL_STREAM_DRAW_ARB);
    glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);

    // register this buffer object with CUDA
    cutilSafeCall(cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, pbo, cudaGraphicsMapFlagsWriteDiscard));

    // create texture for display
    glGenTextures(1, &tex);
    glBindTexture(GL_TEXTURE_2D, tex);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glBindTexture(GL_TEXTURE_2D, 0);
}
开发者ID:AnkurAnandapu,项目名称:ocelot-fork,代码行数:28,代码来源:volumeRender.cpp
注:本文中的cutilSafeCall函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论