本文整理汇总了C++中cuModuleGetFunction函数的典型用法代码示例。如果您正苦于以下问题:C++ cuModuleGetFunction函数的具体用法?C++ cuModuleGetFunction怎么用?C++ cuModuleGetFunction使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了cuModuleGetFunction函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: loadCUDAModules
/**
 * Loads the PTX module "videoPP64.ptx" and resolves three kernel entry points
 * into handles declared outside this function (g_kernelNV12toARGB,
 * g_kernelARGBtoNV12, g_kernelARGBpostprocess).
 *
 * Every driver call goes through checkCudaErrors; how that helper reacts to a
 * failure is defined outside this block.
 *
 * @return CUDA_SUCCESS once all calls have passed through checkCudaErrors.
 */
CUresult loadCUDAModules()
{
    CUmodule cuModule_;
    checkCudaErrors(cuModuleLoad(&cuModule_, "videoPP64.ptx"));
    checkCudaErrors(cuModuleGetFunction(&g_kernelNV12toARGB, cuModule_, "NV12ToARGBdrvapi"));
    checkCudaErrors(cuModuleGetFunction(&g_kernelARGBtoNV12, cuModule_, "ARGBToNv12drvapi"));
    checkCudaErrors(cuModuleGetFunction(&g_kernelARGBpostprocess, cuModule_, "ARGBpostprocess"));

    // The function is declared to return CUresult; flowing off the end of a
    // non-void function is undefined behaviour, so report success explicitly.
    return CUDA_SUCCESS;
}
开发者ID:ttyio,项目名称:videopp,代码行数:8,代码来源:cudaProcessFrame.cpp
示例2: CudaModule::getKernel
/**
 * Resolves a kernel handle from m_module.
 *
 * The lookup is tried with `name` as given; if the handle is still null
 * afterwards it is retried with the prefix "__globfunc_". When a handle was
 * obtained, its parameter block size is set to `paramSize` and the result of
 * that call is handed to checkError.
 *
 * @param name       Kernel name to look up.
 * @param paramSize  Value passed to cuParamSetSize for the resolved kernel.
 * @return The kernel handle, or NULL when neither lookup produced one.
 */
CUfunction CudaModule::getKernel(const std::string& name, int paramSize)
{
    CUfunction handle = NULL;

    // The result codes of the lookups are not inspected; success is detected
    // through the handle itself.
    cuModuleGetFunction(&handle, m_module, name.c_str());
    if (handle == NULL) {
        const std::string prefixed = "__globfunc_" + name;
        cuModuleGetFunction(&handle, m_module, prefixed.c_str());
    }

    if (handle == NULL) {
        return handle;
    }

    checkError("cuParamSetSize", cuParamSetSize(handle, paramSize));
    return handle;
}
开发者ID:tcoppex,项目名称:cudaraster-linux,代码行数:15,代码来源:CudaModule.cpp
示例3: bpnn_adjust_weights_launch
/* int is 64-bit for some reason... */
/*
 * Looks up the kernel "_Z24bpnn_adjust_weights_cudaPfiS_iS_S_" in `mod` and
 * launches it with 16x16x1 threads per block on a 1 x num_blocks x 1 grid
 * (num_blocks is defined outside this function).
 *
 * The kernel receives, in order: delta, hid, ly, in, w, oldw.
 *
 * Returns CUDA_SUCCESS, or the failing driver result after printing it.
 */
CUresult bpnn_adjust_weights_launch
(CUmodule mod, CUdeviceptr delta, long hid, CUdeviceptr ly, long in,
 CUdeviceptr w, CUdeviceptr oldw)
{
    /* Launch geometry. */
    const int block_w = 16;
    const int block_h = 16;
    const int grid_w = 1;
    const int grid_h = num_blocks;
    /* Addresses of the kernel arguments, in kernel-signature order. */
    void *kernel_args[] = {&delta, &hid, &ly, &in, &w, &oldw};
    CUfunction kernel;
    CUresult rc;

    rc = cuModuleGetFunction(&kernel, mod, "_Z24bpnn_adjust_weights_cudaPfiS_iS_S_");
    if (rc != CUDA_SUCCESS) {
        printf("cuModuleGetFunction(adjust_weights) failed: res = %u\n", rc);
        return rc;
    }

    rc = cuLaunchKernel(kernel, grid_w, grid_h, 1, block_w, block_h, 1, 0, 0,
                        (void**) kernel_args, 0);
    if (rc != CUDA_SUCCESS) {
        printf("cuLaunchKernel(adjust_weights) failed: res = %u\n", rc);
        return rc;
    }

    return CUDA_SUCCESS;
}
开发者ID:Constellation,项目名称:gdev-bench,代码行数:30,代码来源:backprop_cuda.c
示例4: CreateCuFunction
// Resolves kernel `name` from `module`, wraps it in a new CuFunction and
// hands that object to the caller through *ppFunction.
//
// After the lookup succeeds, seven function attributes are queried into the
// object's _attributes member; the result codes of those queries are not
// inspected. The handle, owning module, name and block shape are then stored
// on the object.
//
// Returns the cuModuleGetFunction result if the lookup fails (in which case
// *ppFunction is left untouched), otherwise CUDA_SUCCESS.
CUresult CreateCuFunction(const char* name, CuModule* module, int3 blockShape,
    FunctionPtr* ppFunction) {

    CUfunction handle;
    const CUresult lookup = cuModuleGetFunction(&handle, module->Handle(), name);
    if(lookup != CUDA_SUCCESS) return lookup;

    FunctionPtr created(new CuFunction);
    CuFuncAttr& attr = created->_attributes;

    // Destination field paired with the attribute id that fills it. The
    // order matches the sequence of driver calls being made.
    struct AttributeQuery {
        int* target;
        CUfunction_attribute id;
    };
    const AttributeQuery queries[] = {
        { &attr.maxThreadsPerBlock, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK },
        { &attr.sharedSizeBytes,    CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES },
        { &attr.constSizeBytes,     CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES },
        { &attr.localSizeBytes,     CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES },
        { &attr.numRegs,            CU_FUNC_ATTRIBUTE_NUM_REGS },
        { &attr.ptxVersion,         CU_FUNC_ATTRIBUTE_PTX_VERSION },
        { &attr.binaryVersion,      CU_FUNC_ATTRIBUTE_BINARY_VERSION }
    };
    const unsigned queryCount = sizeof(queries) / sizeof(queries[0]);
    for(unsigned q = 0; q < queryCount; ++q)
        cuFuncGetAttribute(queries[q].target, queries[q].id, handle);

    created->_function = handle;
    created->_module = module;
    created->_functionName = name;
    created->_blockShape = blockShape;

    ppFunction->swap(created);
    return CUDA_SUCCESS;
}
开发者ID:CindyYang85,项目名称:mgpu,代码行数:30,代码来源:cucpp.cpp
示例5: main
// Test driver: JIT-loads a small PTX module whose kernel begins with an
// unconditional branch to its `ret` label, launches it once, and checks that
// the device value is unchanged.
//
// All driver calls are wrapped in cu_assert and setup is done by init_test;
// both are defined outside this block.
int main(){
init_test();
// PTX source. The kernel takes one 64-bit parameter. "bra BB1_2" jumps
// straight to the label that holds "ret", so the instructions that would
// store 5 through the parameter are skipped.
const std::string source =
".version 4.2\n"
".target sm_20\n"
".address_size 64\n"
".visible .entry kernel(.param .u64 kernel_param_0) {\n"
".reg .s32 %r<2>;\n"
".reg .s64 %rd<3>;\n"
"bra BB1_2;\n"
"ld.param.u64 %rd1, [kernel_param_0];\n"
"cvta.to.global.u64 %rd2, %rd1;\n"
"mov.u32 %r1, 5;\n"
"st.global.u32 [%rd2], %r1;\n"
"BB1_2: ret;\n"
"}\n";
CUmodule modId = 0;
CUfunction funcHandle = 0;
// Load the module from the in-memory PTX text and resolve "kernel".
cu_assert(cuModuleLoadData(&modId, source.c_str()));
cu_assert(cuModuleGetFunction(&funcHandle, modId, "kernel"));
CUdeviceptr devValue;
int hostValue = 10;
// Seed one int on the device with the host value 10.
cu_assert(cuMemAlloc(&devValue, sizeof(int)));
cu_assert(cuMemcpyHtoD(devValue, &hostValue, sizeof(hostValue)));
void * params[] = {&devValue};
// Launch with a 1x1x1 grid of 1x1x1 blocks.
cu_assert(cuLaunchKernel(funcHandle, 1,1,1, 1,1,1, 0,0, params, nullptr));
cu_assert(cuMemcpyDtoH(&hostValue, devValue, sizeof(hostValue)));
// The store is branched over, so the value read back must still be 10.
assert(hostValue == 10);
std::cout << hostValue << "\n";
cu_assert(cuMemFree(devValue));
cu_assert(cuModuleUnload(modId));
return 0;
}
开发者ID:sebsgit,项目名称:gemu,代码行数:33,代码来源:basic_branch.cpp
示例6: halide_dev_run
// Launches the kernel named `entry_name` from the module stored in
// `state_ptr` (interpreted as module_state*).
//
// blocksX/Y/Z and threadsX/Y/Z are the grid and block dimensions passed to
// cuLaunchKernel; `shared_mem_bytes` is its dynamic shared-memory size and
// `args` its kernel-parameter array. The launch uses the NULL stream.
// `arg_sizes` is not referenced by this function.
//
// Returns 0 on success, or the failing CUresult (context acquisition,
// function lookup, launch, and - in DEBUG builds - synchronisation).
WEAK int halide_dev_run(void *user_context,
                        void *state_ptr,
                        const char* entry_name,
                        int blocksX, int blocksY, int blocksZ,
                        int threadsX, int threadsY, int threadsZ,
                        int shared_mem_bytes,
                        size_t arg_sizes[],
                        void* args[]) {
    DEBUG_PRINTF( user_context, "CUDA: halide_dev_run (user_context: %p, entry: %s, blocks: %dx%dx%d, threads: %dx%dx%d, shmem: %d)\n",
                  user_context, entry_name,
                  blocksX, blocksY, blocksZ,
                  threadsX, threadsY, threadsZ,
                  shared_mem_bytes );

    CudaContext ctx(user_context);
    if (ctx.error != CUDA_SUCCESS) {
        return ctx.error;
    }

    #ifdef DEBUG
    uint64_t started_ns = halide_current_time_ns(user_context);
    #endif

    halide_assert(user_context, state_ptr);
    CUmodule mod = ((module_state*)state_ptr)->module;
    halide_assert(user_context, mod);

    // Resolve the entry point.
    CUfunction kernel;
    CUresult status = cuModuleGetFunction(&kernel, mod, entry_name);
    if (status != CUDA_SUCCESS) {
        halide_error_varargs(user_context, "CUDA: cuModuleGetFunction failed (%s)",
                             _get_error_name(status));
        return status;
    }

    // Launch.
    status = cuLaunchKernel(kernel,
                            blocksX, blocksY, blocksZ,
                            threadsX, threadsY, threadsZ,
                            shared_mem_bytes,
                            NULL, // stream
                            args,
                            NULL);
    if (status != CUDA_SUCCESS) {
        halide_error_varargs(user_context, "CUDA: cuLaunchKernel failed (%s)",
                             _get_error_name(status));
        return status;
    }

    #ifdef DEBUG
    // Debug builds wait for the context to finish and report the elapsed time.
    status = cuCtxSynchronize();
    if (status != CUDA_SUCCESS) {
        halide_error_varargs(user_context, "CUDA: cuCtxSynchronize failed (%s)\n",
                             _get_error_name(status));
        return status;
    }
    uint64_t finished_ns = halide_current_time_ns(user_context);
    halide_printf(user_context, " Time: %f ms\n", (finished_ns - started_ns) / 1.0e6);
    #endif

    return 0;
}
开发者ID:bnascimento,项目名称:Halide,代码行数:60,代码来源:cuda.cpp
示例7: cuda_over_map
/*
 * Runs a CUDA kernel over a set of float arrays and returns a new float array.
 *
 * Argument layout as read by this function:
 *   argv[0 .. argcv[0]-1]  input arrays (treated as struct CudaFloatArray *)
 *   argv[argcv[0]]         module file path (converted with grcstring)
 *
 * The kernel name is "block" followed by the module path with its first
 * strlen("_cuda/") characters removed and everything from the first '.'
 * dropped. The kernel receives: the result buffer, one device buffer per
 * input, and the element count. The element count is the smallest input
 * size seen so far while iterating, so the final count is the minimum.
 *
 * Changes relative to the earlier version:
 *   - result codes of cuInit/cuDeviceGetCount/cuDeviceGet/cuCtxCreate are
 *     now passed to errcheck instead of being stored and ignored;
 *   - the kernel name is built with explicit bounds checks instead of an
 *     unbounded strcpy/strcat into a 256-byte buffer;
 *   - the byte size of the result is computed in size_t to avoid int overflow;
 *   - the module and the context created here are released before returning,
 *     instead of being leaked on every call;
 *   - unused locals were removed.
 */
Object cuda_over_map(Object self, int nparts, int *argcv,
        Object *argv, int flags) {
    CUdevice cuDevice;
    CUcontext cuContext;
    CUmodule cuModule;
    CUfunction cuFunc;
    CUdeviceptr d_res;
    int deviceCount = 0;

    errcheck(cuInit(0));
    errcheck(cuDeviceGetCount(&deviceCount));
    if (deviceCount == 0) {
        raiseError("No CUDA devices found");
    }
    errcheck(cuDeviceGet(&cuDevice, 0));
    errcheck(cuCtxCreate(&cuContext, 0, cuDevice));

    const char *modulePath = grcstring(argv[argcv[0]]);
    errcheck(cuModuleLoad(&cuModule, modulePath));

    CUdeviceptr dps[argcv[0]];
    void *args[argcv[0]+2];
    int size = INT_MAX;
    for (int i=0; i<argcv[0]; i++) {
        struct CudaFloatArray *a = (struct CudaFloatArray *)argv[i];
        if (a->size < size)
            size = a->size;
        errcheck(cuMemAlloc(&dps[i], size * sizeof(float)));
        errcheck(cuMemcpyHtoD(dps[i], &a->data, size * sizeof(float)));
        args[i+1] = &dps[i];
    }

    struct CudaFloatArray *r =
        (struct CudaFloatArray *)(alloc_CudaFloatArray(size));
    size_t fsize = sizeof(float) * (size_t)size;
    errcheck(cuMemAlloc(&d_res, fsize));
    errcheck(cuMemcpyHtoD(d_res, &r->data, fsize));
    args[0] = &d_res;
    args[argcv[0]+1] = &size;

    int threadsPerBlock = 256;
    int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;

    /* Build "block" + <path without the "_cuda/" prefix>, bounded to name[]. */
    char name[256];
    const size_t prefixLen = strlen("_cuda/");
    const char *baseName = "";
    if (strlen(modulePath) < prefixLen) {
        raiseError("CUDA module path is shorter than the _cuda/ prefix");
    } else {
        baseName = modulePath + prefixLen;
    }
    if (strlen("block") + strlen(baseName) >= sizeof(name)) {
        raiseError("CUDA module path is too long for a kernel name");
    }
    strcpy(name, "block");
    /* strncat truncates as a last line of defence should raiseError return. */
    strncat(name, baseName, sizeof(name) - strlen(name) - 1);
    /* Cut the name at the first '.', i.e. drop the file extension. */
    for (int i=0; name[i] != 0; i++)
        if (name[i] == '.') {
            name[i] = 0;
            break;
        }

    errcheck(cuModuleGetFunction(&cuFunc, cuModule, name));
    errcheck(cuLaunchKernel(cuFunc, blocksPerGrid, 1, 1,
                threadsPerBlock, 1, 1,
                0,
                NULL, args, NULL));
    /* Blocking copy: also waits for the kernel launched above. */
    errcheck(cuMemcpyDtoH(&r->data, d_res, fsize));

    cuMemFree(d_res);
    for (int i=0; i<argcv[0]; i++)
        cuMemFree(dps[i]);
    /* Release what this call created; the result now lives in host memory. */
    cuModuleUnload(cuModule);
    cuCtxDestroy(cuContext);
    return (Object)r;
}
开发者ID:mwh,项目名称:grace-cuda,代码行数:59,代码来源:cuda.c
示例8: initCuda
/**
 * Creates a context on the device chosen by CUT_DEVICE_INIT_DRV, loads the
 * module "./data/<cubin_name>" and resolves `kernel_name` from it.
 *
 * @param _cuContext      Receives the created context. NOTE(review): this is
 *                        passed by value, so the caller does not see the
 *                        handle created here - confirm this is intended.
 * @param executablePath  Passed to cutFindFilePath; that result is only printed.
 * @param mathop          Output: the resolved kernel handle. Set to 0 first,
 *                        so it is well-defined on every return path.
 * @param argc, argv      Forwarded to CUT_DEVICE_INIT_DRV.
 * @param cubin_name      Module file name, appended to "./data/".
 * @param kernel_name     Entry point to resolve.
 * @return CUDA_SUCCESS, or the status of the first failing step (after
 *         reporting it through Error).
 *
 * Fixes relative to the earlier version: the path buffer now has room for
 * the terminating NUL and is checked for NULL before it is written; it is
 * released with free() to match malloc(); a pointer is printed with %p
 * instead of %lu; the path returned by cutFindFilePath is released; and a
 * failed step no longer falls through to later calls with invalid handles.
 */
static CUresult
initCuda(CUcontext _cuContext, char* executablePath, CUfunction *mathop,
         int argc, char** argv, const char* cubin_name, const char* kernel_name)
{
    *mathop = 0;

    CUdevice cuDevice;
    CUT_DEVICE_INIT_DRV(cuDevice, argc, argv);
    print_GetProperties(cuDevice);

    CUresult status = cuCtxCreate( &_cuContext, 0, cuDevice );
    if ( CUDA_SUCCESS != status ) {
        Error(_cuContext, status);
        return status;
    }
    printf("(1) context creation successful\n");

    // Diagnostic only: the located path is printed but not used for loading.
    char* module_path = cutFindFilePath(cubin_name, executablePath);
    printf ("\t cubin:%s, path:%s, mmp_ptr:%p\n", cubin_name, executablePath, (void*)module_path);
    if(module_path != NULL) {
        printf ("\t cubin:%s, path:%s, module_path:%c%c%c%c\n", cubin_name, executablePath, *module_path, *(module_path+1), *(module_path+2), *(module_path+3));
        cutFree(module_path);
    }

    // Build "./data/<cubin_name>"; +1 for the terminating NUL.
    const char* data_path = "./data/";
    size_t len_path = strlen(data_path);
    size_t len_fn = strlen(cubin_name);
    char* module_path_new = (char*)malloc(sizeof(char) * (len_path + len_fn + 1));
    if (module_path_new == 0) {
        status = CUDA_ERROR_NOT_FOUND;
        Error(_cuContext, status);
        return status;
    }
    strcpy(module_path_new, data_path);
    strcat(module_path_new, cubin_name);

    // Probe the file purely to print a clearer message before loading.
    FILE *fp = fopen(module_path_new,"r");
    if( fp ) {
        printf("(2) cubin_File found in modulepath:%s\n", module_path_new);
        fclose(fp);
    } else {
        printf("(2) cubin file not exist: %s\n", module_path_new);
    }

    CUmodule cuModule = 0;
    status = cuModuleLoad(&cuModule, module_path_new);
    free(module_path_new);
    if ( CUDA_SUCCESS != status ) {
        Error(_cuContext, status);
        return status;
    }
    printf ("(3) module Load successful\n");

    CUfunction cuFunction = 0;
    status = cuModuleGetFunction(&cuFunction, cuModule, kernel_name);
    if ( CUDA_SUCCESS != status) {
        Error(_cuContext, status);
        return status;
    }
    printf ("(4) getFunction successful w/cuFunction\n");

    *mathop = cuFunction;
    return CUDA_SUCCESS;
}
开发者ID:skchoe,项目名称:2012.Functional-GPU-Programming,代码行数:58,代码来源:pass_drv.cpp
示例9: ptxJIT
/*
 * Links the PTX file ptxPath with the driver's JIT linker, loads the linked
 * image as a module and extracts the kernel kName from it.
 *
 * @param phModule Output module handle (loaded from the linked image)
 * @param phKernel Output kernel handle
 * @param ptxPath  ptx file name
 * @param kName    kernel name
 *
 * Linker info and error logs are requested through JIT options and printed.
 * Both log buffers and the wall-time value are initialised up front so they
 * are safe to print even if the linker writes nothing into them.
 */
void ptxJIT(CUmodule *phModule, CUfunction *phKernel, const char *ptxPath, const char *kName)
{
    CUlinkState cuLinkState;
    CUjit_option options[6];
    void *optionVals[6];
    float walltime = 0.0f;
    char error_log[8192] = "";
    char info_log[8192] = "";
    unsigned int logSize = 8192;
    void *cuOut;
    size_t outSize;
    CUresult myErr;

    // Setup linker options
    // Return walltime from JIT compilation
    options[0] = CU_JIT_WALL_TIME;
    optionVals[0] = (void *) &walltime;
    // Pass a buffer for info messages
    options[1] = CU_JIT_INFO_LOG_BUFFER;
    optionVals[1] = (void *) info_log;
    // Pass the size of the info buffer (the value travels inside the pointer)
    options[2] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
    optionVals[2] = (void *) (size_t) logSize;
    // Pass a buffer for error message
    options[3] = CU_JIT_ERROR_LOG_BUFFER;
    optionVals[3] = (void *) error_log;
    // Pass the size of the error buffer
    options[4] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
    optionVals[4] = (void *) (size_t) logSize;
    // Make the linker verbose
    options[5] = CU_JIT_LOG_VERBOSE;
    optionVals[5] = (void *) 1;

    // Create a pending linker invocation
    checkCudaErrors(cuLinkCreate(6,options, optionVals, &cuLinkState));

    // Load the ptx from the file
    myErr = cuLinkAddFile(cuLinkState, CU_JIT_INPUT_PTX, ptxPath, 0, 0, 0);
    if (myErr != CUDA_SUCCESS){
        // Errors will be put in error_log, per CU_JIT_ERROR_LOG_BUFFER option above.
        fprintf(stderr,"PTX Linker Error:\n%s\n",error_log);
    }

    // Complete the linker step
    checkCudaErrors(cuLinkComplete(cuLinkState, &cuOut, &outSize));

    // Linker walltime and info_log were requested in options above.
    printf("CUDA Link Completed in %fms. Linker Output:\n%s\n", walltime, info_log);

    // Load resulting cuBin into module
    checkCudaErrors(cuModuleLoadData(phModule, cuOut));

    // Locate the kernel entry point
    checkCudaErrors(cuModuleGetFunction(phKernel, *phModule, kName));

    // Destroy the linker invocation (done only after the image in cuOut has
    // been loaded above)
    checkCudaErrors(cuLinkDestroy(cuLinkState));
}
开发者ID:zhanglx13,项目名称:Automated_particle_filter_design_system,代码行数:64,代码来源:clock.cpp
示例10: main
/**
* This measures the overhead in launching a kernel function on each GPU in the
* system.
*
* It does this by executing a small kernel (copying 1 value in global memory) a
* very large number of times and taking the average execution time. This
* program uses the CUDA driver API.
*/
int main() {
CU_ERROR_CHECK(cuInit(0));
int count;
CU_ERROR_CHECK(cuDeviceGetCount(&count));
/* Host value copied into the input buffer on every device. */
float x = 5.0f;
/* Repeat the whole measurement once per device ordinal. */
for (int d = 0; d < count; d++) {
CUdevice device;
CU_ERROR_CHECK(cuDeviceGet(&device, d));
/* A fresh context per device; destroyed at the end of the iteration. */
CUcontext context;
CU_ERROR_CHECK(cuCtxCreate(&context, 0, device));
/* One float of input and one float of output. */
CUdeviceptr in, out;
CU_ERROR_CHECK(cuMemAlloc(&in, sizeof(float)));
CU_ERROR_CHECK(cuMemAlloc(&out, sizeof(float)));
CU_ERROR_CHECK(cuMemcpyHtoD(in, &x, sizeof(float)));
/* imageBytes is defined outside this function. */
CUmodule module;
CU_ERROR_CHECK(cuModuleLoadData(&module, imageBytes));
CUfunction function;
CU_ERROR_CHECK(cuModuleGetFunction(&function, module, "kernel"));
void * params[] = { &in, &out };
/* Events bracket the launch loop so the elapsed time covers all launches. */
CUevent start, stop;
CU_ERROR_CHECK(cuEventCreate(&start, 0));
CU_ERROR_CHECK(cuEventCreate(&stop, 0));
CU_ERROR_CHECK(cuEventRecord(start, 0));
/* ITERATIONS launches of a single-thread, single-block grid. */
for (int i = 0; i < ITERATIONS; i++)
CU_ERROR_CHECK(cuLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, params, NULL));
CU_ERROR_CHECK(cuEventRecord(stop, 0));
CU_ERROR_CHECK(cuEventSynchronize(stop));
float time;
CU_ERROR_CHECK(cuEventElapsedTime(&time, start, stop));
CU_ERROR_CHECK(cuEventDestroy(start));
CU_ERROR_CHECK(cuEventDestroy(stop));
CU_ERROR_CHECK(cuMemFree(in));
CU_ERROR_CHECK(cuMemFree(out));
/* Report the mean time per launch. */
fprintf(stdout, "Device %d: %fms\n", d, (time / (double)ITERATIONS));
CU_ERROR_CHECK(cuModuleUnload(module));
CU_ERROR_CHECK(cuCtxDestroy(context));
}
return 0;
}
开发者ID:garymacindoe,项目名称:cuda-cholesky,代码行数:64,代码来源:kernel-test.c
示例11: TestSAXPY
// Runs the "saxpy" kernel from "saxpy.ptx" on N floats and verifies the result
// on the host.
//
// The output buffer is zero-filled and the input holds random values in
// [0, 1], so each expected output element is alpha * input element.
//
// Returns CUDA_SUCCESS when all elements match within 1e-5,
// CUDA_ERROR_NOT_FOUND when the module is unavailable and CUDA_ERROR_UNKNOWN
// on a mismatch.
//
// NOTE(review): CUDA_CHECK is defined outside this block; given the Error
// label and the `status` variable it presumably stores the call's result in
// `status` and jumps to Error on failure - confirm against its definition.
CUresult
TestSAXPY( chCUDADevice *chDevice, size_t N, float alpha )
{
CUresult status;
// All four buffers start as 0 so the cleanup code can run from any point.
CUdeviceptr dptrOut = 0;
CUdeviceptr dptrIn = 0;
float *hostOut = 0;
float *hostIn = 0;
CUDA_CHECK( cuCtxPushCurrent( chDevice->context() ) );
CUDA_CHECK( cuMemAlloc( &dptrOut, N*sizeof(float) ) );
CUDA_CHECK( cuMemsetD32( dptrOut, 0, N ) );
CUDA_CHECK( cuMemAlloc( &dptrIn, N*sizeof(float) ) );
// Host buffers come from cuMemHostAlloc and are the source/target of the
// asynchronous copies below.
CUDA_CHECK( cuMemHostAlloc( (void **) &hostOut, N*sizeof(float), 0 ) );
CUDA_CHECK( cuMemHostAlloc( (void **) &hostIn, N*sizeof(float), 0 ) );
for ( size_t i = 0; i < N; i++ ) {
hostIn[i] = (float) rand() / (float) RAND_MAX;
}
CUDA_CHECK( cuMemcpyHtoDAsync( dptrIn, hostIn, N*sizeof(float ), NULL ) );
// Nested scope: these locals are initialised here, after statements that
// may jump to Error.
{
CUmodule moduleSAXPY;
CUfunction kernelSAXPY;
void *params[] = { &dptrOut, &dptrIn, &N, &alpha };
moduleSAXPY = chDevice->module( "saxpy.ptx" );
if ( ! moduleSAXPY ) {
status = CUDA_ERROR_NOT_FOUND;
goto Error;
}
CUDA_CHECK( cuModuleGetFunction( &kernelSAXPY, moduleSAXPY, "saxpy" ) );
// Fixed launch shape: 1500 blocks of 512 threads, independent of N.
CUDA_CHECK( cuLaunchKernel( kernelSAXPY, 1500, 1, 1, 512, 1, 1, 0, NULL, params, NULL ) );
}
CUDA_CHECK( cuMemcpyDtoHAsync( hostOut, dptrOut, N*sizeof(float), NULL ) );
// Wait for the asynchronous copy and kernel before reading hostOut.
CUDA_CHECK( cuCtxSynchronize() );
for ( size_t i = 0; i < N; i++ ) {
if ( fabsf( hostOut[i] - alpha*hostIn[i] ) > 1e-5f ) {
status = CUDA_ERROR_UNKNOWN;
goto Error;
}
}
status = CUDA_SUCCESS;
printf( "Well it worked!\n" );
// Common exit path: runs on success as well as on failure.
Error:
cuCtxPopCurrent( NULL );
cuMemFreeHost( hostOut );
cuMemFreeHost( hostIn );
cuMemFree( dptrOut );
cuMemFree( dptrIn );
return status;
}
开发者ID:AnilVarmaBiruduraju,项目名称:cudahandbook,代码行数:56,代码来源:saxpyDrv.cpp
示例12: cuModuleLoad
/*
* Class: edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2
* Method: loadFunction
* Signature: ()V
*/
/*
 * Loads the module named by `filename`, resolves the entry kernel
 * "_Z5entryPcS_PiPxS1_S0_S0_i", selects the prefer-L1 cache configuration and
 * lays out the kernel parameter block: seven CUdeviceptr values (taken from
 * variables defined outside this function) followed by the int num_blocks.
 *
 * heap_end_ptr is stored in heapEndPtr; the module and function handles go to
 * cuModule / cuFunction, all defined outside this function.
 *
 * NOTE(review): CHECK_STATUS is defined outside this block and presumably
 * reports the error and returns from the function - confirm. Because of that,
 * the UTF-8 copy of the file name is released BEFORE the load status is
 * checked; otherwise a failed cuModuleLoad would leak it.
 */
JNIEXPORT void JNICALL Java_edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2_loadFunction
(JNIEnv *env, jobject this_obj, jlong heap_end_ptr, jstring filename, jint num_blocks){
    int offset;
    CUresult status;
    const char * native_filename;

    heapEndPtr = heap_end_ptr;

    native_filename = (*env)->GetStringUTFChars(env, filename, 0);
    if (native_filename == NULL) {
        /* GetStringUTFChars failed; there is no path to load from. */
        return;
    }
    status = cuModuleLoad(&cuModule, native_filename);
    (*env)->ReleaseStringUTFChars(env, filename, native_filename);
    CHECK_STATUS(env, "error in cuModuleLoad", status);

    status = cuModuleGetFunction(&cuFunction, cuModule, "_Z5entryPcS_PiPxS1_S0_S0_i");
    CHECK_STATUS(env,"error in cuModuleGetFunction",status)

    status = cuFuncSetCacheConfig(cuFunction, CU_FUNC_CACHE_PREFER_L1);
    CHECK_STATUS(env,"error in cuFuncSetCacheConfig",status)

    /* Parameter block: 7 device pointers + 1 int. */
    status = cuParamSetSize(cuFunction, (7 * sizeof(CUdeviceptr) + sizeof(int)));
    CHECK_STATUS(env,"error in cuParamSetSize",status)

    /* Each parameter is written at the running byte offset. */
    offset = 0;
    status = cuParamSetv(cuFunction, offset, (void *) &gcInfoSpace, sizeof(CUdeviceptr));
    CHECK_STATUS(env,"error in cuParamSetv gcInfoSpace",status)
    offset += sizeof(CUdeviceptr);

    status = cuParamSetv(cuFunction, offset, (void *) &gpuToSpace, sizeof(CUdeviceptr));
    CHECK_STATUS(env,"error in cuParamSetv gpuToSpace",status)
    offset += sizeof(CUdeviceptr);

    status = cuParamSetv(cuFunction, offset, (void *) &gpuHandlesMemory, sizeof(CUdeviceptr));
    CHECK_STATUS(env,"error in cuParamSetv gpuHandlesMemory %",status)
    offset += sizeof(CUdeviceptr);

    status = cuParamSetv(cuFunction, offset, (void *) &gpuHeapEndPtr, sizeof(CUdeviceptr));
    CHECK_STATUS(env,"error in cuParamSetv gpuHeapEndPtr",status)
    offset += sizeof(CUdeviceptr);

    status = cuParamSetv(cuFunction, offset, (void *) &gpuBufferSize, sizeof(CUdeviceptr));
    CHECK_STATUS(env,"error in cuParamSetv gpuBufferSize",status)
    offset += sizeof(CUdeviceptr);

    status = cuParamSetv(cuFunction, offset, (void *) &gpuExceptionsMemory, sizeof(CUdeviceptr));
    CHECK_STATUS(env,"error in cuParamSetv gpuExceptionsMemory",status)
    offset += sizeof(CUdeviceptr);

    status = cuParamSetv(cuFunction, offset, (void *) &gpuClassMemory, sizeof(CUdeviceptr));
    CHECK_STATUS(env,"error in cuParamSetv gpuClassMemory",status)
    offset += sizeof(CUdeviceptr);

    status = cuParamSeti(cuFunction, offset, num_blocks);
    CHECK_STATUS(env,"error in cuParamSetv num_blocks",status)
    offset += sizeof(int);
}
开发者ID:Chiru2015,项目名称:rootbeer1,代码行数:61,代码来源:CudaRuntime2.c
示例13: spoc_cuda_debug_compile
/*
 * JIT-compiles the PTX text in `moduleSrc` and looks up `function_name` in the
 * resulting module.
 *
 * JIT options used: a 1024-byte info log, a register cap of 32 and
 * CU_JIT_TARGET_FROM_CUCONTEXT. When either driver call fails, the info log
 * is written to stderr. The result of the last driver call is left in
 * cuda_error (defined outside this function).
 *
 * Returns a heap-allocated CUfunction* cast to `value`; `gi` is only
 * registered with CAMLparam3. BLOCKING_CUDA_GET_CONTEXT and
 * BLOCKING_CUDA_RESTORE_CONTEXT are defined outside this block.
 *
 * Fixes relative to the earlier version: the module handle, the fourth option
 * value and the log buffer are now initialised before the driver or fprintf
 * can read them, and a failed log allocation is handled by advertising a
 * zero-sized log instead of passing a NULL buffer with a non-zero size.
 */
CAMLprim value spoc_cuda_debug_compile(value moduleSrc, value function_name, value gi){
    CAMLparam3(moduleSrc, function_name, gi);
    CUmodule module = NULL;
    CUfunction *kernel;
    char* functionN;
    char *ptx_source;
    const unsigned int jitNumOptions = 4;
    CUjit_option jitOptions[4];
    void *jitOptVals[4];
    int jitLogBufferSize;
    char *jitLogBuffer;
    int jitRegCount = 32;

    BLOCKING_CUDA_GET_CONTEXT;

    kernel = malloc(sizeof(CUfunction));
    functionN = String_val(function_name);
    ptx_source = String_val(moduleSrc);

    // zero-filled log buffer, so it is always a valid (possibly empty) string
    jitLogBufferSize = 1024;
    jitLogBuffer = calloc(jitLogBufferSize, sizeof(char));
    if (jitLogBuffer == NULL)
        jitLogBufferSize = 0;

    // set up size of compilation log buffer
    jitOptions[0] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
    jitOptVals[0] = (void *)(size_t)jitLogBufferSize;
    // set up pointer to the compilation log buffer
    jitOptions[1] = CU_JIT_INFO_LOG_BUFFER;
    jitOptVals[1] = jitLogBuffer;
    // set up pointer to set the Maximum # of registers for a particular kernel
    jitOptions[2] = CU_JIT_MAX_REGISTERS;
    jitOptVals[2] = (void *)(size_t)jitRegCount;
    // target is taken from the current context; the slot is still passed to
    // the driver, so give it a defined value
    jitOptions[3] = CU_JIT_TARGET_FROM_CUCONTEXT;
    jitOptVals[3] = NULL;
    //CU_JIT_TARGET;
    // jitOptVals[3] = (void*)(uintptr_t)CU_TARGET_COMPUTE_10;

    cuda_error = (cuModuleLoadDataEx(&module, ptx_source, jitNumOptions, jitOptions, (void **)jitOptVals));
    if (cuda_error)
    {
        fprintf (stderr,"%s\n", jitLogBuffer ? jitLogBuffer : "");
        fflush (stderr);
    }

    cuda_error = (cuModuleGetFunction(kernel, module, functionN));
    if (cuda_error)
    {
        fprintf (stderr, "%s\n", jitLogBuffer ? jitLogBuffer : "");
        fflush (stderr);
    }

    BLOCKING_CUDA_RESTORE_CONTEXT;
    free(jitLogBuffer);
    CAMLreturn((value) kernel);
}
开发者ID:archonSTB,项目名称:SPOC,代码行数:55,代码来源:Kernel_cuda.c
示例14: init_cuda
/*
 * Helper for init_cuda: if the most recent driver result stored in `res` is
 * not CUDA_SUCCESS, prints `failure_fmt` (which takes one %s, filled with
 * conv(res)) and terminates the process with exit status 1.
 */
static void
init_cuda_check(const char *failure_fmt)
{
    if (res == CUDA_SUCCESS)
        return;
    printf(failure_fmt, conv(res));
    exit(1);
}

/*
 * Initialization required before the kernel program can be used.
 *
 * Derives thread_num (N capped at 16) and block_num (N divided by
 * thread_num squared, rounded up), then initialises the driver, creates a
 * context on device 0, loads "./cuda_main.cubin", resolves "cuda_main" and
 * sets its shared-memory size and block shape. Any failing step prints a
 * message and exits.
 */
void
init_cuda(void){
    thread_num = (N <= 16) ? N : 16 ;

    block_num = N / (thread_num*thread_num);
    if(N % (thread_num*thread_num) != 0)
        block_num++;

    res = cuInit(0);
    init_cuda_check("cuInit failed: res = %s\n");

    res = cuDeviceGet(&dev, 0);
    init_cuda_check("cuDeviceGet failed: res = %s\n");

    res = cuCtxCreate(&ctx, 0, dev);
    init_cuda_check("cuCtxCreate failed: res = %s\n");

    res = cuModuleLoad(&module, "./cuda_main.cubin");
    init_cuda_check("cuModuleLoad() failed: res = %s\n");

    res = cuModuleGetFunction(&function, module, "cuda_main");
    init_cuda_check("cuModuleGetFunction() failed: res = %s\n");

    /*
     * Launch preparation: shared-memory size and block shape.
     */
    res = cuFuncSetSharedSize(function, 0x40); /* just random */
    init_cuda_check("cuFuncSetSharedSize() failed: res = %s\n");

    res = cuFuncSetBlockShape(function, thread_num, thread_num, 1);
    init_cuda_check("cuFuncSetBlockShape() failed: res = %s\n");
}
开发者ID:CPFL,项目名称:gtraffic,代码行数:57,代码来源:ov.c
示例15: readCubinFileFromBuffers
/*
* Class: edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2
* Method: loadFunction
* Signature: ()V
*/
/*
 * Loads a module from the image assembled by readCubinFileFromBuffers,
 * resolves the entry kernel "_Z5entryPcS_PiPxS1_S0_i", selects the prefer-L1
 * cache configuration and lays out the kernel parameter block: six
 * CUdeviceptr values (taken from variables defined outside this function)
 * followed by the int num_blocks.
 *
 * NOTE(review): CHECK_STATUS is defined outside this block and presumably
 * reports the error and returns from the function - confirm. Because of that,
 * the image buffer is freed BEFORE the load status is checked; otherwise a
 * failed cuModuleLoadData would leak it.
 */
JNIEXPORT void JNICALL Java_edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2_loadFunction
(JNIEnv *env, jobject this_obj, jlong heap_end_ptr, jobject buffers, jint size,
 jint total_size, jint num_blocks){
    void * cubin_file;
    int offset;
    CUresult status;

    heapEndPtr = heap_end_ptr;

    //void * cubin_file = readCubinFile("code_file.cubin");
    cubin_file = readCubinFileFromBuffers(env, buffers, size, total_size);
    status = cuModuleLoadData(&cuModule, cubin_file);
    free(cubin_file);
    CHECK_STATUS(env,"error in cuModuleLoad",status)

    status = cuModuleGetFunction(&cuFunction, cuModule, "_Z5entryPcS_PiPxS1_S0_i");
    CHECK_STATUS(env,"error in cuModuleGetFunction",status)

    status = cuFuncSetCacheConfig(cuFunction, CU_FUNC_CACHE_PREFER_L1);
    CHECK_STATUS(env,"error in cuFuncSetCacheConfig",status)

    /* Parameter block: 6 device pointers + 1 int. */
    status = cuParamSetSize(cuFunction, (6 * sizeof(CUdeviceptr) + sizeof(int)));
    CHECK_STATUS(env,"error in cuParamSetSize",status)

    /* Each parameter is written at the running byte offset. */
    offset = 0;
    status = cuParamSetv(cuFunction, offset, (void *) &gcInfoSpace, sizeof(CUdeviceptr));
    CHECK_STATUS(env,"error in cuParamSetv gcInfoSpace",status)
    offset += sizeof(CUdeviceptr);

    status = cuParamSetv(cuFunction, offset, (void *) &gpuToSpace, sizeof(CUdeviceptr));
    CHECK_STATUS(env,"error in cuParamSetv gpuToSpace",status)
    offset += sizeof(CUdeviceptr);

    status = cuParamSetv(cuFunction, offset, (void *) &gpuHandlesMemory, sizeof(CUdeviceptr));
    CHECK_STATUS(env,"error in cuParamSetv gpuHandlesMemory %",status)
    offset += sizeof(CUdeviceptr);

    status = cuParamSetv(cuFunction, offset, (void *) &gpuHeapEndPtr, sizeof(CUdeviceptr));
    CHECK_STATUS(env,"error in cuParamSetv gpuHeapEndPtr",status)
    offset += sizeof(CUdeviceptr);

    status = cuParamSetv(cuFunction, offset, (void *) &gpuBufferSize, sizeof(CUdeviceptr));
    CHECK_STATUS(env,"error in cuParamSetv gpuBufferSize",status)
    offset += sizeof(CUdeviceptr);

    status = cuParamSetv(cuFunction, offset, (void *) &gpuExceptionsMemory, sizeof(CUdeviceptr));
    CHECK_STATUS(env,"error in cuParamSetv gpuExceptionsMemory",status)
    offset += sizeof(CUdeviceptr);

    status = cuParamSeti(cuFunction, offset, num_blocks);
    CHECK_STATUS(env,"error in cuParamSetv num_blocks",status)
    offset += sizeof(int);
}
开发者ID:breznak,项目名称:rootbeer1,代码行数:59,代码来源:CudaRuntime2.c
示例16: loadAndRunDualTestFunction
// Copies two host buffers to the device, runs kernel `name` from *phModule
// with the two device pointers as its only parameters, and copies both
// buffers back.
//
// thread_x/y/z set the block shape; block_x/y set the grid passed to
// cuLaunchGrid. block_z is accepted but not used by this function.
//
// Returns CUDA_SUCCESS, or the status of the first failing driver call after
// printing a message.
//
// Fixes relative to the earlier version: the function now actually returns a
// value (it was declared CUresult but had no return statement); every driver
// call is checked rather than only the last of each group; a failed step no
// longer continues with an invalid kernel handle; and the kernel name is
// passed with c_str() so it is guaranteed to be NUL-terminated.
CUresult loadAndRunDualTestFunction(CUmodule *phModule, std::string name, CUdeviceptr &d_data0,
                                    CUdeviceptr &d_data1,
                                    DataStruct *h_data0,
                                    DataStruct *h_data1,
                                    unsigned int memSize,
                                    int thread_x=1,int thread_y=1,int thread_z=1,
                                    int block_x=1, int block_y=1, int block_z=1)
{
    // load data the to device
    CUresult status = cuMemcpyHtoD(d_data0, h_data0, memSize);
    if (status == CUDA_SUCCESS)
        status = cuMemcpyHtoD(d_data1, h_data1, memSize);
    if (status != CUDA_SUCCESS) {
        printf("ERROR: during MemcpyHtoD\n");
        return status;
    }

    // Locate the kernel entry point
    CUfunction phKernel = 0;
    status = cuModuleGetFunction(&phKernel, *phModule, name.c_str());
    if (status != CUDA_SUCCESS) {
        printf("ERROR: could not load function\n");
        return status;
    }

    // Set the kernel parameters
    status = cuFuncSetBlockShape(phKernel, thread_x, thread_y, thread_z);
    if (status != CUDA_SUCCESS) {
        printf("ERROR: during setBlockShape\n");
        return status;
    }

    // Two device pointers, packed back to back in the parameter block.
    int paramOffset = 0;
    const int size = sizeof(CUdeviceptr);
    status = cuParamSetv(phKernel, paramOffset, &d_data0, size);
    paramOffset += size;
    if (status == CUDA_SUCCESS) {
        status = cuParamSetv(phKernel, paramOffset, &d_data1, size);
        paramOffset += size;
    }
    if (status == CUDA_SUCCESS)
        status = cuParamSetSize(phKernel, paramOffset);
    if (status != CUDA_SUCCESS) {
        printf("ERROR: during cuParamSetv\n");
        return status;
    }

    // Launch the kernel
    status = cuLaunchGrid(phKernel, block_x, block_y);
    if (status != CUDA_SUCCESS) {
        printf("ERROR: during grid launch\n");
        return status;
    }

    // Copy the result back to the host
    status = cuMemcpyDtoH(h_data0, d_data0, memSize);
    if (status == CUDA_SUCCESS)
        status = cuMemcpyDtoH(h_data1, d_data1, memSize);
    if (status != CUDA_SUCCESS) {
        printf("ERROR: during MemcpyDtoH\n");
        return status;
    }

    return CUDA_SUCCESS;
}
开发者ID:jrk,项目名称:llvmptxbackend,代码行数:52,代码来源:PTXBackendTestSuite.cpp
示例17: compute_tran_temp
/*
 * Repeatedly launches the kernel "_Z14calculate_tempiPfS_S_iiiiffffff",
 * alternating between the two buffers in MatrixTemp as source and
 * destination, until total_iterations steps have been requested in chunks of
 * at most num_iterations.
 *
 * The grid is blockCols x blockRows blocks of BLOCK_SIZE x BLOCK_SIZE
 * threads, and 0xc00 bytes of dynamic shared memory are requested per launch.
 * The physical constants (Cap, Rx, Ry, Rz, step) are derived from values
 * defined outside this function.
 *
 * RETURN VALUE - NOTE(review): although the declared type is CUresult, the
 * function returns the index (0 or 1) of the MatrixTemp buffer written last,
 * and returns 0 on failure, which is indistinguishable from index 0 and from
 * CUDA_SUCCESS. This convention is kept unchanged for existing callers.
 *
 * Fixes relative to the earlier version: the iteration counter is an int
 * rather than a float (a float cannot represent every large integer step),
 * and the launch-failure message names this kernel instead of "euclid".
 */
CUresult compute_tran_temp
(CUmodule mod, CUdeviceptr MatrixPower, CUdeviceptr MatrixTemp[2],
 int col, int row, int total_iterations, int num_iterations, int blockCols,
 int blockRows, int borderCols, int borderRows)
{
    int gdx = blockCols;
    int gdy = blockRows;
    int bdx = BLOCK_SIZE;
    int bdy = BLOCK_SIZE;

    float grid_height = chip_height / row;
    float grid_width = chip_width / col;
    float Cap = FACTOR_CHIP * SPEC_HEAT_SI * t_chip * grid_width * grid_height;
    float Rx = grid_width / (2.0 * K_SI * t_chip * grid_height);
    float Ry = grid_height / (2.0 * K_SI * t_chip * grid_width);
    float Rz = t_chip / (K_SI * grid_height * grid_width);
    float max_slope = MAX_PD / (FACTOR_CHIP * t_chip * SPEC_HEAT_SI);
    float step = PRECISION / max_slope;
    int t;
    float time_elapsed;
    time_elapsed=0.001;

    /* src/dst are swapped at the top of every pass, so the first launch
     * reads MatrixTemp[0] and writes MatrixTemp[1]. */
    int src = 1, dst = 0;
    CUfunction f;
    CUresult res;

    res = cuModuleGetFunction(&f, mod, "_Z14calculate_tempiPfS_S_iiiiffffff");
    if (res != CUDA_SUCCESS) {
        printf("cuModuleGetFunction failed: res = %u\n", res);
        return 0;
    }

    for (t = 0; t < total_iterations; t+=num_iterations) {
        /* The last chunk may be shorter than num_iterations. */
        int it = MIN(num_iterations, total_iterations-t);
        int temp = src;
        src = dst;
        dst = temp;
        void *param[] = {&it, &MatrixPower, &MatrixTemp[src], &MatrixTemp[dst],
                         &col, &row, &borderCols, &borderRows, &Cap,
                         &Rx, &Ry, &Rz, &step, &time_elapsed};
        res = cuLaunchKernel(f, gdx, gdy, 1, bdx, bdy, 1, 0xc00, 0,
                             (void**) param, NULL);
        if (res != CUDA_SUCCESS) {
            printf("cuLaunchKernel(calculate_temp) failed: res = %u\n", res);
            return 0;
        }
    }

    return dst;
}
开发者ID:Constellation,项目名称:gdev-bench,代码行数:52,代码来源:hotspot_cuda.c
示例18: module_
// Builds a compilation context for one device: makes that device's context
// current through the CUDA manager, loads `image` as a module using the given
// JIT options, and resolves `kernel_name` from it.
//
// `cuda_mgr` is stored as an opaque pointer and used here as a
// CudaMgr_Namespace::CudaMgr. Both driver calls go through checkCudaErrors,
// and the loaded module handle is verified with CHECK.
GpuCompilationContext::GpuCompilationContext(const void* image,
                                             const std::string& kernel_name,
                                             const int device_id,
                                             const void* cuda_mgr,
                                             unsigned int num_options,
                                             CUjit_option* options,
                                             void** option_vals)
    : module_(nullptr)
    , kernel_(nullptr)
    , device_id_(device_id)
    , cuda_mgr_(cuda_mgr) {
  // Select the target device before touching the driver.
  const auto* manager = static_cast<const CudaMgr_Namespace::CudaMgr*>(cuda_mgr_);
  manager->setContext(device_id_);

  checkCudaErrors(cuModuleLoadDataEx(&module_, image, num_options, options, option_vals));
  CHECK(module_);

  checkCudaErrors(cuModuleGetFunction(&kernel_, module_, kernel_name.c_str()));
}
开发者ID:kanak,项目名称:mapd-core,代码行数:13,代码来源:NvidiaKernel.cpp
示例19: __get_kernel
// Looks up `entry_name` in the module __mod (defined outside this function)
// and returns the resulting handle.
//
// NOTE(review): the lookup result is consumed by TIME_CALL, which is defined
// outside this block; no check is visible here, so `f` may be returned
// uninitialised if the lookup fails - confirm what TIME_CALL does on error.
static CUfunction __get_kernel(void *user_context, const char* entry_name)
{
CUfunction f;
// `msg` exists only in DEBUG builds but is passed to TIME_CALL
// unconditionally; presumably TIME_CALL discards that argument when DEBUG is
// not defined - confirm.
#ifdef DEBUG
char msg[256];
snprintf(msg, 256, "get_kernel %s (t=%lld)", entry_name, (long long)halide_current_time_ns(user_context) );
#endif
// Get kernel function ptr
TIME_CALL( cuModuleGetFunction(&f, __mod, entry_name), msg );
return f;
}
开发者ID:EEmmanuel7,项目名称:Halide,代码行数:14,代码来源:cuda.cpp
示例20: R_auto_cuModuleGetFunction
/*
 * R binding for cuModuleGetFunction.
 *
 * r_hfunc  reference unwrapped with GET_REF to a CUfunction* that receives
 *          the handle
 * r_hmod   reference unwrapped with getRReference to the CUmodule
 * r_name   character vector; element 0 is the kernel name
 *
 * Returns the driver result converted by Renum_convert_CUresult.
 */
SEXP R_auto_cuModuleGetFunction(SEXP r_hfunc, SEXP r_hmod, SEXP r_name)
{
    CUfunction *func_out = GET_REF(r_hfunc, CUfunction);
    CUmodule module = (CUmodule) getRReference(r_hmod);
    const char *kernel_name = CHAR(STRING_ELT(r_name, 0));

    CUresult status = cuModuleGetFunction(func_out, module, kernel_name);

    return Renum_convert_CUresult(status);
}
开发者ID:PachoAlvarez,项目名称:RCUDA,代码行数:14,代码来源:autoModule.c
注:本文中的cuModuleGetFunction函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论