C++ cuModuleGetFunction函数代码示例

OGeek|极客世界-中国程序员成长平台 › 门户 › 编程› C++›C++教程

原作者: [db:作者] 来自: [db:来源] 收藏邀请

本文整理汇总了C++中cuModuleGetFunction函数的典型用法代码示例。如果您正苦于以下问题：C++ cuModuleGetFunction函数的具体用法？C++ cuModuleGetFunction怎么用？C++ cuModuleGetFunction使用的例子？那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了cuModuleGetFunction函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: loadCUDAModules

CUresult loadCUDAModules()
{
    CUmodule cuModule_;
    checkCudaErrors(cuModuleLoad(&cuModule_, "videoPP64.ptx"));
    checkCudaErrors(cuModuleGetFunction(&g_kernelNV12toARGB, cuModule_, "NV12ToARGBdrvapi"));
    checkCudaErrors(cuModuleGetFunction(&g_kernelARGBtoNV12, cuModule_, "ARGBToNv12drvapi"));
    checkCudaErrors(cuModuleGetFunction(&g_kernelARGBpostprocess, cuModule_, "ARGBpostprocess"));
}

开发者ID:ttyio，项目名称:videopp，代码行数:8，代码来源:cudaProcessFrame.cpp

示例2: cuModuleGetFunction

CUfunction CudaModule::getKernel(const std::string& name, int paramSize)
{
  CUfunction kernel = NULL;
  cuModuleGetFunction(&kernel, m_module, name.c_str());
  
  if (!kernel) {
    std::string funcName(std::string("__globfunc_") + name);    
    cuModuleGetFunction( &kernel, m_module, funcName.c_str() );
  }
  
  if (kernel) {
    checkError( "cuParamSetSize", cuParamSetSize(kernel, paramSize));
  }
  return kernel;
}

开发者ID:tcoppex，项目名称:cudaraster-linux，代码行数:15，代码来源:CudaModule.cpp

示例3: cuModuleGetFunction

/* int is 64-bit for some reason... */
CUresult bpnn_adjust_weights_launch
(CUmodule mod, CUdeviceptr delta, long hid, CUdeviceptr ly, long in,          
 CUdeviceptr w, CUdeviceptr oldw) 
{
	int bdx, bdy, gdx, gdy;
	void* param[] = {&delta, &hid, &ly, &in, &w, &oldw};
	CUfunction f;
	CUresult res;

	bdx = 16;
	bdy = 16;
	gdx = 1;
	gdy = num_blocks;

	/* get functions. */
	res = cuModuleGetFunction(&f, mod, "_Z24bpnn_adjust_weights_cudaPfiS_iS_S_");
	if (res != CUDA_SUCCESS) {
		printf("cuModuleGetFunction(adjust_weights) failed: res = %u\n", res);
		return res;
	}

	res = cuLaunchKernel(f, gdx, gdy, 1, bdx, bdy, 1, 0, 0, (void**) param, 0);
	if (res != CUDA_SUCCESS) {
		printf("cuLaunchKernel(adjust_weights) failed: res = %u\n", res);
		return res;
	}

	return CUDA_SUCCESS;
}

开发者ID:Constellation，项目名称:gdev-bench，代码行数:30，代码来源:backprop_cuda.c

示例4: CreateCuFunction

CUresult CreateCuFunction(const char* name, CuModule* module, int3 blockShape, 
	FunctionPtr* ppFunction) {
	CUfunction func;
	CUresult result = cuModuleGetFunction(&func, module->Handle(), name);
	if(CUDA_SUCCESS != result) return result;

	FunctionPtr f(new CuFunction);

	CuFuncAttr& attr = f->_attributes;
	cuFuncGetAttribute(&attr.maxThreadsPerBlock,
		CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func);
	cuFuncGetAttribute(&attr.sharedSizeBytes, 
		CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, func);
	cuFuncGetAttribute(&attr.constSizeBytes, 
		CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES, func);
	cuFuncGetAttribute(&attr.localSizeBytes, 
		CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, func);
	cuFuncGetAttribute(&attr.numRegs, 
		CU_FUNC_ATTRIBUTE_NUM_REGS, func);
	cuFuncGetAttribute(&attr.ptxVersion, 
		CU_FUNC_ATTRIBUTE_PTX_VERSION, func);
	cuFuncGetAttribute(&attr.binaryVersion, 
		CU_FUNC_ATTRIBUTE_BINARY_VERSION, func);
	f->_function = func;
	f->_module = module;
	f->_functionName = name;
	f->_blockShape = blockShape;
	ppFunction->swap(f);
	return CUDA_SUCCESS;
}

开发者ID:CindyYang85，项目名称:mgpu，代码行数:30，代码来源:cucpp.cpp

示例5: main

int main(){
	init_test();
	const std::string source = 
	".version 4.2\n"
	".target sm_20\n"
	".address_size 64\n"
	".visible .entry kernel(.param .u64 kernel_param_0) {\n"
	".reg .s32 	%r<2>;\n"
	".reg .s64 	%rd<3>;\n"
	"bra 	BB1_2;\n"
	"ld.param.u64 	%rd1, [kernel_param_0];\n"
	"cvta.to.global.u64 	%rd2, %rd1;\n"
	"mov.u32 	%r1, 5;\n"
	"st.global.u32 	[%rd2], %r1;\n"
	"BB1_2: ret;\n"
	"}\n";
	CUmodule modId = 0;
	CUfunction funcHandle = 0;
	cu_assert(cuModuleLoadData(&modId, source.c_str()));
	cu_assert(cuModuleGetFunction(&funcHandle, modId, "kernel"));
	CUdeviceptr devValue;
	int hostValue = 10;
	cu_assert(cuMemAlloc(&devValue, sizeof(int)));
	cu_assert(cuMemcpyHtoD(devValue, &hostValue, sizeof(hostValue)));
	void * params[] = {&devValue};
	cu_assert(cuLaunchKernel(funcHandle, 1,1,1, 1,1,1, 0,0, params, nullptr));
	cu_assert(cuMemcpyDtoH(&hostValue, devValue, sizeof(hostValue)));
	assert(hostValue == 10);
	std::cout << hostValue << "\n";
	cu_assert(cuMemFree(devValue));
	cu_assert(cuModuleUnload(modId));
	return 0;
}

开发者ID:sebsgit，项目名称:gemu，代码行数:33，代码来源:basic_branch.cpp

示例6: halide_dev_run

WEAK int halide_dev_run(void *user_context,
                        void *state_ptr,
                        const char* entry_name,
                        int blocksX, int blocksY, int blocksZ,
                        int threadsX, int threadsY, int threadsZ,
                        int shared_mem_bytes,
                        size_t arg_sizes[],
                        void* args[]) {
    DEBUG_PRINTF( user_context, "CUDA: halide_dev_run (user_context: %p, entry: %s, blocks: %dx%dx%d, threads: %dx%dx%d, shmem: %d)\n",
                  user_context, entry_name,
                  blocksX, blocksY, blocksZ,
                  threadsX, threadsY, threadsZ,
                  shared_mem_bytes );

    CUresult err;
    CudaContext ctx(user_context);
    if (ctx.error != CUDA_SUCCESS) {
        return ctx.error;
    }

    #ifdef DEBUG
    uint64_t t_before = halide_current_time_ns(user_context);
    #endif

    halide_assert(user_context, state_ptr);
    CUmodule mod = ((module_state*)state_ptr)->module;
    halide_assert(user_context, mod);
    CUfunction f;
    err = cuModuleGetFunction(&f, mod, entry_name);
    if (err != CUDA_SUCCESS) {
        halide_error_varargs(user_context, "CUDA: cuModuleGetFunction failed (%s)",
                             _get_error_name(err));
        return err;
    }

    err = cuLaunchKernel(f,
                         blocksX,  blocksY,  blocksZ,
                         threadsX, threadsY, threadsZ,
                         shared_mem_bytes,
                         NULL, // stream
                         args,
                         NULL);
    if (err != CUDA_SUCCESS) {
        halide_error_varargs(user_context, "CUDA: cuLaunchKernel failed (%s)",
                             _get_error_name(err));
        return err;
    }

    #ifdef DEBUG
    err = cuCtxSynchronize();
    if (err != CUDA_SUCCESS) {
        halide_error_varargs(user_context, "CUDA: cuCtxSynchronize failed (%s)\n",
                             _get_error_name(err));
        return err;
    }
    uint64_t t_after = halide_current_time_ns(user_context);
    halide_printf(user_context, "    Time: %f ms\n", (t_after - t_before) / 1.0e6);
    #endif
    return 0;
}

开发者ID:bnascimento，项目名称:Halide，代码行数:60，代码来源:cuda.cpp

示例7: cuda_over_map

Object cuda_over_map(Object self, int nparts, int *argcv,
        Object *argv, int flags) {
    CUresult error;
    cuInit(0);
    int deviceCount = 0;
    error = cuDeviceGetCount(&deviceCount);
    if (deviceCount == 0) {
        raiseError("No CUDA devices found");
    }
    CUdevice cuDevice;
    CUcontext cuContext;
    CUmodule cuModule;
    CUfunction cuFunc;
    error = cuDeviceGet(&cuDevice, 0);
    error = cuCtxCreate(&cuContext, 0, cuDevice);
    CUdeviceptr d_A;
    CUdeviceptr d_B;
    CUdeviceptr d_res;
    errcheck(cuModuleLoad(&cuModule, grcstring(argv[argcv[0]])));
    CUdeviceptr dps[argcv[0]];
    void *args[argcv[0]+2];
    int size = INT_MAX;
    for (int i=0; i<argcv[0]; i++) {
        struct CudaFloatArray *a = (struct CudaFloatArray *)argv[i];
        if (a->size < size)
            size = a->size;
        errcheck(cuMemAlloc(&dps[i], size * sizeof(float)));
        errcheck(cuMemcpyHtoD(dps[i], &a->data, size * sizeof(float)));
        args[i+1] = &dps[i];
    }
    struct CudaFloatArray *r =
        (struct CudaFloatArray *)(alloc_CudaFloatArray(size));
    int fsize = sizeof(float) * size;
    errcheck(cuMemAlloc(&d_res, fsize));
    errcheck(cuMemcpyHtoD(d_res, &r->data, fsize));
    args[0] = &d_res;
    args[argcv[0]+1] = &size;

    int threadsPerBlock = 256;
    int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
    char name[256];
    strcpy(name, "block");
    strcat(name, grcstring(argv[argcv[0]]) + strlen("_cuda/"));
    for (int i=0; name[i] != 0; i++)
        if (name[i] == '.') {
            name[i] = 0;
            break;
        }
    errcheck(cuModuleGetFunction(&cuFunc, cuModule, name));
    errcheck(cuLaunchKernel(cuFunc, blocksPerGrid, 1, 1,
        threadsPerBlock, 1, 1,
        0,
        NULL, args, NULL));
    errcheck(cuMemcpyDtoH(&r->data, d_res, fsize));
    cuMemFree(d_res);
    for (int i=0; i<argcv[0]; i++)
        cuMemFree(dps[i]);
    return (Object)r;
}

开发者ID:mwh，项目名称:grace-cuda，代码行数:59，代码来源:cuda.c

示例8: initCuda

static CUresult
initCuda(CUcontext _cuContext, char* executablePath, CUfunction *mathop, 
	int argc, char** argv, const char* cubin_name, const char* kernel_name)
{
    CUdevice cuDevice;
    CUT_DEVICE_INIT_DRV(cuDevice, argc, argv);
    print_GetProperties(cuDevice);

    CUresult status = cuCtxCreate( &_cuContext, 0, cuDevice );
    if ( CUDA_SUCCESS != status ) {
        Error(_cuContext, status);
    }
    else printf("(1) context creation successful\n");

    char* module_path = cutFindFilePath(cubin_name, executablePath);
    printf ("\t cubin:%s, path:%s, mmp_ptr:%lu\n", cubin_name, executablePath, module_path);
    if(module_path != NULL)
      printf ("\t cubin:%s, path:%s, module_path:%c%c%c%c\n", cubin_name, executablePath, *module_path, *(module_path+1), *(module_path+2), *(module_path+3));
    char* data_path = "./data/";
    size_t len_path = strlen(data_path);
    size_t len_fn = strlen(cubin_name);
    // printf ("Sizes: data:%lu, cubinname:%lu\n", len_path, len_fn);

    char* module_path_new = (char*)malloc(sizeof(char) * (len_path + len_fn));
    strcpy(module_path_new, data_path);
    strcat(module_path_new, cubin_name);
    strcat(module_path_new, "\0");
    if (module_path_new == 0) {
        status = CUDA_ERROR_NOT_FOUND;
        Error(_cuContext, status);
    }
    FILE *fp = fopen(module_path_new,"r");
    if( fp ) {
	printf("(2) cubin_File found in modulepath:%s\n", module_path_new);
        fclose(fp);
    } else {
	printf("(2) cubin file not exist: %s\n", module_path_new);
    }
    CUmodule cuModule;
    status = cuModuleLoad(&cuModule, module_path_new);
    cutFree(module_path_new);
    if ( CUDA_SUCCESS != status ) {
        Error(_cuContext, status);
    }
    else printf ("(3) module Load successful\n");

    CUfunction cuFunction = 0;
    status = cuModuleGetFunction(&cuFunction, cuModule, kernel_name);
    if ( CUDA_SUCCESS != status) {
        Error(_cuContext, status);
    }
    else printf ("(4) getFunction successful w/cuFunction\n");

    *mathop = cuFunction;

    return CUDA_SUCCESS;

}

开发者ID:skchoe，项目名称:2012.Functional-GPU-Programming，代码行数:58，代码来源:pass_drv.cpp

示例9: ptxJIT

/*
 * This function load the ptx file ptxPath and extract the kernel kName
 * to phKernel
 * @param phKernel    Output kernel handle
 * @param ptxPath     ptx file name
 * @param kName       kernel name
 */
void ptxJIT(CUmodule *phModule, CUfunction *phKernel, const char *ptxPath, const char *kName)
{
  CUlinkState cuLinkState;
  CUjit_option options[6];
  void *optionVals[6];
  float walltime;
  char error_log[8192], info_log[8192];
  unsigned int logSize = 8192;
  void *cuOut;
  size_t outSize;
  int myErr = 0;

  // Setup linker options
  // Return walltime from JIT compilation
  options[0] = CU_JIT_WALL_TIME;
  optionVals[0] = (void *) &walltime;
  // Pass a buffer for info messages
  options[1] = CU_JIT_INFO_LOG_BUFFER;
  optionVals[1] = (void *) info_log;
  // Pass the size of the info buffer
  options[2] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
  optionVals[2] = (void *) (long)logSize;
  // Pass a buffer for error message
  options[3] = CU_JIT_ERROR_LOG_BUFFER;
  optionVals[3] = (void *) error_log;
  // Pass the size of the error buffer
  options[4] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
  optionVals[4] = (void *) (long) logSize;
  // Make the linker verbose
  options[5] = CU_JIT_LOG_VERBOSE;
  optionVals[5] = (void *) 1;

  // Create a pending linker invocation
  checkCudaErrors(cuLinkCreate(6,options, optionVals, &cuLinkState));

  // Load the ptx from the file
  myErr = cuLinkAddFile(cuLinkState, CU_JIT_INPUT_PTX, ptxPath, 0, 0, 0);
  if (myErr != CUDA_SUCCESS){
    // Errors will be put in error_log, per CU_JIT_ERROR_LOG_BUFFER option above.
    fprintf(stderr,"PTX Linker Error:\n%s\n",error_log);
  }

  // Complete the linker step
  checkCudaErrors(cuLinkComplete(cuLinkState, &cuOut, &outSize));

  // Linker walltime and info_log were requested in options above.
  printf("CUDA Link Completed in %fms. Linker Output:\n%s\n", walltime, info_log);

  // Load resulting cuBin into module
  checkCudaErrors(cuModuleLoadData(phModule, cuOut));

  // Locate the kernel entry point
  checkCudaErrors(cuModuleGetFunction(phKernel, *phModule, kName));

  // Destroy the linker invocation
  checkCudaErrors(cuLinkDestroy(cuLinkState));
}

开发者ID:zhanglx13，项目名称:Automated_particle_filter_design_system，代码行数:64，代码来源:clock.cpp

示例10: main

/**
 * This measures the overhead in launching a kernel function on each GPU in the
 * system.
 *
 * It does this by executing a small kernel (copying 1 value in global memory) a
 * very large number of times and taking the average execution time.  This
 * program uses the CUDA driver API.
 */
int main() {
  CU_ERROR_CHECK(cuInit(0));

  int count;
  CU_ERROR_CHECK(cuDeviceGetCount(&count));

  float x = 5.0f;
  for (int d = 0; d < count; d++) {
    CUdevice device;
    CU_ERROR_CHECK(cuDeviceGet(&device, d));

    CUcontext context;
    CU_ERROR_CHECK(cuCtxCreate(&context, 0, device));

    CUdeviceptr in, out;
    CU_ERROR_CHECK(cuMemAlloc(&in, sizeof(float)));
    CU_ERROR_CHECK(cuMemAlloc(&out, sizeof(float)));
    CU_ERROR_CHECK(cuMemcpyHtoD(in, &x, sizeof(float)));

    CUmodule module;
    CU_ERROR_CHECK(cuModuleLoadData(&module, imageBytes));

    CUfunction function;
    CU_ERROR_CHECK(cuModuleGetFunction(&function, module, "kernel"));

    void * params[] = { &in, &out };

    CUevent start, stop;
    CU_ERROR_CHECK(cuEventCreate(&start, 0));
    CU_ERROR_CHECK(cuEventCreate(&stop, 0));

    CU_ERROR_CHECK(cuEventRecord(start, 0));
    for (int i = 0; i < ITERATIONS; i++)
      CU_ERROR_CHECK(cuLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, params, NULL));

    CU_ERROR_CHECK(cuEventRecord(stop, 0));
    CU_ERROR_CHECK(cuEventSynchronize(stop));

    float time;
    CU_ERROR_CHECK(cuEventElapsedTime(&time, start, stop));

    CU_ERROR_CHECK(cuEventDestroy(start));
    CU_ERROR_CHECK(cuEventDestroy(stop));

    CU_ERROR_CHECK(cuMemFree(in));
    CU_ERROR_CHECK(cuMemFree(out));

    fprintf(stdout, "Device %d: %fms\n", d, (time / (double)ITERATIONS));

    CU_ERROR_CHECK(cuModuleUnload(module));

    CU_ERROR_CHECK(cuCtxDestroy(context));
  }

  return 0;
}

开发者ID:garymacindoe，项目名称:cuda-cholesky，代码行数:64，代码来源:kernel-test.c

示例11: TestSAXPY

CUresult
TestSAXPY( chCUDADevice *chDevice, size_t N, float alpha )
{
    CUresult status;
    CUdeviceptr dptrOut = 0;
    CUdeviceptr dptrIn = 0;
    float *hostOut = 0;
    float *hostIn = 0;

    CUDA_CHECK( cuCtxPushCurrent( chDevice->context() ) );

    CUDA_CHECK( cuMemAlloc( &dptrOut, N*sizeof(float) ) );
    CUDA_CHECK( cuMemsetD32( dptrOut, 0, N ) );
    CUDA_CHECK( cuMemAlloc( &dptrIn, N*sizeof(float) ) );
    CUDA_CHECK( cuMemHostAlloc( (void **) &hostOut, N*sizeof(float), 0 ) );
    CUDA_CHECK( cuMemHostAlloc( (void **) &hostIn, N*sizeof(float), 0 ) );
    for ( size_t i = 0; i < N; i++ ) {
        hostIn[i] = (float) rand() / (float) RAND_MAX;
    }
    CUDA_CHECK( cuMemcpyHtoDAsync( dptrIn, hostIn, N*sizeof(float ), NULL ) );

    {
        CUmodule moduleSAXPY;
        CUfunction kernelSAXPY;
        void *params[] = { &dptrOut, &dptrIn, &N, &alpha };
        
        moduleSAXPY = chDevice->module( "saxpy.ptx" );
        if ( ! moduleSAXPY ) {
            status = CUDA_ERROR_NOT_FOUND;
            goto Error;
        }
        CUDA_CHECK( cuModuleGetFunction( &kernelSAXPY, moduleSAXPY, "saxpy" ) );

        CUDA_CHECK( cuLaunchKernel( kernelSAXPY, 1500, 1, 1, 512, 1, 1, 0, NULL, params, NULL ) );

    }

    CUDA_CHECK( cuMemcpyDtoHAsync( hostOut, dptrOut, N*sizeof(float), NULL ) );
    CUDA_CHECK( cuCtxSynchronize() );
    for ( size_t i = 0; i < N; i++ ) {
        if ( fabsf( hostOut[i] - alpha*hostIn[i] ) > 1e-5f ) {
            status = CUDA_ERROR_UNKNOWN;
            goto Error;
        }
    }
    status = CUDA_SUCCESS;
    printf( "Well it worked!\n" );

Error:
    cuCtxPopCurrent( NULL );
    cuMemFreeHost( hostOut );
    cuMemFreeHost( hostIn );
    cuMemFree( dptrOut );
    cuMemFree( dptrIn );
    return status;
}

开发者ID:AnilVarmaBiruduraju，项目名称:cudahandbook，代码行数:56，代码来源:saxpyDrv.cpp

示例12: cuModuleLoad

/*
 * Class:     edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2
 * Method:    loadFunction
 * Signature: ()V
 */
JNIEXPORT void JNICALL Java_edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2_loadFunction
  (JNIEnv *env, jobject this_obj, jlong heap_end_ptr, jstring filename, jint num_blocks){

  void * cubin_file;
  int offset;
  CUresult status;
  char * native_filename;
  heapEndPtr = heap_end_ptr;
  
  native_filename = (*env)->GetStringUTFChars(env, filename, 0);
  status = cuModuleLoad(&cuModule, native_filename);
  CHECK_STATUS(env, "error in cuModuleLoad", status);
  (*env)->ReleaseStringUTFChars(env, filename, native_filename);

  status = cuModuleGetFunction(&cuFunction, cuModule, "_Z5entryPcS_PiPxS1_S0_S0_i"); 
  CHECK_STATUS(env,"error in cuModuleGetFunction",status)

  status = cuFuncSetCacheConfig(cuFunction, CU_FUNC_CACHE_PREFER_L1);
  CHECK_STATUS(env,"error in cuFuncSetCacheConfig",status)

  status = cuParamSetSize(cuFunction, (7 * sizeof(CUdeviceptr) + sizeof(int))); 
  CHECK_STATUS(env,"error in cuParamSetSize",status)

  offset = 0;
  status = cuParamSetv(cuFunction, offset, (void *) &gcInfoSpace, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gcInfoSpace",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(cuFunction, offset, (void *) &gpuToSpace, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gpuToSpace",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(cuFunction, offset, (void *) &gpuHandlesMemory, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gpuHandlesMemory %",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(cuFunction, offset, (void *) &gpuHeapEndPtr, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gpuHeapEndPtr",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(cuFunction, offset, (void *) &gpuBufferSize, sizeof(CUdeviceptr));
  CHECK_STATUS(env,"error in cuParamSetv gpuBufferSize",status)
  offset += sizeof(CUdeviceptr); 

  status = cuParamSetv(cuFunction, offset, (void *) &gpuExceptionsMemory, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gpuExceptionsMemory",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(cuFunction, offset, (void *) &gpuClassMemory, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gpuClassMemory",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSeti(cuFunction, offset, num_blocks); 
  CHECK_STATUS(env,"error in cuParamSetv num_blocks",status)
  offset += sizeof(int);
}

开发者ID:Chiru2015，项目名称:rootbeer1，代码行数:61，代码来源:CudaRuntime2.c

示例13: spoc_cuda_debug_compile

CAMLprim value spoc_cuda_debug_compile(value moduleSrc, value function_name, value gi){
	CAMLparam3(moduleSrc, function_name, gi);
	CUmodule module;
	CUfunction *kernel;
	char* functionN;
	char *ptx_source;
	const unsigned int jitNumOptions = 4;

	CUjit_option jitOptions[4];
	void *jitOptVals[4];
	int jitLogBufferSize;
	char *jitLogBuffer;
	int jitRegCount = 32;

	BLOCKING_CUDA_GET_CONTEXT;

	kernel = malloc(sizeof(CUfunction));
	functionN = String_val(function_name);
	ptx_source = String_val(moduleSrc);

	// set up size of compilation log buffer
	jitOptions[0] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
	jitLogBufferSize = 1024;
	jitOptVals[0] = (void *)(size_t)jitLogBufferSize;

	// set up pointer to the compilation log buffer
	jitOptions[1] = CU_JIT_INFO_LOG_BUFFER;
	jitLogBuffer = malloc(sizeof(char)*jitLogBufferSize);
	jitOptVals[1] = jitLogBuffer;

	// set up pointer to set the Maximum # of registers for a particular kernel
	jitOptions[2] = CU_JIT_MAX_REGISTERS;
	jitOptVals[2] = (void *)(size_t)jitRegCount;

	jitOptions[3] = CU_JIT_TARGET_FROM_CUCONTEXT;
	//CU_JIT_TARGET;
//	jitOptVals[3] =  (void*)(uintptr_t)CU_TARGET_COMPUTE_10;


	cuda_error = (cuModuleLoadDataEx(&module, ptx_source, jitNumOptions, jitOptions, (void **)jitOptVals));
	if (cuda_error)
	  {
	    fprintf (stderr,"%s\n", jitLogBuffer);
	    fflush (stderr);
	  }
	cuda_error = (cuModuleGetFunction(kernel, module, functionN));
	if (cuda_error)
	  {
	    fprintf (stderr, "%s\n", jitLogBuffer);
	    fflush (stderr);
	  }
	BLOCKING_CUDA_RESTORE_CONTEXT;
	free(jitLogBuffer);
	CAMLreturn((value) kernel);
}

开发者ID:archonSTB，项目名称:SPOC，代码行数:55，代码来源:Kernel_cuda.c

示例14: init_cuda

/*
 * Initializaiton in order to use kernel program 
 */
void
init_cuda(void){

  thread_num = (N <= 16) ? N : 16 ;  
  block_num = N / (thread_num*thread_num);
  if(N % (thread_num*thread_num) != 0) block_num++;
  
  res = cuInit(0);
  if(res != CUDA_SUCCESS){
    printf("cuInit failed: res = %s\n", conv(res));
    exit(1);
  }
  
  res = cuDeviceGet(&dev, 0);
  if(res != CUDA_SUCCESS){
    printf("cuDeviceGet failed: res = %s\n", conv(res));
    exit(1);
  }
  
  res = cuCtxCreate(&ctx, 0, dev);
  if(res != CUDA_SUCCESS){
    printf("cuCtxCreate failed: res = %s\n", conv(res));
    exit(1);
  }
  
  res = cuModuleLoad(&module, "./cuda_main.cubin");
  if(res != CUDA_SUCCESS){
    printf("cuModuleLoad() failed: res = %s\n", conv(res));
    exit(1);
  }
  
  res = cuModuleGetFunction(&function, module, "cuda_main");
  if(res != CUDA_SUCCESS){
    printf("cuModuleGetFunction() failed: res = %s\n",  conv(res));
    exit(1);
  }
  

  /* 
   * preparation for launch kernel 
   */
  res = cuFuncSetSharedSize(function, 0x40);  /* just random */
  if(res != CUDA_SUCCESS){
    printf("cuFuncSetSharedSize() failed: res = %s\n", conv(res));
    exit(1);
  }
  
  res = cuFuncSetBlockShape(function, thread_num, thread_num, 1);
  if(res != CUDA_SUCCESS){
    printf("cuFuncSetBlockShape() failed: res = %s\n", conv(res));
    exit(1);
  }

}

开发者ID:CPFL，项目名称:gtraffic，代码行数:57，代码来源:ov.c

示例15: readCubinFileFromBuffers

/*
 * Class:     edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2
 * Method:    loadFunction
 * Signature: ()V
 */
JNIEXPORT void JNICALL Java_edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2_loadFunction
  (JNIEnv *env, jobject this_obj, jlong heap_end_ptr, jobject buffers, jint size, 
   jint total_size, jint num_blocks){

  void * cubin_file;
  int offset;
  CUresult status;
  heapEndPtr = heap_end_ptr;
  
  //void * cubin_file = readCubinFile("code_file.cubin");
  cubin_file = readCubinFileFromBuffers(env, buffers, size, total_size);
  status = cuModuleLoadData(&cuModule, cubin_file);
  CHECK_STATUS(env,"error in cuModuleLoad",status)
  
  free(cubin_file);

  status = cuModuleGetFunction(&cuFunction, cuModule, "_Z5entryPcS_PiPxS1_S0_i"); 
  CHECK_STATUS(env,"error in cuModuleGetFunction",status)

  status = cuFuncSetCacheConfig(cuFunction, CU_FUNC_CACHE_PREFER_L1);
  CHECK_STATUS(env,"error in cuFuncSetCacheConfig",status)

  status = cuParamSetSize(cuFunction, (6 * sizeof(CUdeviceptr) + sizeof(int))); 
  CHECK_STATUS(env,"error in cuParamSetSize",status)

  offset = 0;
  status = cuParamSetv(cuFunction, offset, (void *) &gcInfoSpace, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gcInfoSpace",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(cuFunction, offset, (void *) &gpuToSpace, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gpuToSpace",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(cuFunction, offset, (void *) &gpuHandlesMemory, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gpuHandlesMemory %",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(cuFunction, offset, (void *) &gpuHeapEndPtr, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gpuHeapEndPtr",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(cuFunction, offset, (void *) &gpuBufferSize, sizeof(CUdeviceptr));
  CHECK_STATUS(env,"error in cuParamSetv gpuBufferSize",status)
  offset += sizeof(CUdeviceptr); 

  status = cuParamSetv(cuFunction, offset, (void *) &gpuExceptionsMemory, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gpuExceptionsMemory",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSeti(cuFunction, offset, num_blocks); 
  CHECK_STATUS(env,"error in cuParamSetv num_blocks",status)
  offset += sizeof(int);
}

开发者ID:breznak，项目名称:rootbeer1，代码行数:59，代码来源:CudaRuntime2.c

示例16: loadAndRunDualTestFunction

CUresult loadAndRunDualTestFunction(CUmodule *phModule, std::string name, CUdeviceptr &d_data0, 
				CUdeviceptr &d_data1, 
				DataStruct *h_data0, 
				DataStruct *h_data1, 
				unsigned int memSize, 
                                int thread_x=1,int thread_y=1,int thread_z=1,
                                int block_x=1, int block_y=1, int block_z=1)
{
  //  std::cout << "  Start Loading" << std::endl;

  // load data the to device
  cuMemcpyHtoD(d_data0, h_data0, memSize);         
  cuMemcpyHtoD(d_data1, h_data1, memSize);         

  // Locate the kernel entry point
  CUfunction phKernel = 0;
  CUresult status = cuModuleGetFunction(&phKernel, *phModule, name.data());
   if (status != CUDA_SUCCESS)
     {printf("ERROR: could not load function\n");}
    
  // Set the kernel parameters
  status = cuFuncSetBlockShape(phKernel, thread_x, thread_y, thread_z);
   if (status != CUDA_SUCCESS)
     {printf("ERROR: during setBlockShape\n");}

  int paramOffset = 0, size=0;

  size = sizeof(CUdeviceptr);
  status = cuParamSetv(phKernel, paramOffset, &d_data0, size);
  paramOffset += size;
  status = cuParamSetv(phKernel, paramOffset, &d_data1, size);
  paramOffset += size;



  status = cuParamSetSize(phKernel, paramOffset);
   if (status != CUDA_SUCCESS)
     {printf("ERROR: during cuParamSetv\n");}
    
  // Launch the kernel
  status = cuLaunchGrid(phKernel, block_x, block_y);
  if (status != CUDA_SUCCESS)
    {printf("ERROR: during grid launch\n");}

  //  std::cout << "  launched CUDA kernel!!" << std::endl;
  
  // Copy the result back to the host
  status = cuMemcpyDtoH(h_data0, d_data0, memSize);
  status = cuMemcpyDtoH(h_data1, d_data1, memSize);
  if (status != CUDA_SUCCESS)
    {printf("ERROR: during MemcpyDtoH\n");}
}

开发者ID:jrk，项目名称:llvmptxbackend，代码行数:52，代码来源:PTXBackendTestSuite.cpp

示例17: cuModuleGetFunction

CUresult compute_tran_temp
(CUmodule mod, CUdeviceptr MatrixPower, CUdeviceptr MatrixTemp[2], 
 int col, int row, int total_iterations, int num_iterations, int blockCols, 
 int blockRows, int borderCols, int borderRows) 
{
	int gdx = blockCols;
	int gdy = blockRows;
	int bdx = BLOCK_SIZE;
	int bdy = BLOCK_SIZE;
	
	float grid_height = chip_height / row;
	float grid_width = chip_width / col;
	
	float Cap = FACTOR_CHIP * SPEC_HEAT_SI * t_chip * grid_width * grid_height;
	float Rx = grid_width / (2.0 * K_SI * t_chip * grid_height);
	float Ry = grid_height / (2.0 * K_SI * t_chip * grid_width);
	float Rz = t_chip / (K_SI * grid_height * grid_width);
	
	float max_slope = MAX_PD / (FACTOR_CHIP * t_chip * SPEC_HEAT_SI);
	float step = PRECISION / max_slope;
	float t;
	float time_elapsed;
	time_elapsed=0.001;
	
	int src = 1, dst = 0;
	CUfunction f;
	CUresult res;

	res = cuModuleGetFunction(&f, mod, "_Z14calculate_tempiPfS_S_iiiiffffff");
	if (res != CUDA_SUCCESS) {
		printf("cuModuleGetFunction failed: res = %u\n", res);
		return 0;
	}
	
	for (t = 0; t < total_iterations; t+=num_iterations) {
		int it = MIN(num_iterations, total_iterations-t);
		int temp = src;
		src = dst;
		dst = temp;
		void *param[] = {&it, &MatrixPower, &MatrixTemp[src], &MatrixTemp[dst],
						 &col, &row, &borderCols, &borderRows, &Cap,
						 &Rx, &Ry, &Rz, &step, &time_elapsed};
		res = cuLaunchKernel(f, gdx, gdy, 1, bdx, bdy, 1, 0xc00, 0, 
							 (void**) param, NULL);
		if (res != CUDA_SUCCESS) {
			printf("cuLaunchKernel(euclid) failed: res = %u\n", res);
			return 0;
		}
	}

	return dst;
}

开发者ID:Constellation，项目名称:gdev-bench，代码行数:52，代码来源:hotspot_cuda.c

示例18: module_

GpuCompilationContext::GpuCompilationContext(const void* image,
                                             const std::string& kernel_name,
                                             const int device_id,
                                             const void* cuda_mgr,
                                             unsigned int num_options,
                                             CUjit_option* options,
                                             void** option_vals)
    : module_(nullptr), kernel_(nullptr), device_id_(device_id), cuda_mgr_(cuda_mgr) {
  static_cast<const CudaMgr_Namespace::CudaMgr*>(cuda_mgr_)->setContext(device_id_);
  checkCudaErrors(cuModuleLoadDataEx(&module_, image, num_options, options, option_vals));
  CHECK(module_);
  checkCudaErrors(cuModuleGetFunction(&kernel_, module_, kernel_name.c_str()));
}

开发者ID:kanak，项目名称:mapd-core，代码行数:13，代码来源:NvidiaKernel.cpp

示例19: __get_kernel

static CUfunction __get_kernel(void *user_context, const char* entry_name)
{
    CUfunction f;

    #ifdef DEBUG
    char msg[256];
    snprintf(msg, 256, "get_kernel %s (t=%lld)", entry_name, (long long)halide_current_time_ns(user_context) );
    #endif

    // Get kernel function ptr
    TIME_CALL( cuModuleGetFunction(&f, __mod, entry_name), msg );

    return f;
}

开发者ID:EEmmanuel7，项目名称:Halide，代码行数:14，代码来源:cuda.cpp

示例20: R_auto_cuModuleGetFunction

SEXP R_auto_cuModuleGetFunction(SEXP r_hfunc, SEXP r_hmod, SEXP r_name)
{
    SEXP r_ans = R_NilValue;
    CUfunction * hfunc = GET_REF(r_hfunc, CUfunction );
    CUmodule hmod = (CUmodule) getRReference(r_hmod);
    const char * name = CHAR(STRING_ELT(r_name, 0));
    
    CUresult ans;
    ans = cuModuleGetFunction(hfunc, hmod, name);
    
    r_ans = Renum_convert_CUresult(ans) ;
    
    return(r_ans);
}

开发者ID:PachoAlvarez，项目名称:RCUDA，代码行数:14，代码来源:autoModule.c

注：本文中的cuModuleGetFunction函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。

鲜花

握手

雷人

路过

鸡蛋

该文章已有0人参与评论

请发表评论

全部评论

专题导读

More+

10-27 六六分期app的软件客服如何联系？(六六分期

11-06 可心卡盟:win10系统火狐flash插件崩溃怎么

11-06 亲亲特价:怎么删除回收站图标

11-06 济南大学虚拟社区:鲁大师节能降温的具体办

11-06 xlueops.exe:无线网络安装向导

11-06 女斗合众国:win7系统cf与主机连接不稳定怎

11-06 0xc000022-[cf烟雾头]cf怎么调烟雾头

11-06 qizideyouhuo:应用程序无法正常启动0xc0000

11-06 ipz-185:win7系统vcf文件怎么打开

11-06 傻哥蹦迪:win10系统s4怎么打开usb调试

11-06 八神浩树gtaste:回收站清空了怎么恢复

11-06 妖尾之黑色守护:win10系统电脑没有1440x900

11-06 校园至尊魔王小说:win7系统浏览网页时字体

11-06 女斗合众国:win10系统访问共享文件夹提示请

11-06 tokyo hot n0654:恢复win7系统默认字体一招

11-06 雨酷仙境:设置win7系统转移临时文件夹腾出

11-06 阿穆纳伊之杖:win7系统开始菜单在右边还原

11-06 tunespotting:win10系统火狐flash插件总是

11-06 甘尔葛分析师：计谋网站seo关键词暴涨有什

11-06 蔡贵霖: 计谋网站seo关键词暴涨有什么秘密

11-06 博益网首页:ao3网页版进入不了解决方法

11-06 漏斗子专栏: 网站数据分析小白易懂精华篇

11-06 见证双虹怎么做:win7系统开启telnet命令的

11-06 颾狐蝶蜋:系统资源不足无法完成请求的服务

11-06 国光中学校歌:提交网站到alexa查询详细步骤

11-06 西安有情天:静态网页和动态网页的区别

11-06 红木雅尚斋:外部链接构造对网站的好处

11-06 前官礼遇：防止域名劫持–增强域安全性的10

11-06 密传二转答案: 中文分词算法有哪些

11-06 金泉家园邮编:百度快照劫持的表现及应对方

C++ cube函数代码示例发布时间：2022-05-30

C++ cuMemAlloc函数代码示例发布时间：2022-05-30

剪的笔顺,诠释剪的笔画,认识剪的部首

1 六六分期app的软件客服如何联系？(六六分期

六六分期app的软件客服如何联系？不知道吗？加qq群【895510560】即可！标题：六六分期

阅读：18098|2023-10-27

2 可心卡盟:win10系统火狐flash插件崩溃怎么

今天小编告诉大家如何处理win10系统火狐flash插件总是崩溃的问题，可能很多用户都不知

阅读：9618|2022-11-06

3 亲亲特价:怎么删除回收站图标

今天小编告诉大家如何对win10系统删除桌面回收站图标进行设置，可能很多用户都不知道

阅读：8153|2022-11-06

4 济南大学虚拟社区:鲁大师节能降温的具体办

今天小编告诉大家如何对win10系统电脑设置节能降温的设置方法，想必大家都遇到过需要

阅读：8531|2022-11-06

5 xlueops.exe:无线网络安装向导

我们在使用xp系统的过程中,经常需要对xp系统无线网络安装向导设置进行设置，可能很多

阅读：8432|2022-11-06

6 女斗合众国:win7系统cf与主机连接不稳定怎

今天小编告诉大家如何处理win7系统玩cf老是与主机连接不稳定的问题，可能很多用户都不

阅读：9347|2022-11-06

7 0xc000022-[cf烟雾头]cf怎么调烟雾头

电脑对日常生活的重要性小编就不多说了，可是一旦碰到win7系统设置cf烟雾头的问题，很

阅读：8397|2022-11-06

8 qizideyouhuo:应用程序无法正常启动0xc0000

我们在日常使用电脑的时候，有的小伙伴们可能在打开应用的时候会遇见提示应用程序无法

阅读：7834|2022-11-06

9 ipz-185:win7系统vcf文件怎么打开

今天小编告诉大家如何对win7系统打开vcf文件进行设置，可能很多用户都不知道怎么对win

阅读：8387|2022-11-06

10 傻哥蹦迪:win10系统s4怎么打开usb调试

今天小编告诉大家如何对win10系统s4开启USB调试模式进行设置，可能很多用户都不知道怎

阅读：7380|2022-11-06

客服电话

电子邮件

C++ cuModuleGetFunction函数代码示例

示例1: loadCUDAModules

示例2: cuModuleGetFunction

示例3: cuModuleGetFunction

示例4: CreateCuFunction

示例5: main

示例6: halide_dev_run

示例7: cuda_over_map

示例8: initCuda

示例9: ptxJIT

示例10: main

示例11: TestSAXPY

示例12: cuModuleLoad

示例13: spoc_cuda_debug_compile

示例14: init_cuda

示例15: readCubinFileFromBuffers

示例16: loadAndRunDualTestFunction

示例17: cuModuleGetFunction

示例18: module_

示例19: __get_kernel

示例20: R_auto_cuModuleGetFunction

请发表评论

全部评论

上一篇：

下一篇：

GalliumOS/linux: The Linux kernel, with

dpull/lua-webclient: Nonblocking lua htt

qier222/YesPlayMusic: 高颜值的第三方网易

avnyadav/machine_learning_project: This

星组词，描写星星的成语大全

剪的笔顺,诠释剪的笔画,认识剪的部首

六六分期app的软件客服如何联系？(六六分期

florent37/ViewAnimator: A fluent Android

florent37/Shrine-MaterialDesign2: implem

CVE-2020-36276

SimpleSoftwareIO/simple-sms: Send and re

关于我们

产品与服务

解决方案

139-2527-9053