本文整理汇总了 C++ 中 clCreateKernel 函数的典型用法代码示例。如果您正苦于以下问题：C++ clCreateKernel 函数的具体用法？C++ clCreateKernel 怎么用？C++ clCreateKernel 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 clCreateKernel 函数的 20 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的 C++ 代码示例。
示例1: CreateProgramAndKernel
// Reads the uncompiled kernel source, prepends precision-specific type macros,
// builds the OpenCL program and creates the requested kernel.
//
//   cxGPUContext  context the program is created in
//   cdDevices     device list; only cdDevices[0] is queried (version, work-group size)
//   kernel_name   name of the kernel function to instantiate
//   kernel        out: receives the created kernel object
//   bDouble       true  -> REAL/REAL3/REAL4 map to double types
//                 false -> REAL/REAL3/REAL4 map to float types
//
// Returns 0. Every failure is handed to oclCheckError (defined elsewhere), and an
// unsupported work-group size terminates the process via exit(0).
// **************************************************************************************************
int CreateProgramAndKernel(cl_context cxGPUContext, cl_device_id* cdDevices, const char *kernel_name, cl_kernel *kernel, bool bDouble)
{
    cl_int ciErrNum = CL_SUCCESS;

    // Read the kernel in from file
    shrLog("\nLoading Uncompiled kernel from .cl file, using %s\n", clSourcefile);
    char* cPathAndFile = shrFindFilePath(clSourcefile, cExecutablePath);
    oclCheckError(cPathAndFile != NULL, shrTRUE);
    size_t szSourceLen = 0;
    char* pcSource = oclLoadProgSource(cPathAndFile, "", &szSourceLen);
    oclCheckError(pcSource != NULL, shrTRUE);

    // Check OpenCL version -> vec3 types are supported only from version 1.1 and above.
    // The version string length is vendor dependent, so ask for its size first
    // instead of assuming it fits in a small fixed buffer.
    size_t szVersionLen = 0;
    ciErrNum = clGetDeviceInfo(cdDevices[0], CL_DEVICE_VERSION, 0, NULL, &szVersionLen);
    oclCheckError(ciErrNum, CL_SUCCESS);
    std::string sOCLVersion(szVersionLen + 1, '\0');
    ciErrNum = clGetDeviceInfo(cdDevices[0], CL_DEVICE_VERSION, szVersionLen, &sOCLVersion[0], NULL);
    oclCheckError(ciErrNum, CL_SUCCESS);
    const int iVec3Length = (strncmp("OpenCL 1.0", sOCLVersion.c_str(), 10) == 0) ? 4 : 3;

    // Prepend the precision-dependent macro header to the kernel source.
    const char* pcReal = bDouble ? "double" : "float";
    const char* pcZero = bDouble ? "0.0" : "0.0f";
    std::stringstream header;
    header << "#define REAL " << pcReal << '\n';
    header << "#define REAL4 " << pcReal << "4\n";
    header << "#define REAL3 " << pcReal << iVec3Length << '\n';
    header << "#define ZERO3 {" << pcZero << ", " << pcZero << ", " << pcZero;
    if (iVec3Length == 4)
    {
        // On OpenCL 1.0 REAL3 is emulated with a 4-component vector.
        header << ", " << pcZero;
    }
    header << "}\n";
    header << pcSource;

    // The stream now owns a copy of the source text, so the loader's buffers can go.
    // NOTE(review): assumes shrFindFilePath / oclLoadProgSource return malloc'd
    // buffers owned by the caller, as in the NVIDIA SDK utilities - confirm.
    free(pcSource);
    free(cPathAndFile);

    // Materialise the combined source once; it outlives clCreateProgramWithSource.
    const std::string sFullSource = header.str();
    const char* pcFullSource = sFullSource.c_str();
    szSourceLen = sFullSource.size();

    // create the program
    cl_program cpProgram = clCreateProgramWithSource(cxGPUContext, 1, &pcFullSource, &szSourceLen, &ciErrNum);
    oclCheckError(ciErrNum, CL_SUCCESS);
    shrLog("clCreateProgramWithSource\n");

    // Build the program with 'mad' Optimization option
#ifdef MAC
    const char *flags = "-cl-fast-relaxed-math -DMAC";
#else
    const char *flags = "-cl-fast-relaxed-math";
#endif
    ciErrNum = clBuildProgram(cpProgram, 0, NULL, flags, NULL, NULL);
    if (ciErrNum != CL_SUCCESS)
    {
        // write out standard error, Build Log and PTX, then cleanup and exit
        shrLogEx(LOGBOTH | ERRORMSG, ciErrNum, STDERROR);
        oclLogBuildInfo(cpProgram, oclGetFirstDev(cxGPUContext));
        oclLogPtx(cpProgram, oclGetFirstDev(cxGPUContext), "oclNbody.ptx");
        oclCheckError(ciErrNum, CL_SUCCESS);
    }
    shrLog("clBuildProgram\n");

    // create the kernel
    *kernel = clCreateKernel(cpProgram, kernel_name, &ciErrNum);
    oclCheckError(ciErrNum, CL_SUCCESS);
    shrLog("clCreateKernel\n");

    // Only inspect the work-group size once the query is known to have succeeded.
    size_t wgSize = 0;
    ciErrNum = clGetKernelWorkGroupInfo(*kernel, cdDevices[0], CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wgSize, NULL);
    oclCheckError(ciErrNum, CL_SUCCESS);
    if (wgSize == 64) {
        shrLog(
            "ERROR: Minimum work-group size 256 required by this application is not supported on this device.\n");
        exit(0);
    }

    // Drop the local program handle. The kernel handed back through *kernel
    // keeps the program alive until that kernel is released.
    clReleaseProgram(cpProgram);
    return 0;
}
开发者ID:AswinMohanASU,项目名称:nvidia-opencl-examples,代码行数:99,代码来源:oclBodySystemOpenclLaunch.cpp
示例2: calloc
//.........这里部分代码省略.........
memcpy(&length, w + 289, 4);
w = binaries[gpu]; remaining = binary_sizes[gpu];
if (!advance(&w, &remaining, "ELF"))
{patchbfi = 0; goto build;}
w++; remaining--;
if (!advance(&w, &remaining, "ELF")) {
/* 32 bit builds only one ELF */
w--; remaining++;
}
w--; remaining++;
w += start; remaining -= start;
if (opt_debug)
applog(LOG_DEBUG, "At %p (%u rem. bytes), to begin patching",
w, remaining);
patch_opcodes(w, length);
status = clReleaseProgram(clState->program);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error: Releasing program. (clReleaseProgram)");
return NULL;
}
clState->program = clCreateProgramWithBinary(clState->context, 1, &devices[gpu], &binary_sizes[gpu], (const unsigned char **)&binaries[gpu], &status, NULL);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error: Loading Binary into cl_program (clCreateProgramWithBinary)");
return NULL;
}
clRetainProgram(clState->program);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error: Retaining Program (clRetainProgram)");
return NULL;
}
}
free(source);
free(rawsource);
/* Save the binary to be loaded next time */
binaryfile = fopen(binaryfilename, "wb");
if (!binaryfile) {
/* Not a fatal problem, just means we build it again next time */
if (opt_debug)
applog(LOG_DEBUG, "Unable to create file %s", binaryfilename);
} else {
if (unlikely(fwrite(binaries[gpu], 1, binary_sizes[gpu], binaryfile) != binary_sizes[gpu])) {
applog(LOG_ERR, "Unable to fwrite to binaryfile");
return NULL;
}
fclose(binaryfile);
}
if (binaries[gpu])
free(binaries[gpu]);
built:
free(binaries);
free(binary_sizes);
applog(LOG_INFO, "Initialising kernel %s with%s BFI_INT patching, %d vectors and worksize %d",
filename, patchbfi ? "" : "out", clState->preferred_vwidth, clState->work_size);
/* create a cl program executable for all the devices specified */
status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error: Building Program (clBuildProgram)");
size_t logSize;
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
char *log = malloc(logSize);
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
applog(LOG_INFO, "%s", log);
return NULL;
}
/* get a kernel object handle for a kernel with the given name */
clState->kernel = clCreateKernel(clState->program, "search", &status);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error: Creating Kernel from program. (clCreateKernel)");
return NULL;
}
/////////////////////////////////////////////////////////////////
// Create an OpenCL command queue
/////////////////////////////////////////////////////////////////
clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu],
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &status);
if (status != CL_SUCCESS) /* Try again without OOE enable */
clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu], 0 , &status);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Creating Command Queue. (clCreateCommandQueue)");
return NULL;
}
clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, BUFFERSIZE, NULL, &status);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error: clCreateBuffer (outputBuffer)");
return NULL;
}
return clState;
}
开发者ID:furyan,项目名称:cgminer,代码行数:101,代码来源:ocl.c
示例3: main
//.........这里部分代码省略.........
if (ret != CL_SUCCESS)
{
printf("error: call to 'clCreateProgramWithSource' failed\n");
exit(1);
}
printf("program=%p\n", program);
/* Build program */
ret = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
if (ret != CL_SUCCESS )
{
size_t size;
char *log;
/* Get log size */
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,0, NULL, &size);
/* Allocate log and print */
log = malloc(size);
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,size, log, NULL);
printf("error: call to 'clBuildProgram' failed:\n%s\n", log);
/* Free log and exit */
free(log);
exit(1);
}
printf("program built\n");
printf("\n");
/* Create a Kernel Object */
cl_kernel kernel;
kernel = clCreateKernel(program, "logb_float", &ret);
if (ret != CL_SUCCESS)
{
printf("error: call to 'clCreateKernel' failed\n");
exit(1);
}
/* Create and allocate host buffers */
size_t num_elem = 10;
/* Create and init host side src buffer 0 */
cl_float *src_0_host_buffer;
src_0_host_buffer = malloc(num_elem * sizeof(cl_float));
for (int i = 0; i < num_elem; i++)
src_0_host_buffer[i] = (cl_float)(2.0);
/* Create and init device side src buffer 0 */
cl_mem src_0_device_buffer;
src_0_device_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, num_elem * sizeof(cl_float), NULL, &ret);
if (ret != CL_SUCCESS)
{
printf("error: could not create source buffer\n");
exit(1);
}
ret = clEnqueueWriteBuffer(command_queue, src_0_device_buffer, CL_TRUE, 0, num_elem * sizeof(cl_float), src_0_host_buffer, 0, NULL, NULL);
if (ret != CL_SUCCESS)
{
printf("error: call to 'clEnqueueWriteBuffer' failed\n");
exit(1);
}
/* Create host dst buffer */
cl_float *dst_host_buffer;
开发者ID:xianggong,项目名称:m2c-llvm-devtools-host,代码行数:67,代码来源:logb_float_src.c
示例4: clCreateFromGLTexture3D
void OpenCLExecuter::ocl_filter_shared(void)
{
cl_int err; // debugging variables
size_t szParmDataBytes; // Byte size of context information
cl_mem src_buffer; // OpenCL device source buffer
cl_mem dst_buffer; // OpenCL device source buffer
cl_sampler sampler; // OpenCL sampler
cl_kernel ckKernel; // OpenCL kernel
int iNumElements = volobj->texwidth*volobj->texheight*volobj->texdepth; // Length of float arrays to process
// set Local work size dimensions
// size_t local_threads[3] ={256,256,64};
// set Global work size dimensions
// size_t global_threads[3] ={roundup((int) volobj->texwidth/local_threads[0], 0)*local_threads[0], roundup((int) volobj->texheight/local_threads[1], 0)*local_threads[1], roundup((int) volobj->texdepth/local_threads[2], 0)*local_threads[2]};
// set Global work size dimensions
size_t global_threads[3] ={volobj->texwidth, volobj->texheight, volobj->texdepth};
// allocate the source buffer memory object
src_buffer = clCreateFromGLTexture3D (ocl_wrapper->context, CL_MEM_READ_WRITE, GL_TEXTURE_3D, 0, volobj->TEXTURE3D_RED, &err);
printf("OPENCL: clCreateFromGLTexture3D: %s\n", ocl_wrapper->get_error(err));
// allocate the destination buffer memory object
dst_buffer = clCreateBuffer (ocl_wrapper->context, CL_MEM_READ_WRITE, sizeof(unsigned char) * iNumElements, NULL, &err);
printf("OPENCL: clCreateBuffer: %s\n", ocl_wrapper->get_error(err));
// create a sampler object
sampler = clCreateSampler(ocl_wrapper->context, CL_FALSE, CL_ADDRESS_CLAMP, CL_FILTER_NEAREST, &err);
printf("OPENCL: clCreateSampler: %s\n", ocl_wrapper->get_error(err));
// Create the kernel
ckKernel = clCreateKernel (cpProgram, "myFunc", &err);
printf("OPENCL: clCreateKernel: %s\n", ocl_wrapper->get_error(err));
// Set the Argument values
err = clSetKernelArg (ckKernel, 0, sizeof(cl_mem), (void*)&src_buffer);
printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
err = clSetKernelArg (ckKernel, 1, sizeof(cl_mem), (void*)&dst_buffer);
printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
err = clSetKernelArg (ckKernel, 2, sizeof(sampler), (void*)&sampler);
printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
size_t local;
err = clGetKernelWorkGroupInfo(ckKernel, ocl_wrapper->devices[ocl_wrapper->deviceUsed], CL_KERNEL_LOCAL_MEM_SIZE , sizeof(local), &local, NULL);
printf("OPENCL: clGetKernelWorkGroupInfo (kernel memory): %s\n", ocl_wrapper->get_error(err));
printf("OPENCL: Kernel local memory use: %d Bytes\n", (int)local);
// grab input data from OpenGL, compute, copy the results back to OpenGL
// Runs asynchronous to host, up until blocking clFinish at the end
glFinish();
glFlush();
// grab the OpenGL texture object for read/writing from OpenCL
err = clEnqueueAcquireGLObjects(ocl_wrapper->commandQue, 1, &src_buffer, 0,NULL,NULL);
printf("OPENCL: clEnqueueAcquireGLObjects: %s\n", ocl_wrapper->get_error(err));
// Execute a kernel
err = clEnqueueNDRangeKernel (ocl_wrapper->commandQue, ckKernel, 3, NULL, global_threads, NULL, 0, NULL, NULL);
printf("OPENCL: clEnqueueNDRangeKernel: %s\n", ocl_wrapper->get_error(err));
/*
// Blocking read of results from GPU to Host
int size = volobj->texwidth*volobj->texheight*volobj->texdepth;
unsigned char* result = new unsigned char[size];
err = clEnqueueReadBuffer (ocl_wrapper->commandQue, dst_buffer, CL_TRUE, 0, sizeof(unsigned char) * iNumElements, result, 0, NULL, NULL);
printf("OPENCL: clEnqueueReadBuffer: %s\n", ocl_wrapper->get_error(err));
for(int i=0; i<size; i++) volobj->texture3d[3*i+0] = result[i];
delete[] result;
*/
// copy OpenCL buffer to OpenGl texture
size_t corigin[3] = {0,0,0};
size_t cdimensions[3] = {(unsigned int)volobj->texwidth, (unsigned int)volobj->texheight, (unsigned int)volobj->texdepth};
err = clEnqueueCopyBufferToImage(ocl_wrapper->commandQue , dst_buffer, src_buffer, 0, corigin, cdimensions, 0, NULL, NULL);
printf("OPENCL: clEnqueueCopyBufferToImage: %s\n", ocl_wrapper->get_error(err));
//make sure we block until we are done.
//err = clFinish(ocl_wrapper->commandQue);
//printf("OPENCL: clFinish: %s\n", ocl_wrapper->get_error(err));
//release opengl objects now
err = clEnqueueReleaseGLObjects(ocl_wrapper->commandQue, 1, &src_buffer, 0,0,0);
printf("OPENCL: clEnqueueAcquireGLObjects: %s\n", ocl_wrapper->get_error(err));
// Cleanup allocated objects
printf("OPENCL: Releasing kernel memory\n");
if(ckKernel)clReleaseKernel(ckKernel);
//need to release any other OpenCL memory objects here
if(src_buffer)clReleaseMemObject(src_buffer);
if(dst_buffer)clReleaseMemObject(dst_buffer);
}
开发者ID:ut666,项目名称:VolViewer,代码行数:94,代码来源:OpenCLExecuter.cpp
示例5: roundup
void OpenCLExecuter::ocl_parrallelReduction(void)
{
cl_int err; // debugging variables
size_t szParmDataBytes; // Byte size of context information
cl_mem src_buffer; // OpenCL device source buffer
cl_mem tmp_buffer; // OpenCL device source buffer
cl_mem dst_buffer; // OpenCL device source buffer
size_t szGlobalWorkSize; // 1D var for Total # of work items
size_t szLocalWorkSize; // 1D var for # of work items in the work group
size_t numWorkGroups;
cl_kernel ckKernel; // OpenCL kernel
int iNumElements = 65536; //65536 // Length of float arrays to process
// set Local work size dimensions
szLocalWorkSize = 512;
// set Global work size dimensions
szGlobalWorkSize = roundup((int) iNumElements/szLocalWorkSize, 0)*szLocalWorkSize;
//szGlobalWorkSize = iNumElements;
numWorkGroups = (float)szGlobalWorkSize/(float)szLocalWorkSize;
printf("OPENCL: number of elements: %d\n", (int)iNumElements);
printf("OPENCL: local worksize: %d\n", (int)szLocalWorkSize);
printf("OPENCL: global worksize: %d\n", (int)szGlobalWorkSize);
printf("OPENCL: work groups: %d\n", (int)(numWorkGroups));
//temp array
int* data = new int[iNumElements];
for(int i=0; i<iNumElements; i++)
data[i] = randomFloat(1.0, (float)iNumElements);
data[iNumElements/2] = -100.0;
//for(int i=0; i<iNumElements; i++)
// printf("data: %d\n", data[i]);
size_t global_threads[1] ={iNumElements};
// allocate the source buffer memory object
src_buffer = clCreateBuffer (ocl_wrapper->context, CL_MEM_READ_ONLY, sizeof(int) * iNumElements, NULL, &err);
printf("OPENCL: clCreateBuffer: %s\n", ocl_wrapper->get_error(err));
// allocate the temp buffer memory object
tmp_buffer = clCreateBuffer (ocl_wrapper->context, CL_MEM_READ_WRITE, sizeof(int) * iNumElements, NULL, &err);
printf("OPENCL: clCreateBuffer: %s\n", ocl_wrapper->get_error(err));
// allocate the destination buffer memory object
dst_buffer = clCreateBuffer (ocl_wrapper->context, CL_MEM_WRITE_ONLY, sizeof(int) * iNumElements, NULL, &err);
printf("OPENCL: clCreateBuffer: %s\n", ocl_wrapper->get_error(err));
// Create the kernel
ckKernel = clCreateKernel (cpProgram, "min_reduce", &err);
printf("OPENCL: clCreateKernel: %s\n", ocl_wrapper->get_error(err));
// Set the Argument values
err = clSetKernelArg (ckKernel, 0, sizeof(cl_mem), (void*)&src_buffer);
printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
err = clSetKernelArg (ckKernel, 1, sizeof(int)*szLocalWorkSize, NULL);
printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
err = clSetKernelArg (ckKernel, 2, sizeof(int), (void*)&iNumElements);
printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
err = clSetKernelArg (ckKernel, 3, sizeof(cl_mem), (void*)&dst_buffer);
printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
// Copy input data to GPU, compute, copy results back
// Runs asynchronous to host, up until blocking read at end
int numb_iterations = sqrt((float)numWorkGroups);
numb_iterations=0;
bool cont = true;
Timer timer;
timer.startTimer();
//for(int i=0; i<numb_iterations; i++)
while(cont)
{
// Write data from host to GPU
err = clEnqueueWriteBuffer (ocl_wrapper->commandQue, src_buffer, CL_FALSE, 0, sizeof(int) * iNumElements, data, 0, NULL, NULL);
printf("OPENCL: clEnqueueWriteBuffer: %s\n", ocl_wrapper->get_error(err));
// Launch kernel
err = clEnqueueNDRangeKernel (ocl_wrapper->commandQue, ckKernel, 1, NULL, &szGlobalWorkSize, &szLocalWorkSize, 0, NULL, NULL);
printf("OPENCL: clEnqueueNDRangeKernel: %s\n", ocl_wrapper->get_error(err));
// Blocking read of results from GPU to Host
err = clEnqueueReadBuffer (ocl_wrapper->commandQue, dst_buffer, CL_TRUE, 0, sizeof(int) * iNumElements, data, 0, NULL, NULL);
printf("OPENCL: clEnqueueReadBuffer: %s\n", ocl_wrapper->get_error(err));
numb_iterations++;
if(data[1]==0) cont = false;
//printf("min: %d\n", data[0]);
for(int i=0; i<numWorkGroups; i++)
printf("min: %d\n", data[i]);
}
timer.endTimer("GPU find min");
timer.startTimer();
int min=iNumElements;
for(int i=0; i<iNumElements; i++)
//.........这里部分代码省略.........
开发者ID:ut666,项目名称:VolViewer,代码行数:101,代码来源:OpenCLExecuter.cpp
示例6: printf
void OpenCLExecuter::ocl_filter_multi(void)
{
cl_int err; // debugging variables
size_t szParmDataBytes; // Byte size of context information
cl_mem src_buffer[MAX_DEVICES]; // OpenCL device source buffer
cl_mem dst_buffer[MAX_DEVICES]; // OpenCL device source buffer
cl_command_queue queues[MAX_DEVICES]; // OpenCL device queue
cl_kernel ckKernel[MAX_DEVICES]; // OpenCL kernel
cl_event gpuDone[MAX_DEVICES];
// int iNumElements = volobj->texwidth*volobj->texheight*volobj->texdepth*3; // Length of float arrays to process
int xdim, ydim, zdim;
xdim = (float)volobj->texwidth; // (float)ocl_wrapper->numDevices;
ydim = (float)volobj->texheight; // (float)ocl_wrapper->numDevices;
zdim = (float)volobj->texdepth / (float)ocl_wrapper->numDevices;
//Length of array to process
int iNumElements = (xdim*ydim*zdim);
size_t global_threads[3] = {xdim, ydim, zdim};
//temp array
unsigned char** data = new unsigned char*[ocl_wrapper->numDevices];
for(int i=0; i<ocl_wrapper->numDevices; i++)
data[i] = new unsigned char[iNumElements];
for(int i=0; i<ocl_wrapper->numDevices; i++)
{
printf("OPENCL: Computing Device%d\n", i);
//create the command queue we will use to execute OpenCL commands
queues[i] = clCreateCommandQueue(ocl_wrapper->context, ocl_wrapper->devices[i], 0, &err);
printf("OPENCL: clCreateCommandQueue: %s\n", ocl_wrapper->get_error(err));
// allocate the source buffer memory object
src_buffer[i] = clCreateBuffer (ocl_wrapper->context, CL_MEM_READ_ONLY, sizeof(unsigned char) * iNumElements, NULL, &err);
printf("OPENCL: clCreateBuffer: %s\n", ocl_wrapper->get_error(err));
// allocate the destination buffer memory object
dst_buffer[i] = clCreateBuffer (ocl_wrapper->context, CL_MEM_WRITE_ONLY, sizeof(unsigned char) * iNumElements, NULL, &err);
printf("OPENCL: clCreateBuffer: %s\n", ocl_wrapper->get_error(err));
// Create the kernel
ckKernel[i] = clCreateKernel (cpProgram, "myFunc", &err);
printf("OPENCL: clCreateKernel: %s\n", ocl_wrapper->get_error(err));
// Set the Argument values
err = clSetKernelArg (ckKernel[i], 0, sizeof(cl_mem), (void*)&src_buffer[i]);
printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
err = clSetKernelArg (ckKernel[i], 1, sizeof(cl_mem), (void*)&dst_buffer[i]);
printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
err = clSetKernelArg (ckKernel[i], 2, sizeof(int), (void*)&global_threads[0]);
printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
err = clSetKernelArg (ckKernel[i], 3, sizeof(int), (void*)&global_threads[1]);
printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
err = clSetKernelArg (ckKernel[i], 4, sizeof(int), (void*)&global_threads[2]);
printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
//Prepare data to upload
int iOffsetElements = (xdim*ydim*zdim*i);
for(int j=iOffsetElements; j<iNumElements+iOffsetElements; j++)
data[i][j-iOffsetElements] = volobj->texture3d[3*j+0];
// Write data from host to GPU
err = clEnqueueWriteBuffer (queues[i], src_buffer[i], CL_FALSE, 0, sizeof(unsigned char) * iNumElements, data[i], 0, NULL, NULL);
printf("OPENCL: clEnqueueWriteBuffer: %s\n", ocl_wrapper->get_error(err));
}
for(int i=0; i<ocl_wrapper->numDevices; i++)
{
// Launch kernel
err = clEnqueueNDRangeKernel (queues[i], ckKernel[i], 3, NULL, global_threads, NULL, 0, NULL, NULL);
printf("OPENCL: clEnqueueNDRangeKernel: %s\n", ocl_wrapper->get_error(err));
}
for(int i=0; i<ocl_wrapper->numDevices; i++)
{
// Blocking read of results from GPU to Host
err = clEnqueueReadBuffer (queues[i], dst_buffer[i], CL_TRUE, 0, sizeof(unsigned char) * iNumElements, data[i], 0, NULL, &gpuDone[i]);
printf("OPENCL: clEnqueueReadBuffer: %s\n", ocl_wrapper->get_error(err));
}
// Synchronize with the GPUs
printf("OPENCL: Waiting for devices to sync\n");
clWaitForEvents(ocl_wrapper->numDevices, gpuDone);
for(int i=0; i<ocl_wrapper->numDevices; i++)
{
//read data back
int iOffsetElements = (xdim*ydim*zdim*i);
for(int j=iOffsetElements; j<iNumElements+iOffsetElements; j++)
volobj->texture3d[3*j+0] = data[i][j-iOffsetElements];
}
for(int i=0; i<ocl_wrapper->numDevices; i++)
{
// Cleanup allocated objects
//.........这里部分代码省略.........
开发者ID:ut666,项目名称:VolViewer,代码行数:101,代码来源:OpenCLExecuter.cpp
示例7: clCreateBuffer
void OpenCLExecuter::ocl_filterBoundingBox(int channel, int window_size)
{
cl_int err; // debugging variables
size_t szParmDataBytes; // Byte size of context information
cl_mem src_buffer; // OpenCL device source buffer
cl_mem bbmin_buffer; // OpenCL device source buffer
cl_mem bbmax_buffer; // OpenCL device source buffer
size_t szGlobalWorkSize; // 1D var for Total # of work items
size_t szLocalWorkSize; // 1D var for # of work items in the work group
cl_kernel ckKernel; // OpenCL kernel
cl_int4 minbb;
cl_int4 maxbb;
minbb.s[0] = minbb.s[1] = minbb.s[2] = 8192;
maxbb.s[0] = maxbb.s[1] = maxbb.s[2] = -8192;
int iNumElements = 3*volobj->texwidth*volobj->texheight*volobj->texdepth; // Length of float arrays to process
size_t global_threads[3] ={volobj->texwidth, volobj->texheight, volobj->texdepth};
// allocate the source buffer memory object
src_buffer = clCreateBuffer (ocl_wrapper->context, CL_MEM_READ_ONLY, sizeof(unsigned char) * iNumElements, NULL, &err);
printf("OPENCL: clCreateBuffer: %s\n", ocl_wrapper->get_error(err));
// allocate the destination buffer memory object
bbmin_buffer = clCreateBuffer (ocl_wrapper->context, CL_MEM_READ_WRITE, sizeof(cl_int4), NULL, &err);
printf("OPENCL: clCreateBuffer: %s\n", ocl_wrapper->get_error(err));
bbmax_buffer = clCreateBuffer (ocl_wrapper->context, CL_MEM_READ_WRITE, sizeof(cl_int4), NULL, &err);
printf("OPENCL: clCreateBuffer: %s\n", ocl_wrapper->get_error(err));
// Create the kernel
ckKernel = clCreateKernel (cpProgram, "myFunc", &err);
printf("OPENCL: clCreateKernel: %s\n", ocl_wrapper->get_error(err));
// Set the Argument values
err = clSetKernelArg (ckKernel, 0, sizeof(cl_mem), (void*)&src_buffer);
printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
err = clSetKernelArg (ckKernel, 1, sizeof(cl_mem), (void*)&bbmin_buffer);
printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
err = clSetKernelArg (ckKernel, 2, sizeof(cl_mem), (void*)&bbmax_buffer);
printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
err = clSetKernelArg (ckKernel, 2, sizeof(int), (void*)&volobj->texwidth);
printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
err = clSetKernelArg (ckKernel, 3, sizeof(int), (void*)&volobj->texheight);
printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
err = clSetKernelArg (ckKernel, 4, sizeof(int), (void*)&volobj->texdepth);
printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
err = clSetKernelArg (ckKernel, 5, sizeof(int), (void*)&channel);
printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
// Copy input data to GPU, compute, copy results back
// Runs asynchronous to host, up until blocking read at end
// Write data from host to GPU
err = clEnqueueWriteBuffer (ocl_wrapper->commandQue, src_buffer, CL_FALSE, 0, sizeof(unsigned char) * iNumElements, volobj->texture3d, 0, NULL, NULL);
printf("OPENCL: clEnqueueWriteBuffer: %s\n", ocl_wrapper->get_error(err));
err = clEnqueueWriteBuffer (ocl_wrapper->commandQue, bbmin_buffer, CL_FALSE, 0, sizeof(cl_int4), (void*)&minbb, 0, NULL, NULL);
printf("OPENCL: clEnqueueWriteBuffer: %s\n", ocl_wrapper->get_error(err));
err = clEnqueueWriteBuffer (ocl_wrapper->commandQue, bbmax_buffer, CL_FALSE, 0, sizeof(cl_int4), (void*)&maxbb, 0, NULL, NULL);
printf("OPENCL: clEnqueueWriteBuffer: %s\n", ocl_wrapper->get_error(err));
// Launch kernel
err = clEnqueueNDRangeKernel (ocl_wrapper->commandQue, ckKernel, 3, NULL, global_threads, NULL, 0, NULL, NULL);
printf("OPENCL: clEnqueueNDRangeKernel: %s\n", ocl_wrapper->get_error(err));
// Blocking read of results from GPU to Host
err = clEnqueueReadBuffer (ocl_wrapper->commandQue, bbmin_buffer, CL_TRUE, 0, sizeof(cl_int4), (void*)&minbb, 0, NULL, NULL);
printf("OPENCL: clEnqueueReadBuffer: %s\n", ocl_wrapper->get_error(err));
err = clEnqueueReadBuffer (ocl_wrapper->commandQue, bbmax_buffer, CL_TRUE, 0, sizeof(cl_int4), (void*)&maxbb, 0, NULL, NULL);
printf("OPENCL: clEnqueueReadBuffer: %s\n", ocl_wrapper->get_error(err));
// Cleanup allocated objects
printf("OPENCL: Releasing kernel memory\n");
if(ckKernel)clReleaseKernel(ckKernel);
//need to release any other OpenCL memory objects here
if(src_buffer)clReleaseMemObject(src_buffer);
if(bbmin_buffer)clReleaseMemObject(bbmin_buffer);
if(bbmax_buffer)clReleaseMemObject(bbmax_buffer);
maxbb.s[0] += (float)window_size/2.0;
maxbb.s[1] += (float)window_size/2.0;
maxbb.s[2] += (float)window_size/2.0;
minbb.s[0] -= (float)window_size/2.0;
minbb.s[1] -= (float)window_size/2.0;
minbb.s[2] -= (float)window_size/2.0;
maxbb.s[0] += 2;
maxbb.s[1] += 2;
maxbb.s[2] += 2;
minbb.s[0] -= 2;
minbb.s[1] -= 2;
minbb.s[2] -= 2;
//.........这里部分代码省略.........
开发者ID:ut666,项目名称:VolViewer,代码行数:101,代码来源:OpenCLExecuter.cpp
示例8: main
int main(void) {
// se crea los 2 vectores de entrada
int i;
const int LIST_SIZE = 1024;
int *A = (int*)malloc(sizeof(int)*LIST_SIZE);
int *B = (int*)malloc(sizeof(int)*LIST_SIZE);
for(i = 0; i < LIST_SIZE; i++) {
A[i] = i;
B[i] = LIST_SIZE - i;
}
// cargamos el kernel en source_str
FILE *fp;
char *source_str;
size_t source_size;
fp = fopen("vector_add_kernel.cl", "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel.\n");
exit(1);
}
source_str = (char*)malloc(MAX_SOURCE_SIZE);
source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
fclose( fp );
// obtenemos las plataformas y informacion de los devices
cl_platform_id platform_id = NULL;
cl_device_id device_id = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1,
&device_id, &ret_num_devices);
// creamos un contexto OpenCL
cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
// creamos la cola de comandos
cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
// creamos el buffer de memoria en el device para cada vector
cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
LIST_SIZE * sizeof(int), NULL, &ret);
cl_mem b_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
LIST_SIZE * sizeof(int), NULL, &ret);
cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
LIST_SIZE * sizeof(int), NULL, &ret);
// copiamos los vectores A y B a sus respectivas memorias buffer
ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0,
LIST_SIZE * sizeof(int), A, 0, NULL, NULL);
ret = clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0,
LIST_SIZE * sizeof(int), B, 0, NULL, NULL);
// creamos un programa para el kernel
cl_program program = clCreateProgramWithSource(context, 1,
(const char **)&source_str, (const size_t *)&source_size, &ret);
// generamos el programa
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
// creamos el kernel
cl_kernel kernel = clCreateKernel(program, "vector_add", &ret);
// establecemos los argumentos del kernel
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem_obj);
ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem_obj);
ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem_obj);
// ejecutamos el kernel de la lista
size_t global_item_size = LIST_SIZE;
size_t local_item_size = 64; // dividimos los work items en grupos de 64
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
&global_item_size, &local_item_size, 0, NULL, NULL);
// copiamos la memoria buffer C del device hacia la variable local C
int *C = (int*)malloc(sizeof(int)*LIST_SIZE);
ret = clEnqueueReadBuffer(command_queue, c_mem_obj, CL_TRUE, 0,
LIST_SIZE * sizeof(int), C, 0, NULL, NULL);
// muestra el resultado
for(i = 0; i < LIST_SIZE; i++)
printf("%d + %d = %d\n", A[i], B[i], C[i]);
free(A);
free(B);
free(C);
return 0;
}
开发者ID:pioh123,项目名称:parallel,代码行数:90,代码来源:vector.c
示例9: main
//.........这里部分代码省略.........
u_d = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &status);
chk(status,"clCreatebuffer");
f_d = clCreateBuffer(context, CL_MEM_READ_WRITE, size1, NULL, &status);
chk(status, "clCreatebuffer");
// perform computing on GPU
// copy data from host to device
status = clEnqueueWriteBuffer(cmdQueue, u_d, CL_FALSE, 0, size, u_h, 0, NULL, NULL);
chk(status,"ClEnqueueWriteBuffer");
status = clEnqueueWriteBuffer(cmdQueue, f_d, CL_FALSE, 0, size1, f_h, 0, NULL, NULL);
chk(status, "clEnqueueWriteBuffer");
// create program with source code
cl_program program = clCreateProgramWithSource(context,1,(const char**)&programSource, NULL, &status);
chk(status, "clCreateProgramWithSource");
// Compile program for the device
status = clBuildProgram(program, numDevices, devices, NULL, NULL,NULL);
// chk(status, "ClBuildProgram");
if(status != CL_SUCCESS){
printf("clBuildProgram failed (%d) \n", status);
size_t log_size;
clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
char *log = (char *) malloc(log_size);
clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
printf("%s\n", log);
exit(-1);
}
printf("successfully built program \n");
// Create lattice-boltzman kernel
cl_kernel kernel, kernel1;
kernel = clCreateKernel(program, "lbiteration", &status);
kernel1 = clCreateKernel(program, "Denrho", &status);
chk(status, "clCreateKernel");
printf("successfully create kernel \n");
// Associate the input and output buffers with the kernel
status = clSetKernelArg(kernel,0, sizeof(cl_mem), &f_d);
status |= clSetKernelArg(kernel1,0, sizeof(cl_mem), &u_d);
status |= clSetKernelArg(kernel1,1, sizeof(cl_mem), &f_d);
status |= clSetKernelArg(kernel, 1, sizeof(int), &ArraySizeX);
status |= clSetKernelArg(kernel1,2, sizeof(int), &ArraySizeX);
status |= clSetKernelArg(kernel, 2, sizeof(int), &ArraySizeY);
status |= clSetKernelArg(kernel1,3, sizeof(int),&ArraySizeY);
chk(status, "clSerKernelArg");
// set the work dimensions
size_t localworksize[2] = {BLOCK_SIZE_X,BLOCK_SIZE_Y};
int nBLOCKSX = (ArraySizeX-2)/(BLOCK_SIZE_X -2);
int nBLOCKSY = (ArraySizeY-2)/(BLOCK_SIZE_Y -2);
size_t globalworksize[2] = {nBLOCKSX*BLOCK_SIZE_X,nBLOCKSY*BLOCK_SIZE_Y};
// loop the kernel
for( nsteps = 0; nsteps < 100; nsteps++){
status = clEnqueueNDRangeKernel(cmdQueue, kernel, 2, NULL, globalworksize,localworksize,0,NULL,&event);
clWaitForEvents(1 , &event);
clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
sizeof(time_start), &time_start, NULL);
clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
sizeof(time_end), &time_end, NULL);
total_time += time_end - time_start;
}
printf("Good so far \n");
status = clEnqueueNDRangeKernel(cmdQueue, kernel1, 2, NULL, globalworksize,localworksize,0,NULL,&event);
开发者ID:hietwll,项目名称:parallel-lattice-Boltzmann,代码行数:67,代码来源:Lattice_BoltzmannOpenCL.c
示例10: setup_opencl
//---------------------------------------------------------------------
// Set up the OpenCL environment.
//---------------------------------------------------------------------
void setup_opencl(int argc, char *argv[])
{
cl_int err_code;
char *source_dir = "EP";
if (argc > 1) source_dir = argv[1];
#ifdef TIMER_DETAIL
if (timers_enabled) {
int i;
for (i = T_OPENCL_API; i < T_END; i++) timer_clear(i);
}
#endif
DTIMER_START(T_OPENCL_API);
// 1. Find the default device type and get a device for the device type
device_type = clu_GetDefaultDeviceType();
device = clu_GetAvailableDevice(device_type);
device_name = clu_GetDeviceName(device);
// 2. Create a context for the specified device
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err_code);
clu_CheckError(err_code, "clCreateContext()");
// 3. Create a command queue
cmd_queue = clCreateCommandQueue(context, device, 0, &err_code);
clu_CheckError(err_code, "clCreateCommandQueue()");
DTIMER_STOP(T_OPENCL_API);
// 4. Build the program
DTIMER_START(T_BUILD);
char *source_file;
char build_option[30];
sprintf(build_option, "-DM=%d -I.", M);
if (device_type == CL_DEVICE_TYPE_CPU) {
source_file = "ep_cpu.cl";
GROUP_SIZE = 16;
} else {
source_file = "ep_gpu.cl";
GROUP_SIZE = 64;
}
program = clu_MakeProgram(context, device, source_dir, source_file,
build_option);
DTIMER_STOP(T_BUILD);
// 5. Create buffers
DTIMER_START(T_BUFFER_CREATE);
gq_size = np / GROUP_SIZE * NQ * sizeof(double);
gsx_size = np / GROUP_SIZE * sizeof(double);
gsy_size = np / GROUP_SIZE * sizeof(double);
pgq = clCreateBuffer(context, CL_MEM_READ_WRITE, gq_size, NULL, &err_code);
clu_CheckError(err_code, "clCreateBuffer() for pgq");
pgsx = clCreateBuffer(context, CL_MEM_READ_WRITE, gsx_size,NULL, &err_code);
clu_CheckError(err_code, "clCreateBuffer() for pgsx");
pgsy = clCreateBuffer(context, CL_MEM_READ_WRITE, gsy_size,NULL, &err_code);
clu_CheckError(err_code, "clCreateBuffer() for pgsy");
DTIMER_STOP(T_BUFFER_CREATE);
// 6. Create a kernel
DTIMER_START(T_OPENCL_API);
kernel = clCreateKernel(program, "embar", &err_code);
clu_CheckError(err_code, "clCreateKernel()");
DTIMER_STOP(T_OPENCL_API);
}
开发者ID:NatTuck,项目名称:cakemark,代码行数:73,代码来源:ep.c
示例11: main
// Host-side driver that sets up an OpenCL kernel named "MM" operating on
// three float arrays A, B and C (sizes WA*HA, WB*HB and WC*HC elements).
// Presumably a matrix multiplication, judging by the names — confirm.
// Only the setup portion is visible; the rest of the function is elided in
// this listing, so kernel arguments, launch and cleanup are not shown here.
int
main(int argc, char** argv)
{
// Seed the C library PRNG with a fixed value.
srand(1000);
int i;
// Host copies of the two inputs. The malloc results are not checked.
unsigned int size_A = WA * HA;
unsigned int mem_size_A = sizeof(float) * size_A;
float* h_A = (float*) malloc(mem_size_A);
unsigned int size_B = WB * HB;
unsigned int mem_size_B = sizeof(float) * size_B;
float* h_B = (float*) malloc(mem_size_B);
// randomInit is defined elsewhere; presumably fills each array with random
// values — confirm.
randomInit(h_A, size_A);
randomInit(h_B, size_B);
// Host buffer for the result.
unsigned int size_C = WC * HC;
unsigned int mem_size_C = sizeof(float) * size_C;
float* h_C = (float*) malloc(mem_size_C);
// OpenCL handles used below.
cl_context clGPUContext;
cl_command_queue clCommandQue;
cl_program clProgram;
cl_kernel clKernel;
cl_event mm;
size_t dataBytes;
size_t kernelLength;
cl_int errcode;
cl_mem d_A;
cl_mem d_B;
cl_mem d_C;
// Context over GPU devices. The errcode written here is overwritten by the
// next call without being examined.
clGPUContext = clCreateContextFromType(0,
CL_DEVICE_TYPE_GPU,
NULL, NULL, &errcode);
// Two-step query: first ask for the byte size of the device list, then
// allocate that many bytes and fetch the list itself.
errcode = clGetContextInfo(clGPUContext,
CL_CONTEXT_DEVICES, 0, NULL,
&dataBytes);
cl_device_id *clDevices = (cl_device_id *)
malloc(dataBytes);
errcode |= clGetContextInfo(clGPUContext,
CL_CONTEXT_DEVICES, dataBytes,
clDevices, NULL);
// Profiling-enabled queue on the first device of the context.
clCommandQue = clCreateCommandQueue(clGPUContext,
clDevices[0], CL_QUEUE_PROFILING_ENABLE, &errcode);
// NOTE(review): d_C is allocated with mem_size_A, not mem_size_C. That is
// only correct when WA*HA == WC*HC — confirm or switch to mem_size_C.
d_C = clCreateBuffer(clGPUContext,
CL_MEM_READ_WRITE,
mem_size_A, NULL, &errcode);
// Input buffers are initialised from the host arrays at creation time
// (CL_MEM_COPY_HOST_PTR).
d_A = clCreateBuffer(clGPUContext,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
mem_size_A, h_A, &errcode);
d_B = clCreateBuffer(clGPUContext,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
mem_size_B, h_B, &errcode);
// Read the whole of "hw2.cl" into memory.
// NOTE(review): fp is not checked for NULL, the file is opened in text mode
// ("r"), and the ftell result is stored in a size_t without an error check.
FILE* fp = fopen("hw2.cl", "r");
fseek (fp , 0 , SEEK_END);
const size_t lSize = ftell(fp);
rewind(fp);
unsigned char* buffer;
buffer = (unsigned char*) malloc (lSize);
fread(buffer, 1, lSize, fp);
fclose(fp);
// The file contents are handed to clCreateProgramWithBinary for one device.
// `status` receives the per-device binary load status and is not examined in
// the visible code.
// NOTE(review): a ".cl" file usually holds kernel source; confirm this file
// really is a device binary, otherwise clCreateProgramWithSource is the
// matching call.
cl_int status;
clProgram = clCreateProgramWithBinary(clGPUContext,
1, (const cl_device_id *)clDevices,
&lSize, (const unsigned char**)&buffer,
&status, &errcode);
// NOTE(review): clBuildProgram is called twice with identical arguments and
// neither result is checked; the second call looks redundant — confirm.
errcode = clBuildProgram(clProgram, 0, NULL, NULL,
NULL, NULL);
errcode = clBuildProgram(clProgram, 0,
NULL, NULL, NULL, NULL);
// Kernel object for the "MM" entry point of the built program.
clKernel = clCreateKernel(clProgram,
"MM", &errcode);
//......... part of the code is omitted here .........
开发者ID:hemantjp,项目名称:HW2,代码行数:101,代码来源:hw2.c
示例12: clCreateKernel
// Host-side code template for creating, launching and releasing one kernel.
// Tokens in angle brackets (<algorithm_name>, <kernel_argument_list>,
// <parallelism>) are placeholders rather than valid C; presumably a code
// generator substitutes them before compilation — confirm.
// Create the kernel
cl_kernel bones_kernel_<algorithm_name>_0 = clCreateKernel(bones_program, "bones_kernel_<algorithm_name>_0", &bones_errors); error_check(bones_errors);
// Set all the arguments to the kernel function
// bones_num_args starts at 0; presumably the expanded argument list uses it
// as the running argument index — confirm.
int bones_num_args = 0;
<kernel_argument_list>
// Start the kernel
// One-dimensional launch. The local work size is passed as NULL, which leaves
// the work-group size to the OpenCL implementation.
size_t bones_global_worksize[] = {<parallelism>};
bones_errors = clEnqueueNDRangeKernel(bones_queue,bones_kernel_<algorithm_name>_0,1,NULL,bones_global_worksize,NULL,0,NULL,&bones_event); error_check(bones_errors);
// Synchronize and clean-up the kernel
// clFinish blocks until the queue has drained; its return value is ignored.
// bones_event is not released within this snippet.
clFinish(bones_queue);
clReleaseKernel(bones_kernel_<algorithm_name>_0);
开发者ID:gjvdbraak,项目名称:bones,代码行数:13,代码来源:default.host.c
示例13: crackMD5
// Sets up an OpenCL run for the kernel KERNEL_NAME: opens the device, creates
// the kernel, allocates the device buffers and starts uploading the inputs.
// Presumably a brute-force search for the MD5 preimage, judging by the names
// — confirm.
//
//   hash    - target hash as text; passed to strToBin with length 2*HASH_SIZE
//   cs      - NUL-terminated character set; its strlen is used as cs_len
//   passlen - candidate password length, fed to DISPOSITIONS
//
// The listing is truncated before the end of the function, so the kernel
// launch, result read-back, cleanup and return value are not visible here.
int crackMD5(char *hash, char *cs, int passlen) {
clut_device dev; // device struct
cl_event evt; // performance measurement event
cl_kernel kernel; // execution kernel
cl_int ret; // error code
double td;
int cs_len, sync_flag;
long chunk, disp;
unsigned char bin_hash[HASH_SIZE];
cs_len = strlen(cs);
sync_flag = 0;
// strToBin is defined elsewhere; presumably decodes the hex text in `hash`
// into HASH_SIZE raw bytes in bin_hash — confirm.
strToBin(hash, bin_hash, 2*HASH_SIZE);
// disp and chunk come from the DISPOSITIONS and DISP_PER_CORE helpers defined
// elsewhere; presumably the total candidate count and the share handled per
// thread — confirm.
disp = DISPOSITIONS(cs_len, passlen);
chunk = DISP_PER_CORE(disp, AVAILABLE_THREADS);
// NOTE(review): chunk is a signed long but is printed with %lu; %ld would
// match if debug() is printf-style — confirm.
debug("HOST", "Numero di disposizione da calcolare per stream processing unit = %lu\n", chunk);
clut_open_device(&dev, PATH_TO_KERNEL);
clut_print_device_info(&dev);
/* ----------------------------------------- Create execution kernel ----------------------------------------- */
kernel = clCreateKernel(dev.program, KERNEL_NAME, &ret);
clut_check_err(ret, "Fallita la creazione del kernel");
/* ----------------------------------- Create memory buffers on the device ----------------------------------- */
// Every allocation below is followed by the same check: a non-zero ret is
// passed to clut_panic.
// NOTE(review): dchunk is sized with the host's sizeof(long). An OpenCL C
// `long` is always 64 bits, so this mismatches on hosts with a 32-bit long —
// confirm the kernel's parameter type.
cl_mem dchunk = clCreateBuffer(dev.context, CL_MEM_READ_WRITE, sizeof(long), NULL, &ret);
if (ret)
clut_panic(ret, "Fallita l'allocazione della memoria sul device per la memorizzazione del chunk");
cl_mem dhash = clCreateBuffer(dev.context, CL_MEM_READ_ONLY, HASH_SIZE * sizeof(unsigned char), NULL, &ret);
if (ret)
clut_panic(ret, "Fallita l'allocazione della memoria sul device per la memorizzazione dell'hash");
cl_mem charset = clCreateBuffer(dev.context, CL_MEM_READ_ONLY, cs_len * sizeof(char), NULL, &ret);
if (ret)
clut_panic(ret, "Fallita l'allocazione della memoria sul device per la memorizzazione del charset");
cl_mem charset_size = clCreateBuffer(dev.context, CL_MEM_READ_ONLY, sizeof(int), NULL, &ret);
if (ret)
clut_panic(ret, "Fallita l'allocazione della memoria sul device per la memorizzazione della taglia del charset");
// NOTE(review): the failure message for dpasslen is identical to the one for
// charset_size above; looks copy-pasted.
cl_mem dpasslen = clCreateBuffer(dev.context, CL_MEM_READ_ONLY, sizeof(int), NULL, &ret);
if (ret)
clut_panic(ret, "Fallita l'allocazione della memoria sul device per la memorizzazione della taglia del charset");
// The per-core variant of the sync buffer is kept commented out; the active
// version allocates a single int.
//cl_mem sync = clCreateBuffer(dev.context, CL_MEM_READ_WRITE, AVAILABLE_CORES * sizeof(int), NULL, &ret);
cl_mem sync = clCreateBuffer(dev.context, CL_MEM_READ_WRITE, sizeof(int), NULL, &ret);
if (ret)
clut_panic(ret, "Fallita l'allocazione della memoria sul device per la memorizzazione del flag di sync");
// NOTE(review): dcracked is sized by HASH_SIZE rather than by passlen —
// confirm intended capacity.
cl_mem dcracked = clCreateBuffer(dev.context, CL_MEM_READ_WRITE, HASH_SIZE, NULL, &ret);
if (ret)
clut_panic(ret, "Fallita l'allocazione della memoria sul device per la memorizzazione della password in chiaro");
// NOTE(review): the failure message for computed_hash repeats the dcracked
// message above.
cl_mem computed_hash = clCreateBuffer(dev.context, CL_MEM_READ_WRITE, HASH_SIZE * sizeof(unsigned char), NULL, &ret);
if (ret)
clut_panic(ret, "Fallita l'allocazione della memoria sul device per la memorizzazione della password in chiaro");
/* ----------------------------------- Write memory buffers on the device ------------------------------------ */
// All uploads are blocking (CL_TRUE), so each host buffer may be reused as
// soon as its call returns.
ret = clEnqueueWriteBuffer(dev.queue, dchunk, CL_TRUE, 0, sizeof(long), &chunk, 0, NULL, NULL);
if(ret)
clut_panic(ret, "Fallita la scrittura del chunk sul buffer di memoria del device");
// The (int *) cast on bin_hash is unnecessary: the parameter is a
// const void * and the size is given in bytes.
ret = clEnqueueWriteBuffer(dev.queue, dhash, CL_TRUE, 0, HASH_SIZE * sizeof(unsigned char), (int *)bin_hash, 0, NULL, NULL);
if(ret)
clut_panic(ret, "Fallita la scrittura dell'hash sul buffer di memoria del device");
// The charset is uploaded without its terminating NUL (cs_len bytes).
ret = clEnqueueWriteBuffer(dev.queue, charset, CL_TRUE, 0, cs_len * sizeof(char), cs, 0, NULL, NULL);
if(ret)
clut_panic(ret, "Fallita la scrittura de
|
请发表评论