本文整理汇总了C++中clReleaseContext函数的典型用法代码示例。如果您正苦于以下问题:C++ clReleaseContext函数的具体用法?C++ clReleaseContext怎么用?C++ clReleaseContext使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了clReleaseContext函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: main
//.........这里部分代码省略.........
err = clEnqueueNDRangeKernel(commands, kernel_compute_step_factor, 1, NULL, &global_size, NULL, 0, NULL, &ocdTempEvent);
clFinish(commands);
START_TIMER(ocdTempEvent, OCD_TIMER_KERNEL, "CFD Step Factor Kernel", ocdTempTimer)
END_TIMER(ocdTempTimer)
CHKERR(err, "Failed to execute kernel[kernel_compute_step_factor]!");
for(int j = 0; j < RK; j++)
{
err = 0;
err = clSetKernelArg(kernel_compute_flux_contributions, 0, sizeof(int), &nelr);
err |= clSetKernelArg(kernel_compute_flux_contributions, 1, sizeof(cl_mem),&variables);
err |= clSetKernelArg(kernel_compute_flux_contributions, 2, sizeof(cl_mem), &fc_momentum_x);
err |= clSetKernelArg(kernel_compute_flux_contributions, 3, sizeof(cl_mem), &fc_momentum_y);
err |= clSetKernelArg(kernel_compute_flux_contributions, 4, sizeof(cl_mem), &fc_momentum_z);
err |= clSetKernelArg(kernel_compute_flux_contributions, 5, sizeof(cl_mem), &fc_density_energy);
CHKERR(err, "Failed to set kernel arguments!");
// Get the maximum work group size for executing the kernel on the device
err = clGetKernelWorkGroupInfo(kernel_compute_flux_contributions, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), (void *) &local_size, NULL);
CHKERR(err, "Failed to retrieve kernel_compute_flux_contributions work group info!");
err = clEnqueueNDRangeKernel(commands, kernel_compute_flux_contributions, 1, NULL, &global_size, NULL, 0, NULL, &ocdTempEvent);
clFinish(commands);
START_TIMER(ocdTempEvent, OCD_TIMER_KERNEL, "CFD Flux Contribution Kernel", ocdTempTimer)
//compute_flux_contributions(nelr, variables, fc_momentum_x, fc_momentum_y, fc_momentum_z, fc_density_energy);
END_TIMER(ocdTempTimer)
CHKERR(err, "Failed to execute kernel [kernel_compute_flux_contributions]!");
err = 0;
err = clSetKernelArg(kernel_compute_flux, 0, sizeof(int), &nelr);
err |= clSetKernelArg(kernel_compute_flux, 1, sizeof(cl_mem), &elements_surrounding_elements);
err |= clSetKernelArg(kernel_compute_flux, 2, sizeof(cl_mem), &normals);
err |= clSetKernelArg(kernel_compute_flux, 3, sizeof(cl_mem), &variables);
err |= clSetKernelArg(kernel_compute_flux, 4, sizeof(cl_mem), &fc_momentum_x);
err |= clSetKernelArg(kernel_compute_flux, 5, sizeof(cl_mem), &fc_momentum_y);
err |= clSetKernelArg(kernel_compute_flux, 6, sizeof(cl_mem), &fc_momentum_z);
err |= clSetKernelArg(kernel_compute_flux, 7, sizeof(cl_mem), &fc_density_energy);
err |= clSetKernelArg(kernel_compute_flux, 8, sizeof(cl_mem), &fluxes);
err |= clSetKernelArg(kernel_compute_flux, 9, sizeof(cl_mem), &ff_variable);
err |= clSetKernelArg(kernel_compute_flux, 10, sizeof(cl_mem), &ff_fc_momentum_x);
err |= clSetKernelArg(kernel_compute_flux, 11, sizeof(cl_mem), &ff_fc_momentum_y);
err |= clSetKernelArg(kernel_compute_flux, 12, sizeof(cl_mem), &ff_fc_momentum_z);
err |= clSetKernelArg(kernel_compute_flux, 13, sizeof(cl_mem), &ff_fc_density_energy);
CHKERR(err, "Failed to set kernel arguments!");
// Get the maximum work group size for executing the kernel on the device
err = clGetKernelWorkGroupInfo(kernel_compute_flux, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), (void *) &local_size, NULL);
CHKERR(err, "Failed to retrieve kernel_compute_flux work group info!");
err = clEnqueueNDRangeKernel(commands, kernel_compute_flux, 1, NULL, &global_size, NULL, 0, NULL, &ocdTempEvent);
clFinish(commands);
START_TIMER(ocdTempEvent, OCD_TIMER_KERNEL, "CFD Flux Kernel", ocdTempTimer)
END_TIMER(ocdTempTimer)
CHKERR(err, "Failed to execute kernel [kernel_compute_flux]!");
err = 0;
err = clSetKernelArg(kernel_time_step, 0, sizeof(int), &j);
err |= clSetKernelArg(kernel_time_step, 1, sizeof(int), &nelr);
err |= clSetKernelArg(kernel_time_step, 2, sizeof(cl_mem), &old_variables);
err |= clSetKernelArg(kernel_time_step, 3, sizeof(cl_mem), &variables);
err |= clSetKernelArg(kernel_time_step, 4, sizeof(cl_mem), &step_factors);
err |= clSetKernelArg(kernel_time_step, 5, sizeof(cl_mem), &fluxes);
CHKERR(err, "Failed to set kernel arguments!");
// Get the maximum work group size for executing the kernel on the device
err = clGetKernelWorkGroupInfo(kernel_time_step, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), (void *) &local_size, NULL);
CHKERR(err, "Failed to retrieve kernel_time_step work group info!");
err = clEnqueueNDRangeKernel(commands, kernel_time_step, 1, NULL, &global_size, NULL, 0, NULL, &ocdTempEvent);
clFinish(commands);
START_TIMER(ocdTempEvent, OCD_TIMER_KERNEL, "CFD Time Step Kernel", ocdTempTimer)
END_TIMER(ocdTempTimer)
CHKERR(err, "Failed to execute kernel [kernel_time_step]!");
}
}
clFinish(commands);
std::cout << "Finished" << std::endl;
std::cout << "Saving solution..." << std::endl;
dump(commands, variables, nel, nelr);
std::cout << "Saved solution..." << std::endl;
std::cout << "Cleaning up..." << std::endl;
clReleaseProgram(program);
clReleaseKernel(kernel_compute_flux);
clReleaseKernel(kernel_compute_flux_contributions);
clReleaseKernel(kernel_compute_step_factor);
clReleaseKernel(kernel_time_step);
clReleaseKernel(kernel_initialize_variables);
clReleaseCommandQueue(commands);
clReleaseContext(context);
dealloc<float>(areas);
dealloc<int>(elements_surrounding_elements);
dealloc<float>(normals);
dealloc<float>(variables);
dealloc<float>(old_variables);
dealloc<float>(fluxes);
dealloc<float>(step_factors);
dealloc<float>(fc_momentum_x);
dealloc<float>(fc_momentum_y);
dealloc<float>(fc_momentum_z);
dealloc<float>(fc_density_energy);
std::cout << "Done..." << std::endl;
ocd_finalize();
return 0;
}
开发者ID:CharudattaSChitale,项目名称:OpenDwarfs,代码行数:101,代码来源:cfd.cpp
示例2: main
//.........这里部分代码省略.........
ret = clSetKernelArg(nonlinearpart, 3, sizeof(cl_mem),(void* )&cl_v[1]);
ret = clSetKernelArg(nonlinearpart, 4, sizeof(float),(void* )&dt);
ret = clSetKernelArg(nonlinearpart, 5, sizeof(float),(void* )&a[0]);
ret = clSetKernelArg(nonlinearpart, 6, sizeof(float),(void* )&a[1]);
ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
// linear part
ret = clfftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &command_queue, 0, NULL, NULL,cl_u, cl_uhat, tmpBufferu);
ret = clfftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &command_queue, 0, NULL, NULL,cl_v, cl_vhat, tmpBufferv);
ret = clFinish(command_queue);
ret = clSetKernelArg(linearpart, 0, sizeof(cl_mem),(void *)&cl_uhat[0]);
ret = clSetKernelArg(linearpart, 1, sizeof(cl_mem),(void *)&cl_uhat[1]);
ret = clSetKernelArg(linearpart, 2, sizeof(cl_mem),(void *)&cl_vhat[0]);
ret = clSetKernelArg(linearpart, 3, sizeof(cl_mem),(void *)&cl_vhat[1]);
ret = clSetKernelArg(linearpart, 4, sizeof(cl_mem),(void* )&cl_kx);
ret = clSetKernelArg(linearpart, 5, sizeof(cl_mem),(void* )&cl_ky);
ret = clSetKernelArg(linearpart, 6, sizeof(cl_mem),(void* )&cl_kz);
ret = clSetKernelArg(linearpart, 7, sizeof(float),(void* )&dt);
ret = clSetKernelArg(linearpart, 8, sizeof(float),(void* )&Du);
ret = clSetKernelArg(linearpart, 9, sizeof(float),(void* )&Dv);
ret = clSetKernelArg(linearpart, 10, sizeof(float),(void* )&A);
ret = clSetKernelArg(linearpart, 11, sizeof(float),(void* )&B);
ret = clSetKernelArg(linearpart, 12, sizeof(float),(void* )&b[0]);
ret = clSetKernelArg(linearpart, 13, sizeof(float),(void* )&b[1]);
ret = clSetKernelArg(linearpart, 14, sizeof(int),(void* )&Nx);
ret = clSetKernelArg(linearpart, 15, sizeof(int),(void* )&Ny);
ret = clSetKernelArg(linearpart, 16, sizeof(int),(void* )&Nz);
ret = clEnqueueNDRangeKernel(command_queue, linearpart, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
ret = clfftEnqueueTransform(planHandle, CLFFT_BACKWARD, 1, &command_queue, 0, NULL, NULL,cl_uhat, cl_u, tmpBufferu);
ret = clfftEnqueueTransform(planHandle, CLFFT_BACKWARD, 1, &command_queue, 0, NULL, NULL,cl_vhat, cl_v, tmpBufferv);
ret = clFinish(command_queue);
// done
if(n==plottime){
printf("time:%f, step:%d,%d\n",n*dt,n,plotnum);
plottime=plottime+plotgap;
plotnum=plotnum+1;
ret = clEnqueueReadBuffer(command_queue, cl_u[0], CL_TRUE, 0, N * sizeof(float), u[0], 0, NULL, NULL);
ret = clEnqueueReadBuffer(command_queue, cl_v[0], CL_TRUE, 0, N * sizeof(float), v[0], 0, NULL, NULL);
ret = clFinish(command_queue);
//output of data U
char tmp_str[10];
strcpy(nameconfig,"./data/u");
sprintf(tmp_str,"%d",10000000+plotnum);
strcat(nameconfig,tmp_str);
strcat(nameconfig,".datbin");
fp=fopen(nameconfig,"wb");
if (!fp) {fprintf(stderr, "Failed to write u-data.\n"); exit(1); }
for(i=0;i<N;i++){fwrite(&u[0][i], sizeof(float), 1, fp);}
fclose( fp );
//V
strcpy(nameconfig,"./data/v");
sprintf(tmp_str,"%d",10000000+plotnum);
strcat(nameconfig,tmp_str);
strcat(nameconfig,".datbin");
fp=fopen(nameconfig,"wb");
if (!fp) {fprintf(stderr, "Failed to write v-data.\n"); exit(1); }
for(i=0;i<N;i++){fwrite(&v[0][i], sizeof(float), 1, fp);}
fclose( fp );
}
}
gettimeofday(&tve, NULL);
printf("Finished time stepping\n");
elapsedTime = (tve.tv_sec - tvs.tv_sec) * 1000.0; // sec to ms
elapsedTime += (tve.tv_usec - tvs.tv_usec) / 1000.0; // us to ms
printf("%f,",elapsedTime);
clReleaseMemObject(cl_u[0]);
clReleaseMemObject(cl_u[1]);
clReleaseMemObject(cl_v[0]);
clReleaseMemObject(cl_v[1]);
clReleaseMemObject(cl_uhat[0]);
clReleaseMemObject(cl_uhat[1]);
clReleaseMemObject(cl_vhat[0]);
clReleaseMemObject(cl_vhat[1]);
clReleaseMemObject(cl_kx);
clReleaseMemObject(cl_ky);
clReleaseMemObject(cl_kz);
ret = clReleaseKernel(frequencies); ret = clReleaseProgram(p_frequencies);
ret = clReleaseKernel(linearpart); ret = clReleaseProgram(p_linearpart);
ret = clReleaseKernel(nonlinearpart); ret = clReleaseProgram(p_nonlinearpart);
free(u[0]);
free(v[0]);
clReleaseMemObject(tmpBufferu);
clReleaseMemObject(tmpBufferv);
/* Release the plan. */
ret = clfftDestroyPlan(&planHandle);
/* Release clFFT library. */
clfftTeardown();
ret = clReleaseCommandQueue(command_queue);
ret = clReleaseContext(context);
printf("Program execution complete\n");
return 0;
}
开发者ID:MichaelQuell,项目名称:GrayScott-OpenCl,代码行数:101,代码来源:grayscottOpenCLs.c
示例3: main
//.........这里部分代码省略.........
cl_mem dst_device_buffer;
dst_device_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, num_elem *sizeof(cl_float16), NULL, &ret);
if (ret != CL_SUCCESS)
{
printf("error: could not create dst buffer\n");
exit(1);
}
/* Set kernel arguments */
ret = CL_SUCCESS;
ret |= clSetKernelArg(kernel, 0, sizeof(cl_mem), &src_0_device_buffer);
ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &dst_device_buffer);
if (ret != CL_SUCCESS)
{
printf("error: call to 'clSetKernelArg' failed\n");
exit(1);
}
/* Launch the kernel */
size_t global_work_size = num_elem;
size_t local_work_size = num_elem;
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
if (ret != CL_SUCCESS)
{
printf("error: call to 'clEnqueueNDRangeKernel' failed\n");
exit(1);
}
/* Wait for it to finish */
clFinish(command_queue);
/* Read results from GPU */
ret = clEnqueueReadBuffer(command_queue, dst_device_buffer, CL_TRUE,0, num_elem * sizeof(cl_float16), dst_host_buffer, 0, NULL, NULL);
if (ret != CL_SUCCESS)
{
printf("error: call to 'clEnqueueReadBuffer' failed\n");
exit(1);
}
/* Dump dst buffer to file */
char dump_file[100];
sprintf((char *)&dump_file, "%s.result", argv[0]);
write_buffer(dump_file, (const char *)dst_host_buffer, num_elem * sizeof(cl_float16));
printf("Result dumped to %s\n", dump_file);
/* Free host dst buffer */
free(dst_host_buffer);
/* Free device dst buffer */
ret = clReleaseMemObject(dst_device_buffer);
if (ret != CL_SUCCESS)
{
printf("error: call to 'clReleaseMemObject' failed\n");
exit(1);
}
/* Free host side src buffer 0 */
free(src_0_host_buffer);
/* Free device side src buffer 0 */
ret = clReleaseMemObject(src_0_device_buffer);
if (ret != CL_SUCCESS)
{
printf("error: call to 'clReleaseMemObject' failed\n");
exit(1);
}
/* Release kernel */
ret = clReleaseKernel(kernel);
if (ret != CL_SUCCESS)
{
printf("error: call to 'clReleaseKernel' failed\n");
exit(1);
}
/* Release program */
ret = clReleaseProgram(program);
if (ret != CL_SUCCESS)
{
printf("error: call to 'clReleaseProgram' failed\n");
exit(1);
}
/* Release command queue */
ret = clReleaseCommandQueue(command_queue);
if (ret != CL_SUCCESS)
{
printf("error: call to 'clReleaseCommandQueue' failed\n");
exit(1);
}
/* Release context */
ret = clReleaseContext(context);
if (ret != CL_SUCCESS)
{
printf("error: call to 'clReleaseContext' failed\n");
exit(1);
}
return 0;
}
开发者ID:xianggong,项目名称:m2c_unit_test,代码行数:101,代码来源:sign_float16_bin.c
示例4: main
//.........这里部分代码省略.........
d_C = clCreateBuffer(clGPUContext,
CL_MEM_READ_WRITE,
mem_size_A, NULL, &errcode);
d_A = clCreateBuffer(clGPUContext,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
mem_size_A, h_A, &errcode);
d_B = clCreateBuffer(clGPUContext,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
mem_size_B, h_B, &errcode);
FILE* fp = fopen("hw2.cl", "r");
fseek (fp , 0 , SEEK_END);
const size_t lSize = ftell(fp);
rewind(fp);
unsigned char* buffer;
buffer = (unsigned char*) malloc (lSize);
fread(buffer, 1, lSize, fp);
fclose(fp);
cl_int status;
clProgram = clCreateProgramWithBinary(clGPUContext,
1, (const cl_device_id *)clDevices,
&lSize, (const unsigned char**)&buffer,
&status, &errcode);
errcode = clBuildProgram(clProgram, 0, NULL, NULL,
NULL, NULL);
errcode = clBuildProgram(clProgram, 0,
NULL, NULL, NULL, NULL);
clKernel = clCreateKernel(clProgram,
"MM", &errcode);
size_t globalWorkSize[2];
int wA = WA;
int wC = WC;
errcode = clSetKernelArg(clKernel, 0,
sizeof(cl_mem), (void *)&d_C);
errcode |= clSetKernelArg(clKernel, 1,
sizeof(cl_mem), (void *)&d_A);
errcode |= clSetKernelArg(clKernel, 2,
sizeof(cl_mem), (void *)&d_B);
errcode |= clSetKernelArg(clKernel, 3,
sizeof(int), (void *)&wA);
errcode |= clSetKernelArg(clKernel, 4,
sizeof(int), (void *)&wC);
globalWorkSize[0] = 16;
globalWorkSize[1] = 16;
cl_ulong time_start, time_end, total_time = 0;
errcode = clEnqueueNDRangeKernel(clCommandQue,
clKernel, 2, NULL, globalWorkSize,
NULL, 0, NULL, &mm);
printf("Average time = %lu\n");
clFinish(clCommandQue);
clGetEventProfilingInfo(mm, CL_PROFILING_COMMAND_START,
sizeof(time_start), &time_start, NULL);
clGetEventProfilingInfo(mm, CL_PROFILING_COMMAND_END,
sizeof(time_end), &time_end, NULL);
total_time += time_end - time_start;
printf("Average time = %lu\n", total_time);
errcode = clEnqueueReadBuffer(clCommandQue,
d_C, CL_TRUE, 0, mem_size_C,
h_C, 0, NULL, NULL);
free(h_A);
free(h_B);
free(h_C);
clReleaseMemObject(d_A);
clReleaseMemObject(d_C);
clReleaseMemObject(d_B);
free(clDevices);
clReleaseContext(clGPUContext);
clReleaseKernel(clKernel);
clReleaseProgram(clProgram);
clReleaseCommandQueue(clCommandQue);
}
开发者ID:hemantjp,项目名称:HW2,代码行数:101,代码来源:hw2.c
示例5: Context::~Context
/// Destructor: releases the OpenCL command queue referred to by `queue`, then
/// the OpenCL context referred to by `context`.
///
/// The status codes returned by the two release calls are discarded, so a
/// failed release is not reported to anyone.
// NOTE(review): `queue` and `context` are presumably data members of Context;
// their declarations are not visible in this excerpt — confirm.
Context::~Context()
{
// Release the queue first, then the context.
clReleaseCommandQueue(queue);
clReleaseContext(context);
}
开发者ID:hduregger,项目名称:crowd,代码行数:5,代码来源:Context.cpp
示例6: main
//.........这里部分代码省略.........
cl_context context = NULL;
cl_command_queue command_queue = NULL;
cl_program program = NULL;
cl_kernel kernel_mt = NULL, kernel_pi = NULL;
size_t kernel_code_size;
char *kernel_src_str;
cl_uint *result;
cl_int ret;
FILE *fp;
cl_mem rand, count;
size_t global_item_size[3], local_item_size[3];
cl_mem dev_mts;
cl_event ev_mt_end, ev_pi_end, ev_copy_end;
cl_ulong prof_start, prof_mt_end, prof_pi_end, prof_copy_end;
clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id,
&ret_num_devices);
context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
result = (cl_uint*)malloc(sizeof(cl_uint)*num_generator);
command_queue = clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &ret);
fp = fopen("mt.cl", "r");
kernel_src_str = (char*)malloc(MAX_SOURCE_SIZE);
kernel_code_size = fread(kernel_src_str, 1, MAX_SOURCE_SIZE, fp);
fclose(fp);
/* Create output buffer */
rand = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint)*num_rand*num_generator, NULL, &ret);
count = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint)*num_generator, NULL, &ret);
/* Build Program*/
program = clCreateProgramWithSource(context, 1, (const char **)&kernel_src_str,
(const size_t *)&kernel_code_size, &ret);
clBuildProgram(program, 1, &device_id, "", NULL, NULL);
kernel_mt = clCreateKernel(program, "genrand", &ret);
kernel_pi = clCreateKernel(program, "calc_pi", &ret);
/* Create input parameter */
dev_mts = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(mts), NULL, &ret);
clEnqueueWriteBuffer(command_queue, dev_mts, CL_TRUE, 0, sizeof(mts), mts, 0, NULL, NULL);
/* Set Kernel Arguments */
clSetKernelArg(kernel_mt, 0, sizeof(cl_mem), (void*)&rand); /* Random numbers (output of genrand) */
clSetKernelArg(kernel_mt, 1, sizeof(cl_mem), (void*)&dev_mts); /* MT parameter (input to genrand) */
clSetKernelArg(kernel_mt, 2, sizeof(num_rand), &num_rand); /* Number of random numbers to generate */
clSetKernelArg(kernel_pi, 0, sizeof(cl_mem), (void*)&count); /* Counter for points within circle (output of calc_pi) */
clSetKernelArg(kernel_pi, 1, sizeof(cl_mem), (void*)&rand); /* Random numbers (input to calc_pi) */
clSetKernelArg(kernel_pi, 2, sizeof(num_rand), &num_rand); /* Number of random numbers used */
global_item_size[0] = num_generator; global_item_size[1] = 1; global_item_size[2] = 1;
local_item_size[0] = num_generator; local_item_size[1] = 1; local_item_size[2] = 1;
/* Create a random number array */
clEnqueueNDRangeKernel(command_queue, kernel_mt, 1, NULL, global_item_size, local_item_size, 0, NULL, &ev_mt_end);
/* Compute PI */
clEnqueueNDRangeKernel(command_queue, kernel_pi, 1, NULL, global_item_size, local_item_size, 0, NULL, &ev_pi_end);
/* Get result */
clEnqueueReadBuffer(command_queue, count, CL_TRUE, 0, sizeof(cl_uint)*num_generator, result, 0, NULL, &ev_copy_end);
/* Average the values of PI */
count_all = 0;
for (i=0; i < num_generator; i++) {
count_all += result[i];
}
pi = ((double)count_all)/(num_rand * num_generator) * 4;
printf("pi = %f\n", pi);
/* Get execution time info */
clGetEventProfilingInfo(ev_mt_end, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &prof_start, NULL);
clGetEventProfilingInfo(ev_mt_end, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &prof_mt_end, NULL);
clGetEventProfilingInfo(ev_pi_end, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &prof_pi_end, NULL);
clGetEventProfilingInfo(ev_copy_end, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &prof_copy_end, NULL);
printf(" mt: %f[ms]\n"
" pi: %f[ms]\n"
" copy: %f[ms]\n",
(prof_mt_end - prof_start)/(1000000.0),
(prof_pi_end - prof_mt_end)/(1000000.0),
(prof_copy_end - prof_pi_end)/(1000000.0));
clReleaseEvent(ev_mt_end);
clReleaseEvent(ev_pi_end);
clReleaseEvent(ev_copy_end);
clReleaseMemObject(rand);
clReleaseMemObject(count);
clReleaseKernel(kernel_mt);
clReleaseKernel(kernel_pi);
clReleaseProgram(program);
clReleaseCommandQueue(command_queue);
clReleaseContext(context);
free(kernel_src_str);
free(result);
return 0;
}
开发者ID:manug2,项目名称:manug2-repo,代码行数:101,代码来源:mt.c
示例7: main
//.........这里部分代码省略.........
NULL, &status);
// Create a buffer object that will contain the data
// from the host array B
cl_mem imgBuf;
imgBuf = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize,
NULL, &status);
// Create a buffer object that will hold the output data
cl_mem dataDimensionsBuf;
dataDimensionsBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 3 * sizeof(int),
NULL, &status);
// Create a buffer object that will hold the output data
cl_mem curvBuf;
curBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, datasize,
NULL, &status);
// Create a buffer object that will hold the output data
cl_mem phiBuf;
phiBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, datasize,
NULL, &status);
status = clEnqueueWriteBuffer(cmdQueue, imgBuf, CL_FALSE,
0, datasize, img, 0, NULL, NULL);
status = clEnqueueWriteBuffer(cmdQueue, dataDimensionsBuf, CL_FALSE,
0, 3 * sizeof(int), dataDimensions, 0, NULL, NULL);
status = clEnqueueWriteBuffer(cmdQueue, curvBuf, CL_FALSE,
0, datasize, curv, 0, NULL, NULL);
// Write input array A to the device buffer bufferA
status = clEnqueueWriteBuffer(cmdQueue, phiBuf, CL_FALSE,
0, datasize, phi, 0, NULL, NULL);
// Create a program with source code
cl_program program = clCreateProgramWithSource(context, 1,
(const char**)&programSource, NULL, &status);
// Build (compile) the program for the device
status = clBuildProgram(program, numDevices, devices,
NULL, NULL, NULL);
// Create the vector addition kernel
cl_kernel kernel;
kernel = clCreateKernel(program, "segmentation", &status);
// Associate the input and output buffers with the kernel
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &contourBuf);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &imgBuf);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &dataDimensionsBuf);
status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &curvBuf);
status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &phiBuf);
// Define an index space (global work size) of work
// items for execution. A workgroup size (local work size)
// is not required, but can be used.
size_t globalWorkSize[1];
// There are 'elements' work-items
globalWorkSize[0] = elements;
// Execute the kernel for execution
status = clEnqueueNDRangeKernel(cmdQueue, kernel, 1, NULL,
globalWorkSize, NULL, 0, NULL, NULL);
// Read the device output buffer to the host output array
clEnqueueReadBuffer(cmdQueue, contourBuf, CL_TRUE, 0,
datasize, contour, 0, NULL, NULL);
imwrite(contour, N1, N2, fname_out);
// Free OpenCL resources
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseCommandQueue(cmdQueue);
clReleaseMemObject(contourBuf);
clReleaseMemObject(imgBuf);
clReleaseMemObject(dataDimensionsBuf);
clReleaseMemObject(curvBuf);
clReleaseMemObject(phiBuf);
clReleaseContext(context);
// Free host resources
free(phi);
free(curv);
free(img);
free(contour);
free(dataDimensions);
free(platforms);
free(devices);
return 0;
}
开发者ID:capatton,项目名称:CS133-Project-Segmentation,代码行数:101,代码来源:seg_main_cl.c
示例8: xcl_release_world
/*
 * Releases the OpenCL command queue and context stored in `world`.
 *
 * world: passed by value; its `command_queue` and `context` handles are handed
 *        to clReleaseCommandQueue and clReleaseContext, in that order.
 *
 * Returns nothing. The status codes of both release calls are ignored.
 * Because `world` is a copy, the caller's own xcl_world is not modified and
 * still holds the same handle values after this call.
 */
void xcl_release_world(xcl_world world) {
/* Release the queue first, then the context. */
clReleaseCommandQueue(world.command_queue);
clReleaseContext(world.context);
}
开发者ID:shvo,项目名称:Rodinia-FPGA,代码行数:4,代码来源:xcl.c
示例9: main
//.........这里部分代码省略.........
cl_program program; // program
cl_kernel kernel; // kernel
// Size, in bytes, of each vector
size_t bytes = n*sizeof(double);
// Allocate memory for each vector on host
h_a = (double*)malloc(bytes);
h_b = (double*)malloc(bytes);
h_c = (double*)malloc(bytes);
// Initialize vectors on host
int i;
for( i = 0; i < n; i++ )
{
h_a[i] = sinf(i)*sinf(i);
h_b[i] = cosf(i)*cosf(i);
}
size_t globalSize, localSize;
cl_int err;
// Number of work items in each local work group
localSize = 64;
// Number of total work items - localSize must be devisor
globalSize = ceil(n/(float)localSize)*localSize;
// Bind to platform
err = clGetPlatformIDs(1, &cpPlatform, NULL);
// Get ID for the device
err = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
// Create a context
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
// Create a command queue
queue = clCreateCommandQueue(context, device_id, 0, &err);
// Create the compute program from the source buffer
program = clCreateProgramWithSource(context, 1,
(const char **) & kernelSource, NULL, &err);
// Build the program executable
clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
// Create the compute kernel in the program we wish to run
kernel = clCreateKernel(program, "vecAdd", &err);
// Create the input and output arrays in device memory for our calculation
d_a = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, NULL);
d_b = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, NULL);
d_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bytes, NULL, NULL);
// Write our data set into the input array in device memory
err = clEnqueueWriteBuffer(queue, d_a, CL_TRUE, 0,
bytes, h_a, 0, NULL, NULL);
err |= clEnqueueWriteBuffer(queue, d_b, CL_TRUE, 0,
bytes, h_b, 0, NULL, NULL);
// Set the arguments to our compute kernel
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a);
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b);
err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_c);
err |= clSetKernelArg(kernel, 3, sizeof(unsigned int), &n);
// Execute the kernel over the entire range of the data set
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, &localSize,
0, NULL, NULL);
// Wait for the command queue to get serviced before reading back results
clFinish(queue);
// Read the results from the device
clEnqueueReadBuffer(queue, d_c, CL_TRUE, 0,
bytes, h_c, 0, NULL, NULL );
//Sum up vector c and print result divided by n, this should equal 1 within error
double sum = 0;
for(i=0; i<n; i++)
sum += h_c[i];
printf("final result: %f\n", sum/n);
// release OpenCL resources
clReleaseMemObject(d_a);
clReleaseMemObject(d_b);
clReleaseMemObject(d_c);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(queue);
clReleaseContext(context);
//release host memory
free(h_a);
free(h_b);
free(h_c);
return 0;
}
开发者ID:GongYiLiao,项目名称:C_Daily,代码行数:101,代码来源:test0.c
示例10: main
//.........这里部分代码省略.........
ret = clSetKernelArg(nonlinearpart_a, 0, sizeof(cl_mem),(void *)&cl_u);
ret = clSetKernelArg(nonlinearpart_a, 1, sizeof(cl_mem),(void* )&cl_v);
ret = clSetKernelArg(nonlinearpart_a, 2, sizeof(double),(void* )&A);
ret = clSetKernelArg(nonlinearpart_a, 3, sizeof(double),(void* )&dt);
ret = clSetKernelArg(nonlinearpart_a, 4, sizeof(double),(void* )&a);
ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_a, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
//nonlinearpart_b
ret = clSetKernelArg(nonlinearpart_b, 0, sizeof(cl_mem),(void *)&cl_u);
ret = clSetKernelArg(nonlinearpart_b, 1, sizeof(cl_mem),(void* )&cl_v);
ret = clSetKernelArg(nonlinearpart_b, 2, sizeof(double),(void* )&A);
ret = clSetKernelArg(nonlinearpart_b, 3, sizeof(double),(void* )&dt);
ret = clSetKernelArg(nonlinearpart_b, 4, sizeof(double),(void* )&b);
ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_b, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
//linear
fft2dfor(&cl_u, &cl_uhat,&planHandle,&command_queue,&tmpBuffer);
fft2dfor(&cl_v, &cl_vhat,&planHandle,&command_queue,&tmpBuffer);
//printf("A%f,B%f\n",A,B);
ret = clSetKernelArg(linearpart, 0, sizeof(cl_mem),(void *)&cl_uhat);
ret = clSetKernelArg(linearpart, 1, sizeof(cl_mem),(void *)&cl_vhat);
ret = clSetKernelArg(linearpart, 2, sizeof(cl_mem),(void* )&cl_kx);
ret = clSetKernelArg(linearpart, 3, sizeof(cl_mem),(void* )&cl_ky);
ret = clSetKernelArg(linearpart, 4, sizeof(double),(void* )&Du);
ret = clSetKernelArg(linearpart, 5, sizeof(double),(void* )&Dv);
ret = clSetKernelArg(linearpart, 6, sizeof(double),(void* )&A);
ret = clSetKernelArg(linearpart, 7, sizeof(double),(void* )&B);
ret = clSetKernelArg(linearpart, 8, sizeof(double),(void* )&dt);
ret = clSetKernelArg(linearpart, 9, sizeof(double),(void* )&c);
ret = clSetKernelArg(linearpart, 10, sizeof(int),(void* )&Nx);
ret = clSetKernelArg(linearpart, 11, sizeof(int),(void* )&Ny);
ret = clEnqueueNDRangeKernel(command_queue, linearpart, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
fft2dback(&cl_u, &cl_uhat,&planHandle,&command_queue,&tmpBuffer);
fft2dback(&cl_v, &cl_vhat,&planHandle,&command_queue,&tmpBuffer);
//nonlinearpart_b
ret = clSetKernelArg(nonlinearpart_b, 0, sizeof(cl_mem),(void *)&cl_u);
ret = clSetKernelArg(nonlinearpart_b, 1, sizeof(cl_mem),(void* )&cl_v);
ret = clSetKernelArg(nonlinearpart_b, 2, sizeof(double),(void* )&A);
ret = clSetKernelArg(nonlinearpart_b, 3, sizeof(double),(void* )&dt);
ret = clSetKernelArg(nonlinearpart_b, 4, sizeof(double),(void* )&b);
ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_b, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
//nonlinearpart_a
ret = clSetKernelArg(nonlinearpart_a, 0, sizeof(cl_mem),(void *)&cl_u);
ret = clSetKernelArg(nonlinearpart_a, 1, sizeof(cl_mem),(void* )&cl_v);
ret = clSetKernelArg(nonlinearpart_a, 2, sizeof(double),(void* )&A);
ret = clSetKernelArg(nonlinearpart_a, 3, sizeof(double),(void* )&dt);
ret = clSetKernelArg(nonlinearpart_a, 4, sizeof(double),(void* )&a);
ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_a, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
// done
if(n==plottime){
printf("time:%f, step:%d,%d,umax:%f,vmax:%f\n",n*dt,n,plotnum,umax[plotnum],vmax[plotnum]);
plottime=plottime+plotgap;
plotnum=plotnum+1;
writedata_C(&cl_u, &command_queue,Nx,Ny,plotnum,"u");
writedata_C(&cl_v, &command_queue,Nx,Ny,plotnum,"v");
umax[plotnum]=writeimage(&cl_u, &command_queue,Nx,Ny,plotnum,"u");
vmax[plotnum]=writeimage(&cl_v, &command_queue,Nx,Ny,plotnum,"v");
}
}//end timestepping
printf("Finished time stepping\n");
mtime_e(&tvs,"Programm took:");
writearray(umax,(Tmax/plotgap)+1,"u");
writearray(vmax,(Tmax/plotgap)+1,"v");
free(umax);
free(vmax);
clReleaseMemObject(cl_u);
clReleaseMemObject(cl_v);
clReleaseMemObject(cl_uhat);
clReleaseMemObject(cl_vhat);
clReleaseMemObject(cl_kx);
clReleaseMemObject(cl_ky);
ret = clReleaseKernel(initialdata);
ret = clReleaseKernel(frequencies);
ret = clReleaseKernel(linearpart);
ret = clReleaseKernel(nonlinearpart_a);
ret = clReleaseKernel(nonlinearpart_b);
fftdestroy(&planHandle, &tmpBuffer);
ret = clReleaseCommandQueue(command_queue);
ret = clReleaseContext(context);
for(i=0;i<num_platforms;i++){free(device_id[i]);}
free(device_id);
free(platform_id);
free(num_devices);
printf("Program execution complete\n");
return 0;
}
开发者ID:MichaelQuell,项目名称:PSNM,代码行数:101,代码来源:main_gs.c
示例11: exec_trig_kernel
int
exec_trig_kernel(const char *program_source,
int n, void *srcA, void *dst)
{
cl_context context;
cl_command_queue cmd_queue;
cl_device_id *devices;
cl_program program;
cl_kernel kernel;
cl_mem memobjs[2];
size_t global_work_size[1];
size_t local_work_size[1];
size_t cb;
cl_int err;
float c = 7.3f; // a scalar number to test non-pointer args
// create the OpenCL context on a GPU device
context = poclu_create_any_context();
if (context == (cl_context)0)
return -1;
// get the list of GPU devices associated with context
clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb);
devices = (cl_device_id *) malloc(cb);
clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, devices, NULL);
// create a command-queue
cmd_queue = clCreateCommandQueue(context, devices[0], 0, NULL);
if (cmd_queue == (cl_command_queue)0)
{
clReleaseContext(context);
free(devices);
return -1;
}
free(devices);
// allocate the buffer memory objects
memobjs[0] = clCreateBuffer(context,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
sizeof(cl_float4) * n, srcA, NULL);
if (memobjs[0] == (cl_mem)0)
{
clReleaseCommandQueue(cmd_queue);
clReleaseContext(context);
return -1;
}
memobjs[1] = clCreateBuffer(context,
CL_MEM_READ_WRITE,
sizeof(cl_float4) * n, NULL, NULL);
if (memobjs[1] == (cl_mem)0)
{
delete_memobjs(memobjs, 1);
clReleaseCommandQueue(cmd_queue);
clReleaseContext(context);
return -1;
}
// create the program
program = clCreateProgramWithSource(context,
1, (const char**)&program_source, NULL, NULL);
if (program == (cl_program)0)
{
delete_memobjs(memobjs, 2);
clReleaseCommandQueue(cmd_queue);
clReleaseContext(context);
return -1;
}
// build the program
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
delete_memobjs(memobjs, 2);
clReleaseProgram(program);
clReleaseCommandQueue(cmd_queue);
clReleaseContext(context);
return -1;
}
// create the kernel
kernel = clCreateKernel(program, "trig", NULL);
if (kernel == (cl_kernel)0)
{
delete_memobjs(memobjs, 2);
clReleaseProgram(program);
clReleaseCommandQueue(cmd_queue);
clReleaseContext(context);
return -1;
}
// set the args values
err = clSetKernelArg(kernel, 0,
sizeof(cl_mem), (void *) &memobjs[0]);
err |= clSetKernelArg(kernel, 1,
sizeof(cl_mem), (void *) &memobjs[1]);
err |= clSetKernelArg(kernel, 2,
sizeof(float), (void *) &c);
//.........这里部分代码省略.........
开发者ID:Drako,项目名称:pocl,代码行数:101,代码来源:trig_exec.c
示例12: runProgram
//.........这里部分代码省略.........
// copy data to device
err = clEnqueueWriteBuffer(queue, A_d, CL_TRUE, 0, sizeof(float)*N*N, A, 0, NULL , &event[0]);
OCL_CHECK(err);
size_t localsize[2];
size_t globalsize[2];
localsize[0] = 16;
localsize[1] = 16;
globalsize[0] = N;
globalsize[1] = N;
err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), &A_d);
if(err != 0) { printf("%d\n",err); OCL_CHECK(err); exit(1);}
err = clSetKernelArg(kernel[0], 1, sizeof(cl_mem), &Aout_d);
if(err != 0) { printf("%d\n",err); OCL_CHECK(err); exit(1);}
err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, globalsize, localsize, 0, NULL, NULL);
OCL_CHECK(err);
clFinish(queue);
// read device data back to host
clEnqueueReadBuffer(queue, Aout_d, CL_TRUE, 0, sizeof(float)*N*N, Aout, 0, NULL , &event[1]);
err = clWaitForEvents(1,&event[1]);
OCL_CHECK(err);
err = clGetEventProfilingInfo (event[0], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &gstart, NULL);
OCL_CHECK(err);
err = clGetEventProfilingInfo (event[1], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &gend, NULL);
OCL_CHECK(err);
gpuTime = (double)(gend -gstart)/1000000000.0;
//check_1d_f(sum, blks+1);
#ifdef DEBUG
puts("Output");
check_2d_f(Aout,N,N);
#endif
printf("oclTime = %lf (s)\n", gpuTime );
// free
clReleaseMemObject(A_d);
clReleaseMemObject(Aout_d);
// // check
// int flag = 1;
// for(i=0;i<N;++i){
// for(j=0;j<N;++j){
// if(A[i*N+j] != At[j*N+i])
// {
// flag = 0;
// break;
// }
// }
// }
// if( flag == 0 )
// {
// puts("Bugs! Check program.");
// }else{
// puts("Succeed!");
// }
clReleaseProgram(program);
clReleaseContext(context);
clReleaseCommandQueue(queue);
for(i=0;i<NumK;++i){
clReleaseKernel(kernel[i]);
}
for(i=0;i<NumE;++i){
clReleaseEvent(event[i]);
}
free(kernelSource);
#ifdef SAVEBIN
free(bin);
#endif
free(A);
free(Aout);
return;
}
开发者ID:Anmol-007,项目名称:oclKernels,代码行数:101,代码来源:template.c
示例13: main
/*
 * Example driver: runs clblasSrot on the vectors X and Y using the first GPU
 * device of the first OpenCL platform, printing the vectors before and after
 * the call via printResult().
 *
 * N, incx, incy, X, Y, C, S and printResult() are defined elsewhere in this
 * file (presumably at file scope -- confirm against the full source).
 *
 * Returns 0 on success, 1 if any OpenCL or clblas call fails.
 */
int
main(void)
{
    cl_int err;
    cl_platform_id platform = 0;
    cl_device_id device = 0;
    cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
    cl_context ctx = 0;
    cl_command_queue queue = 0;
    cl_mem bufX = NULL;
    cl_mem bufY = NULL;
    cl_event event = NULL;
    int ret = 0;
    /* Number of elements spanned by N strided entries of each vector. */
    int lenX = 1 + (N-1)*abs(incx);
    int lenY = 1 + (N-1)*abs(incy);

    /* Setup OpenCL environment. */
    err = clGetPlatformIDs(1, &platform, NULL);
    /* Check the platform query itself; previously its result was overwritten
     * by an unrelated CPU device query before being tested. */
    if (err != CL_SUCCESS) {
        printf( "clGetPlatformIDs() failed with %d\n", err );
        return 1;
    }
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
    if (err != CL_SUCCESS) {
        printf( "clGetDeviceIDs() failed with %d\n", err );
        return 1;
    }
    props[1] = (cl_context_properties)platform;
    ctx = clCreateContext(props, 1, &device, NULL, NULL, &err);
    if (err != CL_SUCCESS) {
        printf( "clCreateContext() failed with %d\n", err );
        return 1;
    }
    queue = clCreateCommandQueue(ctx, device, 0, &err);
    if (err != CL_SUCCESS) {
        printf( "clCreateCommandQueue() failed with %d\n", err );
        clReleaseContext(ctx);
        return 1;
    }

    /* Setup clblas. */
    err = clblasSetup();
    if (err != CL_SUCCESS) {
        printf("clblasSetup() failed with %d\n", err);
        clReleaseCommandQueue(queue);
        clReleaseContext(ctx);
        return 1;
    }

    /* Prepare OpenCL memory objects and place vectors inside them. */
    bufX = clCreateBuffer(ctx, CL_MEM_READ_WRITE, (lenX*sizeof(cl_float)), NULL, &err);
    if (err != CL_SUCCESS) {
        printf("clCreateBuffer() failed with %d\n", err);
        ret = 1;
        goto cleanup;
    }
    bufY = clCreateBuffer(ctx, CL_MEM_READ_WRITE, (lenY*sizeof(cl_float)), NULL, &err);
    if (err != CL_SUCCESS) {
        printf("clCreateBuffer() failed with %d\n", err);
        ret = 1;
        goto cleanup;
    }
    err = clEnqueueWriteBuffer(queue, bufX, CL_TRUE, 0, (lenX*sizeof(cl_float)), X, 0, NULL, NULL);
    if (err != CL_SUCCESS) {
        printf("clEnqueueWriteBuffer() failed with %d\n", err);
        ret = 1;
        goto cleanup;
    }
    err = clEnqueueWriteBuffer(queue, bufY, CL_TRUE, 0, (lenY*sizeof(cl_float)), Y, 0, NULL, NULL);
    if (err != CL_SUCCESS) {
        printf("clEnqueueWriteBuffer() failed with %d\n", err);
        ret = 1;
        goto cleanup;
    }

    /* Show the input vectors before the rotation is applied. */
    printResult();

    /* Call clblas function. */
    err = clblasSrot(N, bufX, 0, incx, bufY, 0, incy, C, S, 1, &queue, 0, NULL, &event);
    if (err != CL_SUCCESS) {
        printf("clblasSrot() failed with %d\n", err);
        ret = 1;
        goto cleanup;
    }

    /* Wait for calculations to be finished. */
    err = clWaitForEvents(1, &event);
    if (err != CL_SUCCESS) {
        printf("clWaitForEvents() failed with %d\n", err);
        ret = 1;
        goto cleanup;
    }

    /* Fetch results of calculations from GPU memory (blocking reads). */
    err = clEnqueueReadBuffer(queue, bufY, CL_TRUE, 0, (lenY*sizeof(cl_float)),
                              Y, 0, NULL, NULL);
    if (err != CL_SUCCESS) {
        printf("clEnqueueReadBuffer() failed with %d\n", err);
        ret = 1;
        goto cleanup;
    }
    err = clEnqueueReadBuffer(queue, bufX, CL_TRUE, 0, (lenX*sizeof(cl_float)),
                              X, 0, NULL, NULL);
    if (err != CL_SUCCESS) {
        printf("clEnqueueReadBuffer() failed with %d\n", err);
        ret = 1;
        goto cleanup;
    }

    /* At this point the results of SROT are in the host vectors X and Y. */
    printResult();

cleanup:
    /* Release OpenCL events (event stays NULL if clblasSrot never ran or failed). */
    if (event != NULL) {
        clReleaseEvent(event);
    }
    /* Release OpenCL memory objects that were actually created. */
    if (bufY != NULL) {
        clReleaseMemObject(bufY);
    }
    if (bufX != NULL) {
        clReleaseMemObject(bufX);
    }
    /* Finalize work with clblas. */
    clblasTeardown();
    /* Release OpenCL working objects. */
    clReleaseCommandQueue(queue);
    clReleaseContext(ctx);
    return ret;
}
开发者ID:AndreasMiller,项目名称:clBLAS,代码行数:100,代码来源:example_srot.c
示例14: DeleteCL
/// Releases the global OpenCL command queue and context handles.
///
/// The command queue is released before the context, matching the usual
/// reverse-of-creation teardown order (presumably g_cqCommandQue was created
/// on g_cxMainContext -- confirm against the initialization code).
/// Return codes of the release calls are not inspected.
void DeleteCL()
{
    clReleaseCommandQueue(g_cqCommandQue);
    clReleaseContext(g_cxMainContext);
}
开发者ID:erwincoumans,项目名称:wxWidgets,代码行数:5,代码来源:main.cpp
示例15: simpleExample
//.........这里部分代码省略.........
/* Build program */
program = build_program(context, device, PROGRAM_FILE);
/* Create data buffer */
data_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, ARRAY_SIZE * sizeof(float), data, &err);
sum_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float), NULL, &err);
if(err < 0) {
perror("Couldn't create a buffer");
exit(1);
};
/* Create a command queue */
queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
if(err < 0) {
perror("Couldn't create a command queue");
exit(1);
};
/* Create kernels */
vector_kernel = clCreateKernel(program, KERNEL_1, &err);
complete_kernel = clCreateKernel(program, KERNEL_2, &err);
if(err < 0) {
perror("Couldn't create a kernel");
exit(1);
};
/* Set arguments for vector kernel */
err = clSetKernelArg(vector_kernel, 0, sizeof(cl_mem), &data_buffer);
err |= clSetKernelArg(vector_kernel, 1, local_size * 4 * sizeof(float), NULL);
/* Set arguments for complete kernel */
err = clSetKernelArg(complete_kernel, 0, sizeof(cl_mem), &data_buffer);
err |= clSetKernelArg(complete_kernel, 1, local_size * 4 * sizeof(float), NULL);
e
|
请发表评论