本文整理汇总了C++中clReleaseCommandQueue函数的典型用法代码示例。如果您正苦于以下问题：C++ clReleaseCommandQueue函数的具体用法？C++ clReleaseCommandQueue怎么用？C++ clReleaseCommandQueue使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了clReleaseCommandQueue函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: main
//.........这里部分代码省略.........
}
fseek(text_handle, 0, SEEK_END);
text_size = ftell(text_handle)-1;
rewind(text_handle);
text = (char*)calloc(text_size, sizeof(char));
fread(text, sizeof(char), text_size, text_handle);
fclose(text_handle);
chars_per_item = text_size / global_size + 1;
/* Create program from file */
program = clCreateProgramWithSource(context, 1,
(const char**)&program_buffer, &program_size, &err);
if(err < 0) {
perror("Couldn't create the program");
exit(1);
}
free(program_buffer);
/* Build program */
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if(err < 0) {
/* Find size of log and print to std output */
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
0, NULL, &log_size);
program_log = (char*) calloc(log_size+1, sizeof(char));
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
log_size+1, program_log, NULL);
printf("%s\n", program_log);
free(program_log);
exit(1);
}
/* Create a kernel */
kernel = clCreateKernel(program, KERNEL_FUNC, &err);
if(err < 0) {
perror("Couldn't create a kernel");
exit(1);
};
/* Create buffers to hold the text characters and count */
text_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY |
CL_MEM_COPY_HOST_PTR, text_size, text, &err);
if(err < 0) {
perror("Couldn't create a buffer");
exit(1);
};
result_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE |
CL_MEM_COPY_HOST_PTR, sizeof(result), result, NULL);
/* Create kernel argument */
err = clSetKernelArg(kernel, 0, sizeof(pattern), pattern);
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &text_buffer);
err |= clSetKernelArg(kernel, 2, sizeof(chars_per_item), &chars_per_item);
err |= clSetKernelArg(kernel, 3, 4 * sizeof(int), NULL);
err |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &result_buffer);
if(err < 0) {
printf("Couldn't set a kernel argument");
exit(1);
};
/* Create a command queue */
queue = clCreateCommandQueue(context, device, 0, &err);
if(err < 0) {
perror("Couldn't create a command queue");
exit(1);
};
/* Enqueue kernel */
err = clEnqueueNDRangeKernel(queue, kernel, 1, &offset, &global_size,
&local_size, 0, NULL, NULL);
if(err < 0) {
perror("Couldn't enqueue the kernel");
printf("Error code: %d\n", err);
exit(1);
}
/* Read and print the result */
err = clEnqueueReadBuffer(queue, result_buffer, CL_TRUE, 0,
sizeof(result), &result, 0, NULL, NULL);
if(err < 0) {
perror("Couldn't read the buffer");
exit(1);
}
printf("\nResults: \n");
printf("Number of occurrences of 'that': %d\n", result[0]);
printf("Number of occurrences of 'with': %d\n", result[1]);
printf("Number of occurrences of 'have': %d\n", result[2]);
printf("Number of occurrences of 'from': %d\n", result[3]);
/* Deallocate resources */
clReleaseMemObject(result_buffer);
clReleaseMemObject(text_buffer);
clReleaseKernel(kernel);
clReleaseCommandQueue(queue);
clReleaseProgram(program);
clReleaseContext(context);
return 0;
}
开发者ID:sunlianqiang,项目名称:openclDemo,代码行数:101,代码来源:string_search.c
示例2: main
/*
 * Event-callback demo.
 *
 * Creates a context for the device returned by create_device(), builds
 * PROGRAM_FILE, runs KERNEL_FUNC as a single task, reads the output buffer
 * back with a non-blocking read, and registers completion callbacks on the
 * kernel event and the read event.
 *
 * Exits with status 1 as soon as an OpenCL call reports an error; returns 0
 * otherwise.
 */
int main() {

   /* OpenCL data structures */
   cl_device_id device;
   cl_context context;
   cl_command_queue queue;
   cl_program program;
   cl_kernel kernel;
   cl_int err;

   /* Data and events */
   char *kernel_msg;
   float data[4096];
   cl_mem data_buffer;
   cl_event kernel_event, read_event;

   /* Create a device and context */
   device = create_device();
   context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
   if(err < 0) {
      perror("Couldn't create a context");
      exit(1);
   }

   /* Build the program and create a kernel */
   program = build_program(context, device, PROGRAM_FILE);
   kernel = clCreateKernel(program, KERNEL_FUNC, &err);
   if(err < 0) {
      perror("Couldn't create a kernel");
      exit(1);
   }

   /* Create a write-only buffer to hold the output data */
   data_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
         sizeof(data), NULL, &err);
   if(err < 0) {
      perror("Couldn't create a buffer");
      exit(1);
   }

   /* Create kernel argument */
   err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &data_buffer);
   if(err < 0) {
      perror("Couldn't set a kernel argument");
      exit(1);
   }

   /* Create a command queue */
   queue = clCreateCommandQueue(context, device, 0, &err);
   if(err < 0) {
      perror("Couldn't create a command queue");
      exit(1);
   }

   /* Enqueue kernel */
   err = clEnqueueTask(queue, kernel, 0, NULL, &kernel_event);
   if(err < 0) {
      perror("Couldn't enqueue the kernel");
      exit(1);
   }

   /* Read the buffer (non-blocking: completion is signalled via read_event) */
   err = clEnqueueReadBuffer(queue, data_buffer, CL_FALSE, 0,
         sizeof(data), data, 0, NULL, &read_event);
   if(err < 0) {
      perror("Couldn't read the buffer");
      exit(1);
   }

   /* Set event handling routines */
   kernel_msg = "The kernel finished successfully.\n\0";
   err = clSetEventCallback(kernel_event, CL_COMPLETE,
         &kernel_complete, kernel_msg);
   if(err < 0) {
      perror("Couldn't set callback for event");
      exit(1);
   }
   err = clSetEventCallback(read_event, CL_COMPLETE, &read_complete, data);
   if(err < 0) {
      perror("Couldn't set callback for event");
      exit(1);
   }

   /* The read above is asynchronous and targets `data`, which lives in this
      stack frame. Drain the queue before tearing anything down so the
      transfer cannot still be in flight when main returns. */
   err = clFinish(queue);
   if(err < 0) {
      perror("Couldn't finish the command queue");
      exit(1);
   }

   /* Deallocate resources (events included; each enqueue returned one) */
   clReleaseEvent(read_event);
   clReleaseEvent(kernel_event);
   clReleaseMemObject(data_buffer);
   clReleaseKernel(kernel);
   clReleaseCommandQueue(queue);
   clReleaseProgram(program);
   clReleaseContext(context);
   return 0;
}
开发者ID:dashmoment,项目名称:opencl_in_action,代码行数:87,代码来源:callback.c
示例3: main
//.........这里部分代码省略.........
int numberOfBins = (int)ceil(size/(2*cutoff)) + 2;
// Bins will only exist on the device.
particle_t* bins;
// How many particles are there in each bin - also only exists on the device.
volatile int* binSizes;
// Number of bins to be initialized.
size_t clearAmt = numberOfBins*numberOfBins;
// Allocate memory for bins on the device.
cl_mem d_binSizes = clCreateBuffer(context, CL_MEM_READ_WRITE, numberOfBins * numberOfBins * sizeof(volatile int), NULL, &ret);
cl_mem d_bins = clCreateBuffer(context, CL_MEM_READ_WRITE, numberOfBins * numberOfBins * maxParticles * sizeof(particle_t), NULL, &ret);
// SETTING ARGUMENTS FOR THE KERNELS
// Set arguments for the init / clear kernel
ret = clSetKernelArg(binInitKernel, 0, sizeof(cl_mem), (void *)&d_binSizes);
ret = clSetKernelArg(binInitKernel, 1, sizeof(int), &numberOfBins);
// Set arguments for the binning kernel
ret = clSetKernelArg(binKernel, 0, sizeof(cl_mem), (void *)&d_particles);
ret = clSetKernelArg(binKernel, 1, sizeof(int), &n);
ret = clSetKernelArg(binKernel, 2, sizeof(cl_mem), (void *)&d_bins);
ret = clSetKernelArg(binKernel, 3, sizeof(cl_mem), (void *)&d_binSizes);
ret = clSetKernelArg(binKernel, 4, sizeof(int), &numberOfBins);
// Set arguments for force kernel.
ret = clSetKernelArg(forceKernel, 0, sizeof(cl_mem), (void *)&d_particles);
ret = clSetKernelArg(forceKernel, 1, sizeof(int), &n);
ret = clSetKernelArg(forceKernel, 2, sizeof(cl_mem), (void *)&d_bins);
ret = clSetKernelArg(forceKernel, 3, sizeof(cl_mem), (void *)&d_binSizes);
ret = clSetKernelArg(forceKernel, 4, sizeof(int), &numberOfBins);
// Set arguments for move kernel
ret = clSetKernelArg(moveKernel, 0, sizeof(cl_mem), (void *)&d_particles);
ret = clSetKernelArg(moveKernel, 1, sizeof(int), &n);
ret = clSetKernelArg(moveKernel, 2, sizeof(double), &size);
// Variable to check if kernel execution is done.
cl_event kernelDone;
double simulation_time = read_timer();
int step = 0;
for (step = 0; step < NSTEPS; step++) {
// Execute bin initialization (clearing after first iteration)
ret = clEnqueueNDRangeKernel(commandQueue, binInitKernel, 1, NULL, &clearAmt, NULL, 0, NULL, &kernelDone);
ret = clWaitForEvents(1, &kernelDone);
// Execute binning kernel
ret = clEnqueueNDRangeKernel(commandQueue, binKernel, 1, NULL, &globalItemSize, &localItemSize, 0, NULL, &kernelDone);
// ret = clEnqueueNDRangeKernel(commandQueue, binKernel, 1, NULL, &globalItemSize, &localItemSize, 0, NULL, &kernelDone);
ret = clWaitForEvents(1, &kernelDone);
// Execute force kernel
ret = clEnqueueNDRangeKernel(commandQueue, forceKernel, 1, NULL, &globalItemSize, &localItemSize, 0, NULL, &kernelDone);
ret = clWaitForEvents(1, &kernelDone);
// Execute move kernel
ret = clEnqueueNDRangeKernel(commandQueue, moveKernel, 1, NULL, &globalItemSize, &localItemSize, 0, NULL, &kernelDone);
ret = clWaitForEvents(1, &kernelDone);
if (fsave && (step%SAVEFREQ) == 0) {
// Copy the particles back to the CPU
ret = clEnqueueReadBuffer(commandQueue, d_particles, CL_TRUE, 0, n * sizeof(particle_t), particles, 0, NULL, &kernelDone);
ret = clWaitForEvents(1, &kernelDone);
save(fsave, n, particles);
}
}
simulation_time = read_timer() - simulation_time;
printf("CPU-GPU copy time = %g seconds\n", copy_time);
printf("n = %d, simulation time = %g seconds\n", n, simulation_time);
if (fsum)
fprintf(fsum, "%d %lf \n", n, simulation_time);
if (fsum)
fclose(fsum);
free(particles);
if (fsave)
fclose(fsave);
ret = clFlush(commandQueue);
ret = clFinish(commandQueue);
ret = clReleaseCommandQueue(commandQueue);
ret = clReleaseKernel(forceKernel);
ret = clReleaseKernel(moveKernel);
ret = clReleaseProgram(program);
ret = clReleaseMemObject(d_particles);
ret = clReleaseContext(context);
return 0;
}
开发者ID:Abercus,项目名称:openCL,代码行数:101,代码来源:simulation.c
示例4: clean_all
void clean_all(void) {
printf("Cleaning Variables ... \n\n");
// Opencl environment variables
clReleaseCommandQueue(command_queue);
clReleaseContext(context);
// Release all memory allocated
if (Data_MeshType == UNSTRUCTURED) {
// Mesh Variables
free(MeshElementArray.Node1);
free(MeshElementArray.Node2);
free(MeshElementArray.Node3);
free(MeshElementArray.Node4);
free(MeshNodeArray_double.x);
free(MeshNodeArray_double.y);
free(MeshNodeArray_double.z);
free(MeshElementArray.Neighborindex1);
free(MeshElementArray.Neighborindex2);
free(MeshElementArray.Neighborindex3);
free(MeshElementArray.Neighborindex4);
clReleaseMemObject(Mesh_Node_x);
clReleaseMemObject(Mesh_Node_y);
clReleaseMemObject(Mesh_Node_z);
clReleaseMemObject(Mesh_Element_Node1);
clReleaseMemObject(Mesh_Element_Node2);
clReleaseMemObject(Mesh_Element_Node3);
clReleaseMemObject(Mesh_Element_Node4);
clReleaseMemObject(Mesh_Element_Neighborindex1);
clReleaseMemObject(Mesh_Element_Neighborindex2);
clReleaseMemObject(Mesh_Element_Neighborindex3);
clReleaseMemObject(Mesh_Element_Neighborindex4);
clReleaseMemObject(r);
clReleaseMemObject(s);
clReleaseMemObject(t);
clReleaseMemObject(eid);
}
// Cleaning Velocity variables
free(velocity.u0);
free(velocity.v0);
free(velocity.w0);
free(velocity.u1);
free(velocity.v1);
free(velocity.w1);
free(velocity.time0);
free(velocity.time1);
free(Tracer.x);
Tracer.x = NULL;
free(Tracer.y);
Tracer.y = NULL;
free(Tracer.z);
Tracer.z = NULL;
free(Tracer.ElementIndex);
Tracer.ElementIndex = NULL;
free(Tracer.Start_time);
Tracer.Start_time = NULL;
free(Tracer.Stop_time);
Tracer.Stop_time = NULL;
free(Tracer.LeftDomain);
Tracer.LeftDomain = NULL;
if (Trace_ReleaseStrategy == 1) {
free(Tracer1.x);
Tracer1.x = NULL;
free(Tracer1.y);
Tracer1.y = NULL;
free(Tracer1.z);
Tracer1.z = NULL;
free(Tracer1.ElementIndex);
Tracer1.ElementIndex = NULL;
free(Tracer1.Start_time);
Tracer1.Start_time = NULL;
free(Tracer1.Stop_time);
Tracer1.Stop_time = NULL;
free(Tracer1.LeftDomain);
Tracer1.LeftDomain = NULL;
free(index1);
//.........这里部分代码省略.........
开发者ID:linyufly,项目名称:FlowVC-1,代码行数:101,代码来源:clean.c
示例5: clReleaseCommandQueue
// Releases this device's command queue, if one is held.
void OpenCLDevice::deinitialize()
{
	// No queue, nothing to release.
	if (!this->m_queue) {
		return;
	}

	clReleaseCommandQueue(this->m_queue);
}
开发者ID:Walid-Shouman,项目名称:Blender,代码行数:6,代码来源:COM_OpenCLDevice.cpp
示例6: main
//.........这里部分代码省略.........
ret = clSetKernelArg(nonlinearpart_a, 0, sizeof(cl_mem),(void *)&cl_u);
ret = clSetKernelArg(nonlinearpart_a, 1, sizeof(cl_mem),(void* )&cl_v);
ret = clSetKernelArg(nonlinearpart_a, 2, sizeof(double),(void* )&A);
ret = clSetKernelArg(nonlinearpart_a, 3, sizeof(double),(void* )&dt);
ret = clSetKernelArg(nonlinearpart_a, 4, sizeof(double),(void* )&a);
ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_a, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
//nonlinearpart_b
ret = clSetKernelArg(nonlinearpart_b, 0, sizeof(cl_mem),(void *)&cl_u);
ret = clSetKernelArg(nonlinearpart_b, 1, sizeof(cl_mem),(void* )&cl_v);
ret = clSetKernelArg(nonlinearpart_b, 2, sizeof(double),(void* )&A);
ret = clSetKernelArg(nonlinearpart_b, 3, sizeof(double),(void* )&dt);
ret = clSetKernelArg(nonlinearpart_b, 4, sizeof(double),(void* )&b);
ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_b, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
//linear
fft2dfor(&cl_u, &cl_uhat,&planHandle,&command_queue,&tmpBuffer);
fft2dfor(&cl_v, &cl_vhat,&planHandle,&command_queue,&tmpBuffer);
//printf("A%f,B%f\n",A,B);
ret = clSetKernelArg(linearpart, 0, sizeof(cl_mem),(void *)&cl_uhat);
ret = clSetKernelArg(linearpart, 1, sizeof(cl_mem),(void *)&cl_vhat);
ret = clSetKernelArg(linearpart, 2, sizeof(cl_mem),(void* )&cl_kx);
ret = clSetKernelArg(linearpart, 3, sizeof(cl_mem),(void* )&cl_ky);
ret = clSetKernelArg(linearpart, 4, sizeof(double),(void* )&Du);
ret = clSetKernelArg(linearpart, 5, sizeof(double),(void* )&Dv);
ret = clSetKernelArg(linearpart, 6, sizeof(double),(void* )&A);
ret = clSetKernelArg(linearpart, 7, sizeof(double),(void* )&B);
ret = clSetKernelArg(linearpart, 8, sizeof(double),(void* )&dt);
ret = clSetKernelArg(linearpart, 9, sizeof(double),(void* )&c);
ret = clSetKernelArg(linearpart, 10, sizeof(int),(void* )&Nx);
ret = clSetKernelArg(linearpart, 11, sizeof(int),(void* )&Ny);
ret = clEnqueueNDRangeKernel(command_queue, linearpart, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
fft2dback(&cl_u, &cl_uhat,&planHandle,&command_queue,&tmpBuffer);
fft2dback(&cl_v, &cl_vhat,&planHandle,&command_queue,&tmpBuffer);
//nonlinearpart_b
ret = clSetKernelArg(nonlinearpart_b, 0, sizeof(cl_mem),(void *)&cl_u);
ret = clSetKernelArg(nonlinearpart_b, 1, sizeof(cl_mem),(void* )&cl_v);
ret = clSetKernelArg(nonlinearpart_b, 2, sizeof(double),(void* )&A);
ret = clSetKernelArg(nonlinearpart_b, 3, sizeof(double),(void* )&dt);
ret = clSetKernelArg(nonlinearpart_b, 4, sizeof(double),(void* )&b);
ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_b, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
//nonlinearpart_a
ret = clSetKernelArg(nonlinearpart_a, 0, sizeof(cl_mem),(void *)&cl_u);
ret = clSetKernelArg(nonlinearpart_a, 1, sizeof(cl_mem),(void* )&cl_v);
ret = clSetKernelArg(nonlinearpart_a, 2, sizeof(double),(void* )&A);
ret = clSetKernelArg(nonlinearpart_a, 3, sizeof(double),(void* )&dt);
ret = clSetKernelArg(nonlinearpart_a, 4, sizeof(double),(void* )&a);
ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_a, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
// done
if(n==plottime){
printf("time:%f, step:%d,%d,umax:%f,vmax:%f\n",n*dt,n,plotnum,umax[plotnum],vmax[plotnum]);
plottime=plottime+plotgap;
plotnum=plotnum+1;
writedata_C(&cl_u, &command_queue,Nx,Ny,plotnum,"u");
writedata_C(&cl_v, &command_queue,Nx,Ny,plotnum,"v");
umax[plotnum]=writeimage(&cl_u, &command_queue,Nx,Ny,plotnum,"u");
vmax[plotnum]=writeimage(&cl_v, &command_queue,Nx,Ny,plotnum,"v");
}
}//end timestepping
printf("Finished time stepping\n");
mtime_e(&tvs,"Programm took:");
writearray(umax,(Tmax/plotgap)+1,"u");
writearray(vmax,(Tmax/plotgap)+1,"v");
free(umax);
free(vmax);
clReleaseMemObject(cl_u);
clReleaseMemObject(cl_v);
clReleaseMemObject(cl_uhat);
clReleaseMemObject(cl_vhat);
clReleaseMemObject(cl_kx);
clReleaseMemObject(cl_ky);
ret = clReleaseKernel(initialdata);
ret = clReleaseKernel(frequencies);
ret = clReleaseKernel(linearpart);
ret = clReleaseKernel(nonlinearpart_a);
ret = clReleaseKernel(nonlinearpart_b);
fftdestroy(&planHandle, &tmpBuffer);
ret = clReleaseCommandQueue(command_queue);
ret = clReleaseContext(context);
for(i=0;i<num_platforms;i++){free(device_id[i]);}
free(device_id);
free(platform_id);
free(num_devices);
printf("Program execution complete\n");
return 0;
}
开发者ID:MichaelQuell,项目名称:PSNM,代码行数:101,代码来源:main_gs.c
示例7: xcl_release_world
/* Releases the command queue and then the context stored in `world`. */
void xcl_release_world(xcl_world world) {
	cl_command_queue queue = world.command_queue;
	cl_context ctx = world.context;

	clReleaseCommandQueue(queue);
	clReleaseContext(ctx);
}
开发者ID:shvo,项目名称:Rodinia-FPGA,代码行数:4,代码来源:xcl.c
示例8:
// Destructor: releases the global command queue first and the context second,
// skipping whichever handle is unset.
ofxClScheduler::~ofxClScheduler() {
	if (globalQ) {
		clReleaseCommandQueue(globalQ);
	}

	if (context) {
		clReleaseContext(context);
	}
}
开发者ID:GunioRobot,项目名称:ofxPrimes,代码行数:4,代码来源:ofxClScheduler.cpp
示例9: main
//.........这里部分代码省略.........
exit(1);
}
/* Build OpenCL program object and dump the error message, if any */
char *program_log;
size_t log_size;
char* build_options = "-fbin-llvmir -fbin-amdil -fbin-exe";
error = clBuildProgram(program, 1, &devices[i], build_options, NULL, NULL);
if(error != CL_SUCCESS) {
// If there's an error whilst building the program, dump the log
clGetProgramBuildInfo(program, devices[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
program_log = (char*) malloc(log_size+1);
program_log[log_size] = '\0';
clGetProgramBuildInfo(program, devices[i], CL_PROGRAM_BUILD_LOG,
log_size+1, program_log, NULL);
printf("\n=== ERROR ===\n\n%s\n=============\n", program_log);
free(program_log);
exit(1);
}
/* Query the program as to how many kernels were detected */
cl_uint numOfKernels;
error = clCreateKernelsInProgram(program, 0, NULL, &numOfKernels);
if (error != CL_SUCCESS) {
perror("Unable to retrieve kernel count from program");
exit(1);
}
cl_kernel* kernels = (cl_kernel*) alloca(sizeof(cl_kernel) * numOfKernels);
error = clCreateKernelsInProgram(program, numOfKernels, kernels, NULL);
/* Loop thru each kernel and execute on device */
for(cl_uint j = 0; j < numOfKernels; j++) {
char kernelName[32];
cl_uint argCnt;
clGetKernelInfo(kernels[j], CL_KERNEL_FUNCTION_NAME, sizeof(kernelName), kernelName, NULL);
clGetKernelInfo(kernels[j], CL_KERNEL_NUM_ARGS, sizeof(argCnt), &argCnt, NULL);
printf("Kernel name: %s with arity: %d\n", kernelName, argCnt);
printf("About to create command queue and enqueue this kernel...\n");
/* Create a command queue */
cl_command_queue cQ = clCreateCommandQueue(context, devices[i], 0, &error);
if (error != CL_SUCCESS) {
perror("Unable to create command-queue");
exit(1);
}
/* Create a buffer and copy the data from the main buffer */
cl_mem outobj = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_float8) * DATA_SIZE, 0, &error);
if (error != CL_SUCCESS) {
perror("Unable to create sub-buffer object");
exit(1);
}
/* Let OpenCL know that the kernel is suppose to receive an argument */
error = clSetKernelArg(kernels[j], 0, sizeof(cl_mem), &inobj);
error = clSetKernelArg(kernels[j], 1, sizeof(cl_mem), &outobj);
if (error != CL_SUCCESS) {
perror("Unable to set buffer object in kernel");
exit(1);
}
/* Enqueue the kernel to the command queue */
error = clEnqueueTask(cQ, kernels[j], 0, NULL, NULL);
if (error != CL_SUCCESS) {
perror("Unable to enqueue task to command-queue");
exit(1);
}
printf("Task has been enqueued successfully!\n");
/* Enqueue the read-back from device to host */
error = clEnqueueReadBuffer(cQ, outobj,
CL_TRUE, // blocking read
0, // read from the start
sizeof(cl_float8)*DATA_SIZE, // how much to copy
ud_out, 0, NULL, NULL);
/* Check the returned data */
if ( valuesOK(ud_in, ud_out, DATA_SIZE) ) {
printf("Check passed!\n");
} else printf("Check failed!\n");
/* Release the command queue */
clReleaseCommandQueue(cQ);
clReleaseMemObject(outobj);
}
/* Clean up */
for(cl_uint i = 0; i < numOfKernels; i++) { clReleaseKernel(kernels[i]); }
for(int i=0; i< NUMBER_OF_FILES; i++) { free(buffer[i]); }
clReleaseProgram(program);
}// end of device loop and execution
clReleaseMemObject(inobj);
clReleaseContext(context);
}// end of platform loop
free(ud_in);
free(ud_out);
}
开发者ID:gujunli,项目名称:OpenCL-Benchmark,代码行数:101,代码来源:vectorization.c
示例10: main
//.........这里部分代码省略.........
// Write a and b vectors into compute device memory
err = clEnqueueWriteBuffer(commands, d_a, CL_TRUE, 0, sizeof(float) * mycount, &h_a[rank*mycount], 0, NULL, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Failed to write h_a to source array!\n");
exit(1);
}
err = clEnqueueWriteBuffer(commands, d_b, CL_TRUE, 0, sizeof(float) * mycount, &h_b[rank*mycount], 0, NULL, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Failed to write h_b to source array!\n");
exit(1);
}
// Set the arguments to our compute kernel
err = clSetKernelArg(ko_vadd, 0, sizeof(cl_mem), &d_a);
err |= clSetKernelArg(ko_vadd, 1, sizeof(cl_mem), &d_b);
err |= clSetKernelArg(ko_vadd, 2, sizeof(cl_mem), &d_c);
err |= clSetKernelArg(ko_vadd, 3, sizeof(unsigned int), &mycount);
if (err != CL_SUCCESS)
{
printf("Error: Failed to set kernel arguments! %d\n", err);
exit(1);
}
// Get the maximum work group size for executing the kernel on the device
err = clGetKernelWorkGroupInfo(ko_vadd, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Failed to retrieve kernel work group info! %d\n", err);
exit(1);
}
// Execute the kernel over the entire range of our 1d input data set
// using the maximum number of work group items for this device
global = LENGTH;
err = clEnqueueNDRangeKernel(commands, ko_vadd, 1, NULL, &global, &local, 0, NULL, NULL);
if (err)
{
printf("Error: Failed to execute kernel!\n");
return EXIT_FAILURE;
}
// Wait for the commands to complete before reading back results
clFinish(commands);
// Read back the results from the compute device
err = clEnqueueReadBuffer( commands, d_c, CL_TRUE, 0, sizeof(float) * mycount, &h_c, 0, NULL, NULL );
if (err != CL_SUCCESS)
{
printf("Error: Failed to read output array! %d\n", err);
exit(1);
}
err = MPI_Gather (h_c, mycount, MPI_FLOAT, _h_c, mycount, MPI_FLOAT, 0, MPI_COMM_WORLD);
if (err != MPI_SUCCESS)
{
printf ("MPI_Gather failed receiving h_c\n");
exit (-1);
}
if (rank == 0)
{
// Test the results
correct = 0;
float tmp;
for(i = 0; i < LENGTH; i++)
{
tmp = h_a[i] + h_b[i]; // assign element i of a+b to tmp
tmp -= _h_c[i]; // compute deviation of expected and output result
if(tmp*tmp < TOL*TOL) // correct if square deviation is less than tolerance squared
correct++;
else
printf(" tmp %f h_a %f h_b %f h_c %f \n",tmp, h_a[i], h_b[i], _h_c[i]);
}
// summarize results
printf("C = A+B: %d out of %d results were correct.\n", correct, LENGTH);
}
// cleanup then shutdown
clReleaseMemObject(d_a);
clReleaseMemObject(d_b);
clReleaseMemObject(d_c);
clReleaseProgram(program);
clReleaseKernel(ko_vadd);
clReleaseCommandQueue(commands);
clReleaseContext(context);
err = MPI_Finalize ();
if (err != MPI_SUCCESS)
{
printf ("MPI_Finalize failed!\n");
exit (-1);
}
return 0;
}
开发者ID:ElsevierSoftwareX,项目名称:SOFTX-D-15-00010,代码行数:101,代码来源:mpi-vadd.c
示例11: main
int main()
{
struct ecl_context ctx;
cl_program program;
cl_kernel kernel;
cl_int err;
cl_mem in, out;
size_t globWorkSize;
int n = 100000;
cl_event event;
cl_ulong start, end;
err = eclGetSomeContext(&ctx);
assert(err == CL_SUCCESS);
err = eclGetProgramFromSource(ctx.context, ctx.device, src, &program);
assert(err == CL_SUCCESS);
kernel = clCreateKernel(program, "stream", &err);
assert(err == CL_SUCCESS);
in = clCreateBuffer(ctx.context, CL_MEM_READ_ONLY, n * sizeof(float),
0, &err);
assert(err == CL_SUCCESS);
out = clCreateBuffer(ctx.context, CL_MEM_READ_ONLY, n * sizeof(float),
0, &err);
assert(err == CL_SUCCESS);
err = clSetKernelArg(kernel, 0, sizeof(in), &in);
assert(err == CL_SUCCESS);
err = clSetKernelArg(kernel, 1, sizeof(out), &out);
assert(err == CL_SUCCESS);
err = clSetKernelArg(kernel, 2, sizeof(n), &n);
assert(err == CL_SUCCESS);
globWorkSize = n;
err = clEnqueueNDRangeKernel(ctx.queue, kernel, 1, 0, &globWorkSize, 0,
0, 0, &event);
assert(err == CL_SUCCESS);
clWaitForEvents(1, &event);
err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
sizeof(start), &start, 0);
if (err == CL_PROFILING_INFO_NOT_AVAILABLE) {
printf("Profling info not available.\n");
return err;
} else if (err) {
printf("An error occurred getting profiling info.\n");
return err;
}
err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
sizeof(end), &end, 0);
if (err == CL_PROFILING_INFO_NOT_AVAILABLE) {
printf("Profling info not available.\n");
return err;
} else if (err) {
printf("An error occurred getting profiling info.\n");
return err;
}
printf("T/ms: %lf\n", (double)(end - start) / 1.0e6);
printf("MB: %lf\n", (double)n * sizeof(float) / 1.0e6);
printf("GB/s: %lf\n", (double)n * sizeof(float) / (end - start));
clReleaseCommandQueue(ctx.queue);
clReleaseContext(ctx.context);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseMemObject(in);
clReleaseMemObject(out);
clReleaseEvent(event);
return err;
}
开发者ID:d-meiser,项目名称:EasyOpenCL,代码行数:72,代码来源:stream.c
示例12: OpenCLFree
/*
 * Releases the file-level OpenCL handles: the command queue first, then the
 * context. The status codes returned by the release calls are not inspected.
 */
void OpenCLFree()
{
    (void)clReleaseCommandQueue(opencl_command_queue);
    (void)clReleaseContext(opencl_context);
}
开发者ID:bradmccormack,项目名称:Fdupe,代码行数:5,代码来源:opencl.c
示例13: call_kernel
void call_kernel(float *data,unsigned int count,char * cl_name,float *results) {
FILE* programHandle;
size_t programSize, KernelSourceSize;
char *programBuffer, *KernelSource;
size_t global; // global domain size for our calculation
size_t local; // local domain size for our calculation
cl_device_id device_id; // compute device id
cl_context context; // compute context
cl_command_queue commands; // compute command queue
cl_program program; // compute program
cl_kernel kernel; // compute kernel
cl_mem input; // device memory used for the input array
cl_mem output; // device memory used for the output array
int err;
int gpu = 1;
err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
commands = clCreateCommandQueue(context, device_id, 0, &err);
//----------------------------------------------------------------------------
// get size of kernel source
programHandle = fopen(cl_name, "r");
fseek(programHandle, 0, SEEK_END);
programSize = ftell(programHandle);
rewind(programHandle);
programBuffer = (char*) malloc(programSize + 1);
programBuffer[programSize] = '\0';
fread(programBuffer, sizeof(char), programSize, programHandle);
fclose(programHandle);
// create program from buffer
program = clCreateProgramWithSource(context,1,(const char**) &programBuffer,&programSize, NULL);
free(programBuffer);
// read kernel source back in from program to check
clGetProgramInfo(program, CL_PROGRAM_SOURCE, 0, NULL, &KernelSourceSize);
KernelSource = (char*) malloc(KernelSourceSize);
clGetProgramInfo(program, CL_PROGRAM_SOURCE, KernelSourceSize, KernelSource, NULL);
program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err);
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
kernel = clCreateKernel(program, "square", &err);
//----------------------------------------------------------------------------
input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * count, NULL, NULL);
output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, NULL);
err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * count, data, 0, NULL, NULL);
clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
global = count;
err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
clFinish(commands);
err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float) * count, results, 0, NULL, NULL );
clReleaseMemObject(input);
clReleaseMemObject(output);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(commands);
clReleaseContext(context);
printf("nKernel source:\n\n %s \n", KernelSource);
free(KernelSource);
}
开发者ID:linan7788626,项目名称:Opencl_examples,代码行数:75,代码来源:more_simple_hello.c
示例14: main
int main()
{
cl_device_id device = new_device();
cl_context context;
cl_command_queue queue;
cl_program program;
cl_kernel kernel;
cl_int i, j, err;
float matrix_1[80], matrix_2[80], matrix_3[80];
const size_t buffer_origin[3] = { 5 * sizeof(float), 3, 0 };
const size_t host_origin[3] = { 1 * sizeof(float), 1, 0 };
const size_t region[3] = { 4 * sizeof(float), 4, 1 };
cl_mem matrix_buffer_1, matrix_buffer_2, matrix_buffer_3;
for (i = 0; i < 80; i++)
{
matrix_1[i] = i * 1.0f;
matrix_2[i] = 3.0;
matrix_3[i] = 0;
}
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
if (err < 0)
{
perror("Couldn't create a context\n");
exit(1);
}
program = build_program(context, device, FILE_NAME);
kernel = clCreateKernel(program, "add", &err);
if (err < 0) {
perror("Couldn't create a kernel\n");
exit(1);
}
matrix_buffer_1 = clCreateBuffer(context, CL_MEM_READ_WRITE |
CL_MEM_COPY_HOST_PTR, sizeof(matrix_1), matrix_1, &err);
if (err < 0) {
perror("Couldn't create a buffer\n");
exit(1);
}
matrix_buffer_2 = clCreateBuffer(context, CL_MEM_READ_WRITE |
CL_MEM_COPY_HOST_PTR, sizeof(matrix_2), matrix_2, &err);
if (err < 0) {
perror("Couldn't create a buffer\n");
exit(1);
}
matrix_buffer_3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(matrix_3), matrix_3, &err);
if (err < 0) {
perror("Couldn't create a buffer\n");
exit(1);
}
int row = 8;
int col = 10;
err = clSetKernelArg(kernel, 0, sizeof(int), &row);
err = clSetKernelArg(kernel, 1, sizeof(int), &col);
err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &matrix_buffer_1);
err = clSetKernelArg(kernel, 3, sizeof(cl_mem), &matrix_buffer_2);
err = clSetKernelArg(kernel, 4, sizeof(cl_mem), &matrix_buffer_3);
queue = clCreateCommandQueue(context, device, 0, &err);
if (err < 0) {
perror("Couldn't create a command queue\n");
exit(1);
}
err = clEnqueueTask(queue, kernel, 0, NULL, NULL);
if (err < 0) {
perror("Couldn't enque task\n");
exit(1);
}
err = clEnqueueReadBuffer(queue, matrix_buffer_3, CL_TRUE, 0,
sizeof(matrix_3), &matrix_3, 0, NULL, NULL);
for (i = 0; i < 8; i++) {
for (j = 0; j < 10; j++) {
printf("%6.1f ", matrix_3[j + i * 10]);
}
printf("\n");
}
clReleaseMemObject(matrix_buffer_1);
clReleaseMemObject(matrix_buffer_2);
clReleaseMemObject(matrix_buffer_3);
clReleaseKernel(kernel);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return 0;
}
开发者ID:Aperjump,项目名称:OpenCL_wrapper,代码行数:86,代码来源:main.cpp
示例15: main
/*
 * clBLAS SROT example: uploads the file-level vectors X and Y, applies
 * clblasSrot with the file-level parameters N, incx, incy, C and S, reads both
 * vectors back and prints them with printResult().
 *
 * Returns 0 on success and 1 if any OpenCL or clBLAS call fails. A GPU device
 * on the first platform is preferred; a CPU device is used when no GPU is
 * available.
 */
int
main(void)
{
    cl_int err;
    cl_platform_id platform = 0;
    cl_device_id device = 0;
    cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
    cl_context ctx = 0;
    cl_command_queue queue = 0;
    cl_mem bufX = NULL, bufY = NULL;
    cl_event event = NULL;
    int ret = 0;
    int lenX = 1 + (N-1)*abs(incx);
    int lenY = 1 + (N-1)*abs(incy);

    /* Setup OpenCL environment. */
    err = clGetPlatformIDs(1, &platform, NULL);
    if (err != CL_SUCCESS) {
        printf( "clGetPlatformIDs() failed with %d\n", err );
        return 1;
    }

    /* The original queried a CPU device before checking the platform call,
     * so a GPU-only platform aborted with a misleading platform error. */
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
    if (err != CL_SUCCESS) {
        err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, NULL);
    }
    if (err != CL_SUCCESS) {
        printf( "clGetDeviceIDs() failed with %d\n", err );
        return 1;
    }

    props[1] = (cl_context_properties)platform;
    ctx = clCreateContext(props, 1, &device, NULL, NULL, &err);
    if (err != CL_SUCCESS) {
        printf( "clCreateContext() failed with %d\n", err );
        return 1;
    }

    queue = clCreateCommandQueue(ctx, device, 0, &err);
    if (err != CL_SUCCESS) {
        printf( "clCreateCommandQueue() failed with %d\n", err );
        clReleaseContext(ctx);
        return 1;
    }

    /* Setup clblas. */
    err = clblasSetup();
    if (err != CL_SUCCESS) {
        printf("clblasSetup() failed with %d\n", err);
        clReleaseCommandQueue(queue);
        clReleaseContext(ctx);
        return 1;
    }

    /* Prepare OpenCL memory objects and place vectors inside them. */
    bufX = clCreateBuffer(ctx, CL_MEM_READ_WRITE, (lenX*sizeof(cl_float)), NULL, &err);
    if (err != CL_SUCCESS) {
        printf("clCreateBuffer() failed with %d\n", err);
        ret = 1;
        goto cleanup;
    }
    bufY = clCreateBuffer(ctx, CL_MEM_READ_WRITE, (lenY*sizeof(cl_float)), NULL, &err);
    if (err != CL_SUCCESS) {
        printf("clCreateBuffer() failed with %d\n", err);
        ret = 1;
        goto cleanup;
    }

    err = clEnqueueWriteBuffer(queue, bufX, CL_TRUE, 0, (lenX*sizeof(cl_float)), X, 0, NULL, NULL);
    if (err == CL_SUCCESS) {
        err = clEnqueueWriteBuffer(queue, bufY, CL_TRUE, 0, (lenY*sizeof(cl_float)), Y, 0, NULL, NULL);
    }
    if (err != CL_SUCCESS) {
        printf("clEnqueueWriteBuffer() failed with %d\n", err);
        ret = 1;
        goto cleanup;
    }

    printResult();

    /* Call clblas function. */
    err = clblasSrot(N, bufX, 0, incx, bufY, 0, incy, C, S, 1, &queue, 0, NULL, &event);
    if (err != CL_SUCCESS) {
        printf("clblasSrot() failed with %d\n", err);
        ret = 1;
        goto cleanup;
    }

    /* Wait for calculations to be finished. */
    err = clWaitForEvents(1, &event);
    if (err != CL_SUCCESS) {
        printf("clWaitForEvents() failed with %d\n", err);
        ret = 1;
        goto cleanup;
    }

    /* Fetch results of calculations from device memory. */
    err = clEnqueueReadBuffer(queue, bufY, CL_TRUE, 0, (lenY*sizeof(cl_float)),
                              Y, 0, NULL, NULL);
    if (err == CL_SUCCESS) {
        err = clEnqueueReadBuffer(queue, bufX, CL_TRUE, 0, (lenX*sizeof(cl_float)),
                                  X, 0, NULL, NULL);
    }
    if (err != CL_SUCCESS) {
        printf("clEnqueueReadBuffer() failed with %d\n", err);
        ret = 1;
        goto cleanup;
    }

    /* At this point X and Y hold the vectors read back after SROT. */
    printResult();

cleanup:
    /* Release only what exists: event and buffers stay NULL on early failure. */
    if (event != NULL) {
        clReleaseEvent(event);
    }

    /* Release OpenCL memory objects. */
    if (bufY != NULL) {
        clReleaseMemObject(bufY);
    }
    if (bufX != NULL) {
        clReleaseMemObject(bufX);
    }

    /* Finalize work with clblas. */
    clblasTeardown();

    /* Release OpenCL working objects. */
    clReleaseCommandQueue(queue);
    clReleaseContext(ctx);

    return ret;
}
开发者ID:AndreasMiller,项目名称:clBLAS,代码行数:100,代码来源:example_srot.c
示例16: vadd
//.........这里部分代码省略.........
// Fragment of vadd(): the function header and the declarations of A, B, i,
// platform_id, device_id, source_str and source_size are in the part of the
// example that was omitted above this point.
//
// The visible part selects an OpenCL platform and device, builds the
// "vector_add" kernel from source_str, runs it over LIST_SIZE elements of
// A and B, reads the result into C, prints the first ten sums and releases
// the OpenCL objects.
//
// NOTE(review): `ret` is overwritten by almost every call below and is never
// checked, so any failure is silently ignored.
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
// First query: fetch a single platform id and the number of platforms.
cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
// NOTE(review): casting a handle to unsigned int for %x may truncate it on
// platforms where the handle is wider than unsigned int.
printf("clGetPlatformIDs err=%d,num_platforms=%d, platform_id=%x\n", ret, ret_num_platforms, (unsigned int)platform_id );
// XXX selects the device type used for both the device lookup and the
// context creation below; the alternatives are kept commented out.
//#define XXX CL_DEVICE_TYPE_DEFAULT
// #define XXX CL_DEVICE_TYPE_ALL
// #define XXX CL_DEVICE_TYPE_GPU
#define XXX CL_DEVICE_TYPE_CPU
// Second query: room for up to two platform ids.
cl_uint num_platforms = 2;
cl_platform_id* platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id)* num_platforms);
// NOTE(review): on allocation failure this only prints a message; execution
// continues and `platforms` is still passed to and indexed by the code below.
if(NULL == platforms){
printf("malloc err!\n");
}
ret = clGetPlatformIDs(2, platforms, &ret_num_platforms);
// NOTE(review): platforms[1] is read unconditionally; it is only written by
// the call above if at least two platforms were returned.
printf("clGetPlatformIDs err=%d,num_platforms=%d, platform_id=%x\n", ret, ret_num_platforms, (unsigned int)platforms[1] );
// Ask the first platform for one device of type XXX.
ret = clGetDeviceIDs( platforms[0], XXX, 1, &device_id, &ret_num_devices);
// NOTE(review): the value printed under the "num_platforms" label is
// ret_num_platforms, not the ret_num_devices filled in by the call above.
printf("clGetDeviceIDs err=%d,num_platforms=%d, device_id=%x\n", ret, ret_num_platforms, (unsigned int)device_id );
// Fetch and print the device name (buffer limited to 64 bytes).
char name[64];
ret = clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(char)*64, name, NULL);
printf("device_name : %s\n", name);
// Create an OpenCL context
// cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
// Context properties: bind the context to the first platform; the list is
// terminated by 0.
cl_context_properties cps[3] =
{
(cl_context_properties)CL_CONTEXT_PLATFORM,
(cl_context_properties)platforms[0],
(cl_context_properties)0
};
// The context is created by device type rather than from device_id.
cl_context context = clCreateContextFromType( cps, XXX, NULL, NULL, &ret);
printf("clCreateContextFromType err=%d,device_type=%x\n", ret, (unsigned int)XXX);
// Create a command queue
cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
// Create memory buffers on the device for each vector
cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, LIST_SIZE * sizeof(int), NULL, &ret);
cl_mem b_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
LIST_SIZE * sizeof(int), NULL, &ret);
cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
LIST_SIZE * sizeof(int), NULL, &ret);
// Copy the lists A and B to their respective memory buffers
// (CL_TRUE makes both writes blocking)
ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0,
LIST_SIZE * sizeof(int), A, 0, NULL, NULL);
ret = clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0,
LIST_SIZE * sizeof(int), B, 0, NULL, NULL);
// Create a program from the kernel source
cl_program program = clCreateProgramWithSource(context, 1,
(const char **)&source_str, (const size_t *)&source_size, &ret);
// cl_int status;
// cl_int err;
// cl_program program = clCreateProgramWithBinary(
// context, 1, &device_id, &source_size, (const unsigned char **)&source_str, &status, &err);
// Build the program
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
// Create the OpenCL kernel
cl_kernel kernel = clCreateKernel(program, "vector_add", &ret);
// Set the arguments of the kernel
// (argument 0 = A buffer, 1 = B buffer, 2 = C buffer)
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem_obj);
ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem_obj);
ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem_obj);
// Execute the OpenCL kernel on the list
// NOTE(review): presumably LIST_SIZE is a multiple of 64; LIST_SIZE is
// defined outside this fragment - confirm.
size_t global_item_size = LIST_SIZE; // Process the entire lists
size_t local_item_size = 64; // Process in groups of 64
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
&global_item_size, &local_item_size, 0, NULL, NULL);
// Read the memory buffer C on the device to the local variable C
// NOTE(review): the malloc result is not checked before it is used as the
// destination of the blocking read.
int *C = (int*)malloc(sizeof(int)*LIST_SIZE);
ret = clEnqueueReadBuffer(command_queue, c_mem_obj, CL_TRUE, 0,
LIST_SIZE * sizeof(int), C, 0, NULL, NULL);
// Display the result to the screen
// Only the first 10 elements are printed; assumes LIST_SIZE >= 10 - TODO confirm.
for(i = 0; i < /*LIST_SIZE*/10; i++)
printf("%d + %d = %d\n", A[i], B[i], C[i]);
// Clean up
ret = clFlush(command_queue);
ret = clFinish(command_queue);
ret = clReleaseKernel(kernel);
ret = clReleaseProgram(program);
ret = clReleaseMemObject(a_mem_obj);
ret = clReleaseMemObject(b_mem_obj);
ret = clReleaseMemObject(c_mem_obj);
ret = clReleaseCommandQueue(command_queue);
ret = clReleaseContext(context);
// NOTE(review): `platforms` is not freed anywhere in the visible part of
// this function.
free(A);
free(B);
free(C);
return 0;
}
开发者ID |
请发表评论