本文整理汇总了C++中clCreateContext函数的典型用法代码示例。如果您正苦于以下问题：C++ clCreateContext函数的具体用法？C++ clCreateContext怎么用？C++ clCreateContext使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了clCreateContext函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: main
int main(int argc, char *argv[]){
cl_uint numPlatforms;
cl_platform_id* clSelectedPlatformID = NULL;
int err; // error code returned from api calls
int data[DATA_SIZE]; // original data set given to device
int results[DATA_SIZE]; // results returned from device
unsigned int correct; // number of correct results returned
size_t global; // global domain size for our calculation
size_t local; // local domain size for our calculation
cl_device_id device_id;
cl_context context;
cl_command_queue commands;
cl_program program;
cl_kernel kernel;
cl_mem input; // device memory used for the input array
cl_mem output; // device memory used for the output array
if(parseArgs(argc, argv)){
return 0;
}
// Fill our data set with random int values
unsigned int count = DATA_SIZE;
////////////////////////////////////////////////////////////////////////////////
// Simple compute kernel which computes the collatz of an input array
//
const char *KernelSource = fileToString("gpuFunctions.c");
//get Platform
clGetPlatformIDs(0, NULL, &numPlatforms);
clSelectedPlatformID = (cl_platform_id*)malloc(sizeof(cl_platform_id)*numPlatforms);
err = clGetPlatformIDs(numPlatforms, clSelectedPlatformID, NULL);
//get Device
err = clGetDeviceIDs(clSelectedPlatformID[0], CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Failed to create a device group!\n");
return EXIT_FAILURE;
}
//create context
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
if (!context)
{
printf("Error: Failed to create a compute context!\n");
return EXIT_FAILURE;
}
// Create a command commands
//
commands = clCreateCommandQueue(context, device_id, 0, &err);
if (!commands)
{
printf("Error: Failed to create a command commands!\n");
return EXIT_FAILURE;
}
// Create the compute program from the source buffer
//
program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err);
if (!program)
{
printf("Error: Failed to create compute program!\n");
return EXIT_FAILURE;
}
// Build the program executable
//
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
size_t len;
char buffer[2048];
printf("Error: Failed to build program executable!\n");
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
printf("%s\n", buffer);
exit(1);
}
// Create the compute kernel in the program we wish to run
//
kernel = clCreateKernel(program, "allToOne", &err);
if (!kernel || err != CL_SUCCESS)
{
printf("Error: Failed to create compute kernel!\n");
exit(1);
}
// Create the input and output arrays in device memory for our calculation
//
input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * count, NULL, NULL);
output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, NULL);
//.........这里部分代码省略.........
开发者ID:TrevorDev,项目名称:CollatzConjecture-openCL,代码行数:101,代码来源:openCL-allToOne.c
示例2: gemm_clblas
ErrorStatus gemm_clblas(cl_device_id device, const void *inMatrixA, int nrowA, int ncolA, bool transposeA,
const void *inMatrixB, int nrowB, int ncolB, bool transposeB,
double alpha, double beta, void *outMatrix, bool use_float)
{
std::stringstream result;
float *input_matrixA_f = (float *)inMatrixA;
float *input_matrixB_f = (float *)inMatrixB;
float *output_matrix_f = (float *)outMatrix;
double *input_matrixA_d = (double *)inMatrixA;
double *input_matrixB_d = (double *)inMatrixB;
double *output_matrix_d = (double *)outMatrix;
if (debug) {
result << "gemm_clblas( " << (use_float ? "FLOAT" : "DOUBLE") <<
")" << std::endl << std::endl;
}
cl_int err = CL_SUCCESS;
clblasStatus status = clblasSetup();
if (status != CL_SUCCESS) {
if (debug) {
result << "clblasSetup: " << clblasErrorToString(status) << std::endl;
}
err = CL_INVALID_OPERATION;
}
// get first platform
cl_platform_id platform = NULL;
if (err == CL_SUCCESS) {
err = clGetPlatformIDs(1, &platform, NULL);
}
if (debug && err == CL_SUCCESS) {
result << "Platform: " << getPlatformInfoString(platform, CL_PLATFORM_NAME) << std::endl;
result << "Device: " << getDeviceInfoString(device, CL_DEVICE_NAME) << std::endl;
}
// context
cl_context context = NULL;
if (err == CL_SUCCESS) {
if (debug) {
result << "clCreateContext:" << std::endl;
}
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
}
// queue
cl_command_queue queue = NULL;
if (err == CL_SUCCESS) {
#ifdef CL_VERSION_2_0
if (debug) {
result << "clCreateCommandQueueWithProperties:" << std::endl;
}
queue = clCreateCommandQueueWithProperties(context, device, NULL, &err);
#else
if (debug) {
result << "clCreateCommandQueue:" << std::endl;
}
queue = clCreateCommandQueue(context, device, 0, &err);
#endif
}
// buffers
cl_mem cl_input_matrixA = NULL;
if (err == CL_SUCCESS) {
if (debug) {
result << "clCreateBuffer cl_input_matrixA:" << std::endl;
}
if (use_float) {
cl_input_matrixA = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
nrowA * ncolA * sizeof(float), input_matrixA_f, &err);
} else {
cl_input_matrixA = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
nrowA * ncolA * sizeof(double), input_matrixA_d, &err);
}
}
cl_mem cl_input_matrixB = NULL;
if (err == CL_SUCCESS) {
if (debug) {
result << "clCreateBuffer cl_input_matrixB:" << std::endl;
}
if (use_float) {
cl_input_matrixB = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
nrowB * ncolB * sizeof(float), input_matrixB_f, &err);
} else {
//.........这里部分代码省略.........
开发者ID:quadrivio,项目名称:multiblas,代码行数:101,代码来源:gemm_clblas.cpp
示例3: main
int main()
{
typedef float ScalarType;
/////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////// Part 1: Set up a custom context and perform a sample operation. ////////////////
//////////////////////// This is rather lengthy due to the OpenCL framework. ////////////////
//////////////////////// The following does essentially the same as the ////////////////
//////////////////////// 'custom_kernels'-tutorial! ////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////
//manually set up a custom OpenCL context:
std::vector<cl_device_id> device_id_array;
//get all available devices
viennacl::ocl::platform pf;
std::cout << "Platform info: " << pf.info() << std::endl;
std::vector<viennacl::ocl::device> devices = pf.devices(CL_DEVICE_TYPE_DEFAULT);
std::cout << devices[0].name() << std::endl;
std::cout << "Number of devices for custom context: " << devices.size() << std::endl;
//set up context using all found devices:
for (size_t i=0; i<devices.size(); ++i)
{
device_id_array.push_back(devices[i].id());
}
std::cout << "Creating context..." << std::endl;
cl_int err;
cl_context my_context = clCreateContext(0, device_id_array.size(), &(device_id_array[0]), NULL, NULL, &err);
VIENNACL_ERR_CHECK(err);
//create two Vectors:
unsigned int vector_size = 10;
std::vector<ScalarType> vec1(vector_size);
std::vector<ScalarType> vec2(vector_size);
std::vector<ScalarType> result(vector_size);
//
// fill the operands vec1 and vec2:
//
for (unsigned int i=0; i<vector_size; ++i)
{
vec1[i] = static_cast<ScalarType>(i);
vec2[i] = static_cast<ScalarType>(vector_size-i);
}
//
// create memory in OpenCL context:
//
cl_mem mem_vec1 = clCreateBuffer(my_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, vector_size * sizeof(ScalarType), &(vec1[0]), &err);
VIENNACL_ERR_CHECK(err);
cl_mem mem_vec2 = clCreateBuffer(my_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, vector_size * sizeof(ScalarType), &(vec2[0]), &err);
VIENNACL_ERR_CHECK(err);
cl_mem mem_result = clCreateBuffer(my_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, vector_size * sizeof(ScalarType), &(result[0]), &err);
VIENNACL_ERR_CHECK(err);
//
// create a command queue for each device:
//
std::vector<cl_command_queue> queues(devices.size());
for (size_t i=0; i<devices.size(); ++i)
{
queues[i] = clCreateCommandQueue(my_context, devices[i].id(), 0, &err);
VIENNACL_ERR_CHECK(err);
}
//
// create and build a program in the context:
//
size_t source_len = std::string(my_compute_program).length();
cl_program my_prog = clCreateProgramWithSource(my_context, 1, &my_compute_program, &source_len, &err);
err = clBuildProgram(my_prog, 0, NULL, NULL, NULL, NULL);
/* char buffer[1024];
cl_build_status status;
clGetProgramBuildInfo(my_prog, devices[1].id(), CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL);
clGetProgramBuildInfo(my_prog, devices[1].id(), CL_PROGRAM_BUILD_LOG, sizeof(char)*1024, &buffer, NULL);
std::cout << "Build Scalar: Err = " << err << " Status = " << status << std::endl;
std::cout << "Log: " << buffer << std::endl;*/
VIENNACL_ERR_CHECK(err);
//
// create a kernel from the program:
//
const char * kernel_name = "elementwise_prod";
cl_kernel my_kernel = clCreateKernel(my_prog, kernel_name, &err);
VIENNACL_ERR_CHECK(err);
//
// Execute elementwise_prod kernel on first queue: result = vec1 .* vec2;
//
err = clSetKernelArg(my_kernel, 0, sizeof(cl_mem), (void*)&mem_vec1);
VIENNACL_ERR_CHECK(err);
err = clSetKernelArg(my_kernel, 1, sizeof(cl_mem), (void*)&mem_vec2);
//.........这里部分代码省略.........
开发者ID:bollig,项目名称:viennacl,代码行数:101,代码来源:custom-context.cpp
示例4: construct_opencl_device_info
/*
* pgstrom_collect_device_info
*
* It collects properties of all the OpenCL devices. It shall be called once
* by the OpenCL management worker process, prior to any other backends.
*/
static List *
construct_opencl_device_info(int platform_index)
{
cl_platform_id platforms[32];
cl_device_id devices[MAX_NUM_DEVICES];
cl_uint n_platform;
cl_uint n_devices;
cl_int i, j, rc;
long score_max = -1;
List *result = NIL;
rc = clGetPlatformIDs(lengthof(platforms),
platforms,
&n_platform);
if (rc != CL_SUCCESS)
elog(ERROR, "clGetPlatformIDs failed (%s)", opencl_strerror(rc));
for (i=0; i < n_platform; i++)
{
pgstrom_platform_info *pl_info;
pgstrom_device_info *dev_info;
long score = 0;
List *temp = NIL;
pl_info = collect_opencl_platform_info(platforms[i]);
pl_info->pl_index = i;
rc = clGetDeviceIDs(platforms[i],
CL_DEVICE_TYPE_CPU |
CL_DEVICE_TYPE_GPU |
CL_DEVICE_TYPE_ACCELERATOR,
lengthof(devices),
devices,
&n_devices);
if (rc != CL_SUCCESS)
elog(ERROR, "clGetDeviceIDs failed (%s)", opencl_strerror(rc));
elog(LOG, "PG-Strom: [%d] OpenCL Platform: %s", i, pl_info->pl_name);
for (j=0; j < n_devices; j++)
{
dev_info = collect_opencl_device_info(devices[j]);
dev_info->pl_info = pl_info;
dev_info->dev_index = j;
elog(LOG, "PG-Strom: + device %s (%uMHz x %uunits, %luMB)",
dev_info->dev_name,
dev_info->dev_max_clock_frequency,
dev_info->dev_max_compute_units,
dev_info->dev_global_mem_size >> 20);
/* rough estimation about computing power */
if ((dev_info->dev_type & CL_DEVICE_TYPE_GPU) != 0)
score += 32 * (dev_info->dev_max_compute_units *
dev_info->dev_max_clock_frequency);
else
score += (dev_info->dev_max_compute_units *
dev_info->dev_max_clock_frequency);
temp = lappend(temp, dev_info);
}
if (platform_index == i || (platform_index < 0 && score > score_max))
{
opencl_platform_id = platforms[i];
opencl_num_devices = n_devices;
for (j=0; j < n_devices; j++)
opencl_devices[j] = devices[j];
score_max = score;
result = temp;
}
}
/* show platform name if auto-selection */
if (platform_index < 0 && result != NIL)
{
pgstrom_platform_info *pl_info
= ((pgstrom_device_info *) linitial(result))->pl_info;
elog(LOG, "PG-Strom: auto platform selection: %s", pl_info->pl_name);
}
if (result != NIL)
{
/*
* Create an OpenCL context
*/
opencl_context = clCreateContext(NULL,
opencl_num_devices,
opencl_devices,
NULL,
NULL,
&rc);
if (rc != CL_SUCCESS)
//.........这里部分代码省略.........
开发者ID:osmonds,项目名称:pg_strom,代码行数:101,代码来源:opencl_serv.c
示例5: xcl_world_single
xcl_world xcl_world_single(cl_device_type device_type, char *target_vendor, char *target_device) {
int err;
xcl_world world;
cl_uint num_platforms;
err = clGetPlatformIDs(0, NULL, &num_platforms);
if (err != CL_SUCCESS) {
printf("Error: no platforms available or OpenCL install broken");
printf("Test failed\n");
exit(EXIT_FAILURE);
}
cl_platform_id *platform_ids = (cl_platform_id *) malloc(sizeof(cl_platform_id) * num_platforms);
if (platform_ids == NULL) {
printf("Error: Out of Memory\n");
printf("Test failed\n");
exit(EXIT_FAILURE);
}
err = clGetPlatformIDs(num_platforms, platform_ids, NULL);
if (err != CL_SUCCESS) {
printf("Error: Failed to find an OpenCL platform!\n");
printf("Test failed\n");
exit(EXIT_FAILURE);
}
int i;
char cl_platform_vendor[1001];
//find target vendor if target_vendor is specified
if (target_vendor != NULL) {
for(i = 0; i < num_platforms; i++) {
err = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_VENDOR, 1000, (void *)cl_platform_vendor,NULL);
if (err != CL_SUCCESS) {
printf("Error: clGetPlatformInfo(CL_PLATFORM_VENDOR) failed!\n");
printf("Test failed\n");
exit(EXIT_FAILURE);
}
if ((target_vendor != NULL) && (strcmp(cl_platform_vendor, target_vendor) == 0)) {
printf("INFO: Selected platform %d from %s\n", i, cl_platform_vendor);
world.platform_id = platform_ids[i];
break;
}
}
} else {
for(i = 0; i < num_platforms; i++) {
err = clGetDeviceIDs(platform_ids[i], device_type,
1, &world.device_id, NULL);
if (err == CL_SUCCESS) {
world.platform_id = platform_ids[i];
break;
}
}
}
free(platform_ids);
if (i == num_platforms) {
printf("Error: Failed to find a platform\n");
printf("Test failed\n");
exit(EXIT_FAILURE);
}
if (target_device != NULL) {
//find target device
cl_device_id devices[16]; // compute device id
cl_uint num_devices;
char cl_device_name[100];
err = clGetDeviceIDs(world.platform_id, CL_DEVICE_TYPE_ACCELERATOR,
16, devices, &num_devices);
if (err != CL_SUCCESS) {
printf("Error: Failed to create a device group!\n");
printf("Test failed\n");
exit(EXIT_FAILURE);
}
//iterate all devices to select the target device.
for (i=0; i<num_devices; i++) {
err = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 100, cl_device_name, 0);
if (err != CL_SUCCESS) {
printf("Error: Failed to get device name for device %d!\n", i);
printf("Test failed\n");
exit(EXIT_FAILURE);
}
//printf("CL_DEVICE_NAME %s\n", cl_device_name);
if (strcmp(cl_device_name, target_device) == 0) {
world.device_id = devices[i];
printf("INFO: Selected %s as the target device\n", cl_device_name);
break;
}
}
if (i == num_devices) {
printf("Error: Failed to find target device %s\n", target_device);
printf("Test failed\n");
exit(EXIT_FAILURE);
}
}
world.context = clCreateContext(0, 1, &world.device_id,
NULL, NULL, &err);
if (err != CL_SUCCESS) {
//.........这里部分代码省略.........
开发者ID:shvo,项目名称:Rodinia-FPGA,代码行数:101,代码来源:xcl.c
示例6: main
int main() {
// Set the image rotation (in degrees)
float theta = 3.14159/6;
float cos_theta = cosf(theta);
float sin_theta = sinf(theta);
printf("theta = %f (cos theta = %f, sin theta = %f)\n", theta, cos_theta,
sin_theta);
// Rows and columns in the input image
int imageHeight;
int imageWidth;
const char* inputFile = "input.bmp";
const char* outputFile = "output.bmp";
// Homegrown function to read a BMP from file
float* inputImage = readImage(inputFile, &imageWidth,
&imageHeight);
// Size of the input and output images on the host
int dataSize = imageHeight*imageWidth*sizeof(float);
// Output image on the host
float* outputImage = NULL;
outputImage = (float*)malloc(dataSize);
// Set up the OpenCL environment
cl_int status;
// Discovery platform
cl_platform_id platforms[2];
cl_platform_id platform;
status = clGetPlatformIDs(2, platforms, NULL);
chk(status, "clGetPlatformIDs");
platform = platforms[PLATFORM_TO_USE];
// Discover device
cl_device_id device;
clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, NULL);
chk(status, "clGetDeviceIDs");
// Create context
cl_context_properties props[3] = {CL_CONTEXT_PLATFORM,
(cl_context_properties)(platform), 0};
cl_context context;
context = clCreateContext(props, 1, &device, NULL, NULL, &status);
chk(status, "clCreateContext");
// Create command queue
cl_command_queue queue;
queue = clCreateCommandQueue(context, device, 0, &status);
chk(status, "clCreateCommandQueue");
// Create the input and output buffers
cl_mem d_input;
d_input = clCreateBuffer(context, CL_MEM_READ_ONLY, dataSize, NULL,
&status);
chk(status, "clCreateBuffer");
cl_mem d_output;
d_output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, dataSize, NULL,
&status);
chk(status, "clCreateBuffer");
// Copy the input image to the device
status = clEnqueueWriteBuffer(queue, d_input, CL_TRUE, 0, dataSize,
inputImage, 0, NULL, NULL);
chk(status, "clEnqueueWriteBuffer");
const char* source = readSource("rotation.cl");
// Create a program object with source and build it
cl_program program;
program = clCreateProgramWithSource(context, 1, &source, NULL, NULL);
chk(status, "clCreateProgramWithSource");
status = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
chk(status, "clBuildProgram");
// Create the kernel object
cl_kernel kernel;
kernel = clCreateKernel(program, "img_rotate", &status);
chk(status, "clCreateKernel");
// Set the kernel arguments
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_output);
status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_input);
status |= clSetKernelArg(kernel, 2, sizeof(int), &imageWidth);
status |= clSetKernelArg(kernel, 3, sizeof(int), &imageHeight);
status |= clSetKernelArg(kernel, 4, sizeof(float), &sin_theta);
status |= clSetKernelArg(kernel, 5, sizeof(float), &cos_theta);
chk(status, "clSetKernelArg");
// Set the work item dimensions
size_t globalSize[2] = {imageWidth, imageHeight};
status = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, NULL, 0,
NULL, NULL);
chk(status, "clEnqueueNDRange");
// Read the image back to the host
//.........这里部分代码省略.........
开发者ID:Soledad89,项目名称:learnOpenCL,代码行数:101,代码来源:rotation.c
示例7: runProgram
void runProgram(int N, char *fileName)
{
printf("GPU Symmetrize()..."
"\nSquareMatrix[%d][%d]\n", N, N);
int i,j;
// initialize input array
float *A;
A = (float*)malloc(sizeof(float)*N*N);
for( i = 0; i < N ; ++i )
{
for( j = 0; j < N ; ++j )
{
A[i*N + j] = j;
}
}
// result
float *Aout;
Aout = (float*)malloc(sizeof(float)*N*N);
#ifdef DEBUG
puts("A");
check_2d_f(A,N,N);
#endif
int NumK = 1;
int NumE = 2;
double gpuTime;
cl_ulong gstart, gend;
//------------------------------------------------
// OpenCL
//------------------------------------------------
cl_int err;
cl_platform_id platform; // OpenCL platform
cl_device_id device_id; // device ID
cl_context context; // context
cl_command_queue queue; // command queue
cl_program program; // program
cl_kernel *kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*NumK);
cl_event *event = (cl_event*)malloc(sizeof(cl_event)*NumE);
// read kernel file
//char *fileName = "transpose_kernel.cl";
char *kernelSource;
size_t size;
FILE *fh = fopen(fileName, "rb");
if(!fh) {
printf("Error: Failed to open kernel file!\n");
exit(1);
}
fseek(fh,0,SEEK_END);
size=ftell(fh);
fseek(fh,0,SEEK_SET);
kernelSource = malloc(size+1);
size_t result;
result = fread(kernelSource,1,size,fh);
if(result != size){ fputs("Reading error", stderr);exit(1);}
kernelSource[size] = '\0';
// Bind to platform
err = clGetPlatformIDs(1, &platform, NULL);
OCL_CHECK(err);
// Get ID for the device
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
OCL_CHECK(err);
// Create a context
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
OCL_CHECK(err);
// Create a command queue
queue = clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &err);
OCL_CHECK(err);
// Create the compute program from the source buffer
program = clCreateProgramWithSource(context, 1, (const char **)&kernelSource, NULL, &err);
OCL_CHECK(err);
// turn on optimization for kernel
char *options="-cl-mad-enable -cl-fast-relaxed-math -cl-no-signed-zeros -cl-unsafe-math-optimizations -cl-finite-math-only";
err = clBuildProgram(program, 1, &device_id, options, NULL, NULL);
if(err != CL_SUCCESS)
printCompilerOutput(program, device_id);
OCL_CHECK(err);
#ifdef SAVEBIN
// Calculate size of binaries
//.........这里部分代码省略.........
开发者ID:Anmol-007,项目名称:oclKernels,代码行数:101,代码来源:template.c
示例8: main
//.........这里部分代码省略.........
printf("Error: Failed to find a platform!\n");
return EXIT_FAILURE;
}
// Get all platforms
cl_platform_id Platform[numPlatforms];
err = clGetPlatformIDs(numPlatforms, Platform, NULL);
if (err != CL_SUCCESS || numPlatforms <= 0)
{
printf("Error: Failed to get the platform!\n");
return EXIT_FAILURE;
}
// Secure a GPU
for (i = 0; i < numPlatforms; i++)
{
err = clGetDeviceIDs(Platform[i], DEVICE, 1, &device_id, NULL);
if (err == CL_SUCCESS)
break;
}
if (device_id == NULL)
{
printf("Error: Failed to create a device group!\n");
return EXIT_FAILURE;
}
else
{
if (output_device_info (rank, device_id) != CL_SUCCESS)
return EXIT_FAILURE;
}
// Create a compute context
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
if (!context)
{
printf("Error: Failed to create a compute context!\n");
return EXIT_FAILURE;
}
// Create a command queue
commands = clCreateCommandQueue(context, device_id, 0, &err);
if (!commands)
{
printf("Error: Failed to create a command commands!\n");
return EXIT_FAILURE;
}
// Create the compute program from the source buffer
program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err);
if (!program)
{
printf("Error: Failed to create compute program!\n");
return EXIT_FAILURE;
}
// Build the program
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
size_t len;
char buffer[2048];
printf("Error: Failed to build program executable!\n");
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
printf("%s\n", buffer);
开发者ID:ElsevierSoftwareX,项目名称:SOFTX-D-15-00010,代码行数:67,代码来源:mpi-vadd.c
示例9: main
// Main program
//*****************************************************************************
int main(int argc, char** argv)
{
// Locals used with command line args
int p = 256; // workgroup X dimension
int q = 1; // workgroup Y dimension
pArgc = &argc;
pArgv = argv;
shrQAStart(argc, argv);
// latch the executable path for other funcs to use
cExecutablePath = argv[0];
// start logs and show command line help
shrSetLogFileName ("oclNbody.txt");
shrLog("%s Starting...\n\n", cExecutablePath);
shrLog("Command line switches:\n");
shrLog(" --qatest\t\tCheck correctness of GPU execution and measure performance)\n");
shrLog(" --noprompt\t\tQuit simulation automatically after a brief period\n");
shrLog(" --n=<numbodies>\tSpecify # of bodies to simulate (default = %d)\n", numBodies);
shrLog(" --double\t\tUse double precision floating point values for simulation\n");
shrLog(" --p=<workgroup X dim>\tSpecify X dimension of workgroup (default = %d)\n", p);
shrLog(" --q=<workgroup Y dim>\tSpecify Y dimension of workgroup (default = %d)\n\n", q);
// Get command line arguments if there are any and set vars accordingly
if (argc > 0)
{
shrGetCmdLineArgumenti(argc, (const char**)argv, "p", &p);
shrGetCmdLineArgumenti(argc, (const char**)argv, "q", &q);
shrGetCmdLineArgumenti(argc, (const char**)argv, "n", &numBodies);
bDouble = (shrTRUE == shrCheckCmdLineFlag(argc, (const char**)argv, "double"));
bNoPrompt = shrCheckCmdLineFlag(argc, (const char**)argv, "noprompt");
bQATest = shrCheckCmdLineFlag(argc, (const char**)argv, "qatest");
}
//Get the NVIDIA platform
cl_int ciErrNum = oclGetPlatformID(&cpPlatform);
oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
shrLog("clGetPlatformID...\n\n");
if (bDouble)
{
shrLog("Double precision execution...\n\n");
}
else
{
shrLog("Single precision execution...\n\n");
}
flopsPerInteraction = bDouble ? 30 : 20;
//Get all the devices
shrLog("Get the Device info and select Device...\n");
ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 0, NULL, &uiNumDevices);
oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
cdDevices = (cl_device_id *)malloc(uiNumDevices * sizeof(cl_device_id) );
ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, uiNumDevices, cdDevices, NULL);
oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
// Set target device and Query number of compute units on uiTargetDevice
shrLog(" # of Devices Available = %u\n", uiNumDevices);
if(shrGetCmdLineArgumentu(argc, (const char**)argv, "device", &uiTargetDevice)== shrTRUE)
{
uiTargetDevice = CLAMP(uiTargetDevice, 0, (uiNumDevices - 1));
}
shrLog(" Using Device %u, ", uiTargetDevice);
oclPrintDevName(LOGBOTH, cdDevices[uiTargetDevice]);
cl_uint uiNumComputeUnits;
clGetDeviceInfo(cdDevices[uiTargetDevice], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(uiNumComputeUnits), &uiNumComputeUnits, NULL);
shrLog(" # of Compute Units = %u\n", uiNumComputeUnits);
//Create the context
shrLog("clCreateContext...\n");
cxContext = clCreateContext(0, uiNumDevsUsed, &cdDevices[uiTargetDevice], NULL, NULL, &ciErrNum);
oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
// Create a command-queue
shrLog("clCreateCommandQueue...\n\n");
cqCommandQueue = clCreateCommandQueue(cxContext, cdDevices[uiTargetDevice], CL_QUEUE_PROFILING_ENABLE, &ciErrNum);
oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
// Log and config for number of bodies
shrLog("Number of Bodies = %d\n", numBodies);
switch (numBodies)
{
case 1024:
activeParams.m_clusterScale = 1.52f;
activeParams.m_velocityScale = 2.f;
break;
case 2048:
activeParams.m_clusterScale = 1.56f;
activeParams.m_velocityScale = 2.64f;
break;
case 4096:
activeParams.m_clusterScale = 1.68f;
activeParams.m_velocityScale = 2.98f;
break;
//.........这里部分代码省略.........
开发者ID:liu-kan,项目名称:nvidia-opencl-examples,代码行数:101,代码来源:oclNbody.cpp
示例10: main
int main()
{
cl_device_id device = new_device();
cl_context context;
cl_command_queue queue;
cl_program program;
cl_kernel kernel;
cl_int i, j, err;
float matrix_1[80], matrix_2[80], matrix_3[80];
const size_t buffer_origin[3] = { 5 * sizeof(float), 3, 0 };
const size_t host_origin[3] = { 1 * sizeof(float), 1, 0 };
const size_t region[3] = { 4 * sizeof(float), 4, 1 };
cl_mem matrix_buffer_1, matrix_buffer_2, matrix_buffer_3;
for (i = 0; i < 80; i++)
{
matrix_1[i] = i * 1.0f;
matrix_2[i] = 3.0;
matrix_3[i] = 0;
}
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
if (err < 0)
{
perror("Couldn't create a context\n");
exit(1);
}
program = build_program(context, device, FILE_NAME);
kernel = clCreateKernel(program, "add", &err);
if (err < 0) {
perror("Couldn't create a kernel\n");
exit(1);
}
matrix_buffer_1 = clCreateBuffer(context, CL_MEM_READ_WRITE |
CL_MEM_COPY_HOST_PTR, sizeof(matrix_1), matrix_1, &err);
if (err < 0) {
perror("Couldn't create a buffer\n");
exit(1);
}
matrix_buffer_2 = clCreateBuffer(context, CL_MEM_READ_WRITE |
CL_MEM_COPY_HOST_PTR, sizeof(matrix_2), matrix_2, &err);
if (err < 0) {
perror("Couldn't create a buffer\n");
exit(1);
}
matrix_buffer_3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(matrix_3), matrix_3, &err);
if (err < 0) {
perror("Couldn't create a buffer\n");
exit(1);
}
int row = 8;
int col = 10;
err = clSetKernelArg(kernel, 0, sizeof(int), &row);
err = clSetKernelArg(kernel, 1, sizeof(int), &col);
err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &matrix_buffer_1);
err = clSetKernelArg(kernel, 3, sizeof(cl_mem), &matrix_buffer_2);
err = clSetKernelArg(kernel, 4, sizeof(cl_mem), &matrix_buffer_3);
queue = clCreateCommandQueue(context, device, 0, &err);
if (err < 0) {
perror("Couldn't create a command queue\n");
exit(1);
}
err = clEnqueueTask(queue, kernel, 0, NULL, NULL);
if (err < 0) {
perror("Couldn't enque task\n");
exit(1);
}
err = clEnqueueReadBuffer(queue, matrix_buffer_3, CL_TRUE, 0,
sizeof(matrix_3), &matrix_3, 0, NULL, NULL);
for (i = 0; i < 8; i++) {
for (j = 0; j < 10; j++) {
printf("%6.1f ", matrix_3[j + i * 10]);
}
printf("\n");
}
clReleaseMemObject(matrix_buffer_1);
clReleaseMemObject(matrix_buffer_2);
clReleaseMemObject(matrix_buffer_3);
clReleaseKernel(kernel);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return 0;
}
开发者ID:Aperjump,项目名称:OpenCL_wrapper,代码行数:86,代码来源:main.cpp
示例11: call_kernel
void call_kernel(float *data,unsigned int count,char * cl_name,float *results) {
FILE* programHandle;
size_t programSize, KernelSourceSize;
char *programBuffer, *KernelSource;
size_t global; // global domain size for our calculation
size_t local; // local domain size for our calculation
cl_device_id device_id; // compute device id
cl_context context; // compute context
cl_command_queue commands; // compute command queue
cl_program program; // compute program
cl_kernel kernel; // compute kernel
cl_mem input; // device memory used for the input array
cl_mem output; // device memory used for the output array
int err;
int gpu = 1;
err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
commands = clCreateCommandQueue(context, device_id, 0, &err);
//----------------------------------------------------------------------------
// get size of kernel source
programHandle = fopen(cl_name, "r");
fseek(programHandle, 0, SEEK_END);
programSize = ftell(programHandle);
rewind(programHandle);
programBuffer = (char*) malloc(programSize + 1);
programBuffer[programSize] = '\0';
fread(programBuffer, sizeof(char), programSize, programHandle);
fclose(programHandle);
// create program from buffer
program = clCreateProgramWithSource(context,1,(const char**) &programBuffer,&programSize, NULL);
free(programBuffer);
// read kernel source back in from program to check
clGetProgramInfo(program, CL_PROGRAM_SOURCE, 0, NULL, &KernelSourceSize);
KernelSource = (char*) malloc(KernelSourceSize);
clGetProgramInfo(program, CL_PROGRAM_SOURCE, KernelSourceSize, KernelSource, NULL);
program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err);
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
kernel = clCreateKernel(program, "square", &err);
//----------------------------------------------------------------------------
input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * count, NULL, NULL);
output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, NULL);
err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * count, data, 0, NULL, NULL);
clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
global = count;
err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
clFinish(commands);
err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float) * count, results, 0, NULL, NULL );
clReleaseMemObject(input);
clReleaseMemObject(output);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(commands);
clReleaseContext(context);
printf("nKernel source:\n\n %s \n", KernelSource);
free(KernelSource);
}
开发者ID:linan7788626,项目名称:Opencl_examples,代码行数:75,代码来源:more_simple_hello.c
示例12: main
int main() {
/* Host/device data structures */
cl_platform_id platform;
cl_device_id device;
cl_context context;
cl_command_queue queue;
cl_int err;
/* Program/kernel data structures */
cl_program program;
FILE *program_handle;
char *program_buffer, *program_log;
size_t program_size, log_size;
cl_kernel kernel;
size_t offset = 0;
size_t global_size, local_size;
/* Data and buffers */
char pattern[16] = "thatwithhavefrom";
FILE *text_handle;
char *text;
size_t text_size;
int chars_per_item;
int result[4] = {0, 0, 0, 0};
cl_mem text_buffer, result_buffer;
/* Identify a platform */
err = clGetPlatformIDs(1, &platform, NULL);
if(err < 0) {
perror("Couldn't identify a platform");
exit(1);
}
/* Access a device */
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
if(err < 0) {
perror("Couldn't access any devices");
exit(1);
}
/* Determine global size and local size */
clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS,
sizeof(global_size), &global_size, NULL);
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
sizeof(local_size), &local_size, NULL);
global_size *= local_size;
/* Create a context */
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
if(err < 0) {
perror("Couldn't create a context");
exit(1);
}
/* Read program file and place content into buffer */
program_handle = fopen(PROGRAM_FILE, "r");
if(program_handle == NULL) {
perror("Couldn't find the program file");
exit(1);
}
fseek(program_handle, 0, SEEK_END);
program_size = ftell(program_handle);
rewind(program_handle);
program_buffer = (char*)calloc(program_size+1, sizeof(char));
fread(program_buffer, sizeof(char), program_size, program_handle);
fclose(program_handle);
/* Read text file and place content into buffer */
text_handle = fopen(TEXT_FILE, "r");
if(text_handle == NULL) {
perror("Couldn't find the text file");
exit(1);
}
fseek(text_handle, 0, SEEK_END);
text_size = ftell(text_handle)-1;
rewind(text_handle);
text = (char*)calloc(text_size, sizeof(char));
fread(text, sizeof(char), text_size, text_handle);
fclose(text_handle);
chars_per_item = text_size / global_size + 1;
/* Create program from file */
program = clCreateProgramWithSource(context, 1,
(const char**)&program_buffer, &program_size, &err);
if(err < 0) {
perror("Couldn't create the program");
exit(1);
}
free(program_buffer);
/* Build program */
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if(err < 0) {
/* Find size of log and print to std output */
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
0, NULL, &log_size);
program_log = (char*) calloc(log_size+1, sizeof(char));
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
//.........这里部分代码省略.........
开发者ID:sunlianqiang,项目名称:openclDemo,代码行数:101,代码来源:string_search.c
示例13: main
int main()
{
//Control Variables
bool showStartInput=false;// Setting it to true shows the original Input
bool showFftOutput=false;// Shows the output after the FFT but before the Reshuffle
bool showReshuffleOutput=false;// Shows the output after the reshuffle
bool showFinalResult=false; // Shows final result after cross-correlation
bool showGemmInput=false; // Shows output after the reshuffle but before the matrix multiplication
bool showReformatOutputAfterReshuffle=false; // Shows output after it has been reformatted after the reshuffling
//openCL State
cl_platform_id platform_id=NULL;
cl_device_id device_id=NULL;
cl_context context=NULL;
cl_command_queue queue=NULL;
cl_program program=NULL;
cl_kernel kernel=NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret=0; // Stores the error values retuned by many functions
cl_event event = NULL;
cl_event events[10];
cl_kernel clKernel;
//FFT state
clAmdFftPlanHandle plHandle;
clAmdFftResultLocation place = CLFFT_OUTOFPLACE; //Alternative CLFFT_INPLACE
clAmdFftLayout inLayout = CLFFT_COMPLEX_INTERLEAVED;
clAmdFftLayout outLayout = CLFFT_COMPLEX_INTERLEAVED;
clAmdFftDim dim = CLFFT_1D;
size_t clStrides[3]={0,0,0};
size_t clLengths[3];
clLengths[0]=(MEM_SIZE/2);//Length of first dimension of fft
clLengths[1]=1;//length of second dimension of fft
clLengths[2]=1;
clStrides[ 0 ] = 1;
clStrides[ 1 ] = clStrides[ 0 ] * clLengths[ 0 ];
clStrides[ 2 ] = clStrides[ 1 ] * clLengths[ 1 ];
clStrides[ 3 ] = clStrides[ 2 ] * clLengths[ 2 ];
size_t batchSize=CHANSIZE;//number of discreet fft's to be calculated simultaneously
//Initialise openCL
OPENCL_V_THROW(clGetPlatformIDs(1, &platform_id, &ret_num_platforms),"clGetPlatformIDs Failed");
OPENCL_V_THROW(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id,&ret_num_devices),"clGetDeviceIDs Failed");
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
OPENCL_V_THROW(ret, "Creating Context failed" );
queue = clCreateCommandQueue(context, device_id, 0, &ret);
OPENCL_V_THROW(ret, "Creating command queue failed" );
//===========Initialise the host buffers======================================
/*
* The functions sgenerate2darray(), screate2darray() and sgenerate2darrayout() are defined and declared in definition.h
*/
float** src_a_h=sgenerate2darray(NO_INPUTS,MEM_SIZE);//To be used to store the original input
float** answer=screate2darray(NO_INPUTS,MEM_SIZE);//To be used to store the answer after the reshuffling
float** corr_h=sgenerate2darrayout(NO_INPUTS,CHANSIZE << 1,CHANNELNO);// To be used to store the final answer
if(showStartInput){
cout << "Initial Input Buffer" << "\n";
for(int j=0;j<NO_INPUTS;j++){
for(int i=0;i<MEM_SIZE;i++){
cout << src_a_h[j][i] << " ";
}cout << "\n";
}printf("\n");
}
//===================================================================
//Calculation of facs for reshuffling
complex <float>* facs_h=(complex <float>*) malloc(sizeof(complex <float>)*(MEM_SIZE/2));
complex<float> I=1.0i;
complex <float> xx=2.0*PI;
for(int i=0;i<MEM_SIZE/2;i++){
facs_h[i]=(1.0*i)/(1.0*MEM_SIZE);
facs_h[i]=exp(xx*(-I*facs_h[i]));
}
//===================================================================
//Initialise GPU memory buffers
size_t sizeofgpumem=NO_INPUTS*MEM_SIZE*sizeof(float);
size_t sizeoffacsmem=MEM_SIZE*sizeof(float);
cl_mem clMemBuffersIn = clCreateBuffer(context,CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,sizeofgpumem,src_a_h[0],&ret);
OPENCL_V_THROW( ret, "Creating clMemBuffersIn Buffer failed" );
cl_mem clMemBuffersOut = clCreateBuffer(context,CL_MEM_READ_WRITE,sizeofgpumem,NULL,&ret);
OPENCL_V_THROW (ret, "Creating fft output Buffer failed");
cl_mem facs = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,sizeoffacsmem,facs_h,&ret);
OPENCL_V_THROW (ret, "Creating facs Buffer failed");
//===========================Starting the fft=============================//
clAmdFftSetupData setupData;
OPENCL_V_THROW( clAmdFftInitSetupData( &setupData ),"clAmdFftInitSetupData failed" );
OPENCL_V_THROW( clAmdFftSetup( &setupData ), "clAmdFftSetup failed" );
OPENCL_V_THROW( clAmdFftCreateDefaultPlan( &plHandle, context, dim, clLengths ), "clAmdFftCreateDefaultPlan failed" );
//.........这里部分代码省略.........
开发者ID:TechClub-IIITA,项目名称:cross_correlation_GPU_openCL,代码行数:101,代码来源:main_back |
请发表评论