本文整理汇总了C++中clFlush函数的典型用法代码示例。如果您正苦于以下问题:C++ clFlush函数的具体用法?C++ clFlush怎么用?C++ clFlush使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了clFlush函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: clEnqueueNDRangeKernel_fusion
/* Splits one NDRange launch between a GPU queue (command_queue[0]) and a
 * CPU queue (command_queue[1]) according to the file-scope `cpu_offset`
 * percentage: 0 = GPU only, 100 = CPU only, anything else = both devices.
 * Only the first work dimension is partitioned; the CPU takes the leading
 * `cpu_offset`% of it (rounded up to whole work groups) and the GPU the rest.
 * NOTE(review): relies on file-scope globals `cpu_offset` and `errcode`.
 * Returns the last OpenCL status observed (matching the original contract). */
cl_int clEnqueueNDRangeKernel_fusion ( cl_command_queue* command_queue,
                                       cl_kernel kernel,
                                       cl_uint work_dim,
                                       const size_t *global_work_offset,
                                       const size_t *global_work_size,
                                       const size_t *local_work_size,
                                       cl_uint num_events_in_wait_list,
                                       const cl_event *event_wait_list,
                                       cl_event *event){
    cl_event eventList[2];
    int cpu_run=0, gpu_run=0;
    size_t global_offset[2];
    size_t global_offset_start[2];
    size_t remain_global_work_size[2];
    cl_uint i; /* BUGFIX: was `int`, compared against the unsigned work_dim. */
    /* Decide which devices participate from the split percentage. */
    if(cpu_offset == 0){
        gpu_run=1;
    }
    else if(cpu_offset == 100){
        cpu_run=1;
    }
    else{
        gpu_run=1;
        cpu_run=1;
    }
    for(i=0; i<work_dim; i++){
        global_offset[i]=global_work_size[i];
        remain_global_work_size[i]=global_work_size[i];
    }
    /* CPU share of dimension 0, rounded UP to a multiple of the work-group
     * size so neither sub-range contains a partial work group. */
    global_offset[0]=((double)cpu_offset/100)*global_work_size[0];
    {
        size_t t1=global_offset[0], t2=local_work_size[0];
        global_offset[0]=(t1/t2+(size_t)(t1%t2!=0))*t2;
    }
    /* BUGFIX: rounding up could push the CPU share past the full range,
     * which made the unsigned subtraction below wrap to a huge value.
     * Clamp to the total size instead. */
    if(global_offset[0] > global_work_size[0])
        global_offset[0] = global_work_size[0];
    remain_global_work_size[0] = global_work_size[0]-global_offset[0];
    if(remain_global_work_size[0] == 0)
        gpu_run = 0;
    /* BUGFIX: a CPU share of zero work items would make the CPU enqueue
     * below fail with CL_INVALID_GLOBAL_WORK_SIZE; skip it instead. */
    if(global_offset[0] == 0)
        cpu_run = 0;
    global_offset_start[0]=global_offset[0];
    global_offset_start[1]=0;
    if(gpu_run){
        errcode = clEnqueueNDRangeKernel(command_queue[0], kernel, work_dim, global_offset_start, remain_global_work_size, local_work_size, 0, NULL, &(eventList[0]));
        if(errcode != CL_SUCCESS) printf("Error in gpu clEnqueueNDRangeKernel\n");
    }
    if(cpu_run){
        errcode = clEnqueueNDRangeKernel(command_queue[1], kernel, work_dim, NULL, global_offset, local_work_size, 0, NULL, &(eventList[1]));
        if(errcode != CL_SUCCESS) printf("Error in cpu clEnqueueNDRangeKernel\n");
    }
    /* Flush both queues before blocking so the devices run concurrently. */
    if(gpu_run) errcode = clFlush(command_queue[0]);
    if(cpu_run) errcode = clFlush(command_queue[1]);
    if(gpu_run) errcode = clWaitForEvents(1,&eventList[0]);
    if(cpu_run) errcode = clWaitForEvents(1,&eventList[1]);
    return errcode;
}
开发者ID:zhangfengthu,项目名称:CoRunBench,代码行数:59,代码来源:3mm.c
示例2: vglClToGl
/** Hands an image back from the OpenCL context to the OpenGL context.
 *
 * Releases the GL objects previously acquired by OpenCL, drains the
 * command queue, and marks the image as resident in VGL_GL_CONTEXT.
 * A no-op when the image is not currently in VGL_CL_CONTEXT. */
void vglClToGl(VglImage* img)
{
  if (!vglIsInContext(img, VGL_CL_CONTEXT))
  {
    return;
  }

  // Give the GL objects back to OpenGL, then wait for the queue to empty
  // so GL never observes a half-finished CL command.
  cl_int err_cl = clEnqueueReleaseGLObjects(cl.commandQueue, 1, (cl_mem*) &img->oclPtr, 0, NULL, NULL);
  vglClCheckError(err_cl, (char*) "clEnqueueReleaseGLObjects");

  err_cl = clFlush(cl.commandQueue);
  vglClCheckError(err_cl, (char*) "clFlush");

  err_cl = clFinish(cl.commandQueue);
  vglClCheckError(err_cl, (char*) "clFinish");

  vglSetContext(img, VGL_GL_CONTEXT);
}
开发者ID:mcanthony,项目名称:visiongl,代码行数:28,代码来源:vglClImage.cpp
示例3: runKernel
// Looks up a compiled kernel by name in the runtime environment, dispatches
// it through the lower-level runKernel overload, then copies `out_len`
// doubles of the "out" device buffer back into `out`.
// NOTE(review): the search loop starts at index 1, so env.kernels[0] can
// never be selected -- confirm whether slot 0 is reserved on purpose.
// NOTE(review): `kern` stays uninitialized when no name matches -- verify
// callers only pass names known to exist.
// NOTE(review): `start_index` is used directly as the byte offset of
// clEnqueueReadBuffer; confirm callers pass bytes, not element indices.
void runKernel(cl_runtime_env env,
std::string kernel_name,
double* vars,
double* out,
int start_index,
int out_len)
{
kernel kern;
// Linear scan for the kernel descriptor matching kernel_name.
for (int i = 1; i < env.num_kerns; i++)
{
if (env.kernels[i].name == kernel_name)
kern = env.kernels[i];
}
// Delegate the actual enqueue to the overload taking a kernel handle.
runKernel(env.cv, env.cl_kernels[kernel_name],
kern,
env.gpu_data,
vars);
cl_int err;
// Blocking read (third argument true): `out` is valid once this returns.
err = clEnqueueReadBuffer(env.cv.commands, env.gpu_data["out"].array,
true,
start_index,
sizeof(double)*out_len,
out,
0, NULL, NULL);
CHK_ERR(err);
err = clFlush(env.cv.commands);
CHK_ERR(err);
}
开发者ID:jasclark,项目名称:indicator_optimizer,代码行数:31,代码来源:opencl_setup.cpp
示例4: cl_copyBuffer
// Copies `size` bytes from `src` (at srcOffset) to `dest` (at destOffset)
// on whichever device the scheduler picks (CPU or GPU command queue).
// Copies are chained through a two-slot rotating event list indexed by
// (*index) % 2: every copy after the first waits on its predecessor.
// On failure the whole program is torn down via cl_clean.
// NOTE(review): relies on the file-scope CommandQueue[] array.
void cl_copyBuffer(cl_mem dest, int destOffset, cl_mem src, int srcOffset, size_t size,int *index,cl_event *eventList,int *Flag_CPU_GPU,double * burden, int _CPU_GPU)
{
// Remember the previous scheduling decision so it can be undone below.
int preFlag=(*Flag_CPU_GPU);
double preBurden=(*burden);
int CPU_GPU=0;
// Ask the scheduler which device should perform this copy.
CPU_GPU=cl_copyBufferscheduler(size,Flag_CPU_GPU,burden,_CPU_GPU);
cl_int ciErr1;
(*Flag_CPU_GPU)=CPU_GPU;
if(*index!=0)
{
// Not the first copy: wait on the previous event, signal the next slot.
ciErr1 = clEnqueueCopyBuffer(CommandQueue[CPU_GPU], src, dest, srcOffset, destOffset, size, 1, &eventList[((*index)-1)%2], &eventList[(*index)%2]);
deschedule(preFlag,preBurden);
}
else
// First copy in the chain: nothing to wait on yet.
ciErr1 = clEnqueueCopyBuffer(CommandQueue[CPU_GPU], src, dest, srcOffset, destOffset, size, 0, NULL, &eventList[*index]);
(*index)++;
//clEnqueueWriteBuffer(CommandQueue[CPU_GPU], to, CL_FALSE, 0, size, from, 0, NULL, NULL);
if (ciErr1 != CL_SUCCESS)
{
printf("Error %d in cl_copyBuffer, Line %u in file %s !!!\n\n", ciErr1,__LINE__, __FILE__);
cl_clean(EXIT_FAILURE);
}
// Kick off the copy without blocking the host.
clFlush(CommandQueue[CPU_GPU]);
}
开发者ID:johnspaul92,项目名称:omnidb-paralleldbonapu,代码行数:26,代码来源:common.cpp
示例5: dimension
/** Perform Hermitian matrix-vector product, \f$ y = \alpha A x + \beta y \f$.
@param[in]
uplo Whether the upper or lower triangle of A is referenced.
@param[in]
n Number of rows and columns of A. n >= 0.
@param[in]
alpha Scalar \f$ \alpha \f$
@param[in]
dA COMPLEX array of dimension (ldda,n), ldda >= max(1,n).
The n-by-n matrix A, on GPU device.
@param[in]
ldda Leading dimension of dA.
@param[in]
dx COMPLEX array on GPU device.
The m element vector x of dimension (1 + (m-1)*incx).
@param[in]
incx Stride between consecutive elements of dx. incx != 0.
@param[in]
beta Scalar \f$ \beta \f$
@param[in,out]
dy COMPLEX array on GPU device.
The n element vector y of dimension (1 + (n-1)*incy).
@param[in]
incy Stride between consecutive elements of dy. incy != 0.
@ingroup magma_cblas2
*/
// Thin wrapper dispatching the Hermitian matrix-vector product
// (see the Doxygen block above) to clblasChemv on `queue`.
// Completion is recorded in the file-scope g_event.
extern "C" void
magma_chemv(
magma_uplo_t uplo,
magma_int_t n,
magmaFloatComplex alpha,
magmaFloatComplex_const_ptr dA, size_t dA_offset, magma_int_t ldda,
magmaFloatComplex_const_ptr dx, size_t dx_offset, magma_int_t incx,
magmaFloatComplex beta,
magmaFloatComplex_ptr dy, size_t dy_offset, magma_int_t incy,
magma_queue_t queue )
{
// Quick return for an empty problem.
if ( n <= 0 )
return;
cl_int err = clblasChemv(
clblasColumnMajor,
clblas_uplo_const( uplo ),
n,
alpha, dA, dA_offset, ldda,
dx, dx_offset, incx,
beta, dy, dy_offset, incy,
1, &queue, 0, NULL, g_event );
// Issue the queued command to the device, then report the enqueue status.
clFlush(queue);
check_error( err );
}
开发者ID:kjbartel,项目名称:clmagma,代码行数:62,代码来源:blas_c.cpp
示例6: max
/** Returns index of element of vector x having max. absolute value;
i.e., max (infinity) norm.
@param[in]
n Number of elements in vector x. n >= 0.
@param[in]
dx COMPLEX array on GPU device.
The n element vector x of dimension (1 + (n-1)*incx).
@param[in]
incx Stride between consecutive elements of dx. incx > 0.
@ingroup magma_cblas1
*/
// Returns the index of the element of x with maximum absolute value
// (infinity norm argmax) by delegating to clblasiCamax, then copying the
// single-element device result back to the host.
// Device-side temporaries (result slot + clBLAS scratch buffer) are
// allocated and freed here.
extern "C" magma_int_t
magma_icamax(
    magma_int_t n,
    magmaFloatComplex_const_ptr dx, size_t dx_offset, magma_int_t incx,
    magma_queue_t queue )
{
    magma_ptr dimax, scratchBuff;
    magma_malloc( &dimax, sizeof(unsigned int) );
    // clBLAS requires a scratch buffer of at least 2*N elements for iCamax.
    magma_malloc( &scratchBuff, (2*n+1)*sizeof(magmaFloatComplex) );
    cl_int err = clblasiCamax(
        n, dimax, 0,
        dx, dx_offset, incx,
        scratchBuff,
        1, &queue, 0, NULL, g_event);
    // BUGFIX: `err` was computed but never inspected; report failures the
    // same way the sibling BLAS wrappers in this file do.
    check_error( err );
    unsigned int imax_cpu;
    // Blocking copy of the result index to the host.
    magma_getvector( 1, sizeof(unsigned int), dimax, 0, 1, &imax_cpu, 1, queue );
    clFlush(queue);
    magma_free( dimax );
    magma_free( scratchBuff );
    return imax_cpu;
}
开发者ID:kjbartel,项目名称:clmagma,代码行数:40,代码来源:blas_c.cpp
示例7: B
/** Perform Hermitian rank-2k update.
\f$ C = \alpha A B^T + \alpha B A^T + \beta C \f$ (trans == MagmaNoTrans), or \n
\f$ C = \alpha A^T B + \alpha B^T A + \beta C \f$ (trans == MagmaTrans), \n
where \f$ C \f$ is Hermitian.
@param[in]
uplo Whether the upper or lower triangle of C is referenced.
@param[in]
trans Operation to perform on A and B.
@param[in]
n Number of rows and columns of C. n >= 0.
@param[in]
k Number of columns of A and B (for MagmaNoTrans) or rows of A and B (for MagmaTrans). k >= 0.
@param[in]
alpha Scalar \f$ \alpha \f$
@param[in]
dA COMPLEX array on GPU device.
If trans == MagmaNoTrans, the n-by-k matrix A of dimension (ldda,k), ldda >= max(1,n); \n
otherwise, the k-by-n matrix A of dimension (ldda,n), ldda >= max(1,k).
@param[in]
ldda Leading dimension of dA.
@param[in]
dB COMPLEX array on GPU device.
If trans == MagmaNoTrans, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n); \n
otherwise, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k).
@param[in]
lddb Leading dimension of dB.
@param[in]
beta Scalar \f$ \beta \f$
@param[in,out]
dC COMPLEX array on GPU device.
The n-by-n Hermitian matrix C of dimension (lddc,n), lddc >= max(1,n).
@param[in]
lddc Leading dimension of dC.
@ingroup magma_cblas3
*/
// Thin wrapper dispatching the Hermitian rank-2k update (see the Doxygen
// block above) to clblasCher2k on `queue`.
// Completion is recorded in the file-scope g_event.
extern "C" void
magma_cher2k(
magma_uplo_t uplo, magma_trans_t trans,
magma_int_t n, magma_int_t k,
magmaFloatComplex alpha,
magmaFloatComplex_const_ptr dA, size_t dA_offset, magma_int_t ldda,
magmaFloatComplex_const_ptr dB, size_t dB_offset, magma_int_t lddb,
float beta,
magmaFloatComplex_ptr dC, size_t dC_offset, magma_int_t lddc,
magma_queue_t queue )
{
// Quick return for an empty problem.
// NOTE(review): when k == 0 this also skips the beta*C scaling -- confirm
// callers never rely on pure scaling through this entry point.
if (n <= 0 || k <= 0)
return;
cl_int err = clblasCher2k(
clblasColumnMajor,
clblas_uplo_const( uplo ),
clblas_trans_const( trans ),
n, k,
alpha, dA, dA_offset, ldda,
dB, dB_offset, lddb,
beta, dC, dC_offset, lddc,
1, &queue, 0, NULL, g_event );
// Issue the queued command to the device, then report the enqueue status.
clFlush(queue);
check_error( err );
}
开发者ID:kjbartel,项目名称:clmagma,代码行数:74,代码来源:blas_c.cpp
示例8: op
/** Perform matrix-matrix product, \f$ C = \alpha op(A) op(B) + \beta C \f$.
@param[in]
transA Operation op(A) to perform on matrix A.
@param[in]
transB Operation op(B) to perform on matrix B.
@param[in]
m Number of rows of C and op(A). m >= 0.
@param[in]
n Number of columns of C and op(B). n >= 0.
@param[in]
k Number of columns of op(A) and rows of op(B). k >= 0.
@param[in]
alpha Scalar \f$ \alpha \f$
@param[in]
dA COMPLEX array on GPU device.
If transA == MagmaNoTrans, the m-by-k matrix A of dimension (ldda,k), ldda >= max(1,m); \n
otherwise, the k-by-m matrix A of dimension (ldda,m), ldda >= max(1,k).
@param[in]
ldda Leading dimension of dA.
@param[in]
dB COMPLEX array on GPU device.
If transB == MagmaNoTrans, the k-by-n matrix B of dimension (lddb,n), lddb >= max(1,k); \n
otherwise, the n-by-k matrix B of dimension (lddb,k), lddb >= max(1,n).
@param[in]
lddb Leading dimension of dB.
@param[in]
beta Scalar \f$ \beta \f$
@param[in,out]
dC COMPLEX array on GPU device.
The m-by-n matrix C of dimension (lddc,n), lddc >= max(1,m).
@param[in]
lddc Leading dimension of dC.
@ingroup magma_cblas3
*/
// Thin wrapper dispatching the general matrix-matrix product (see the
// Doxygen block above) to clblasCgemm on `queue`.
// Completion is recorded in the file-scope g_event.
extern "C" void
magma_cgemm(
magma_trans_t transA, magma_trans_t transB,
magma_int_t m, magma_int_t n, magma_int_t k,
magmaFloatComplex alpha,
magmaFloatComplex_const_ptr dA, size_t dA_offset, magma_int_t ldda,
magmaFloatComplex_const_ptr dB, size_t dB_offset, magma_int_t lddb,
magmaFloatComplex beta,
magmaFloatComplex_ptr dC, size_t dC_offset, magma_int_t lddc,
magma_queue_t queue )
{
// Quick return for an empty problem.
// NOTE(review): when k == 0 this also skips the beta*C scaling -- confirm
// callers never rely on pure scaling through this entry point.
if ( m <= 0 || n <= 0 || k <= 0 )
return;
cl_int err = clblasCgemm(
clblasColumnMajor,
clblas_trans_const( transA ),
clblas_trans_const( transB ),
m, n, k,
alpha, dA, dA_offset, ldda,
dB, dB_offset, lddb,
beta, dC, dC_offset, lddc,
1, &queue, 0, NULL, g_event );
// Issue the queued command to the device, then report the enqueue status.
clFlush(queue);
check_error( err );
}
示例9: mat_mul_cl_row_local
/* Let's see if this is any different from local memory.
 * Outcome: much slower than private memory, slower than naive method. */
/* Multiplies the n x n matrices A and B into C using the
 * "matmul_row_local.cl" kernel: one work item per row, with one row staged
 * in local memory (kernel argument 3).
 * NOTE(review): none of the OpenCL calls here check their return status;
 * failures surface only as wrong results. */
void mat_mul_cl_row_local(const F *A, const F *B, F *C, size_t n, Cache *cache) {
cl_uint ncl;
size_t global_work_size, local_work_size, mat_sizeof;
/* Setup variables. */
/* Cannot be larger than 1 on this example, otherwise memory conflicts
 * will happen between work items. */
local_work_size = 1;
global_work_size = n;
mat_sizeof = n * n * sizeof(F);
ncl = n;
/* Run kernel. */
common_create_kernel_file(&cache->common, "matmul_row_local.cl", NULL);
/* Blocking writes (CL_TRUE): host buffers are reusable on return. */
clEnqueueWriteBuffer(cache->common.command_queue, cache->buf_a, CL_TRUE, 0, mat_sizeof, (F*)A, 0, NULL, NULL);
clEnqueueWriteBuffer(cache->common.command_queue, cache->buf_b, CL_TRUE, 0, mat_sizeof, (F*)B, 0, NULL, NULL);
clSetKernelArg(cache->common.kernel, 0, sizeof(cache->buf_a), &cache->buf_a);
clSetKernelArg(cache->common.kernel, 1, sizeof(cache->buf_b), &cache->buf_b);
clSetKernelArg(cache->common.kernel, 2, sizeof(cache->buf_c), &cache->buf_c);
/* NULL data pointer = local-memory allocation of one row of F. */
clSetKernelArg(cache->common.kernel, 3, n * sizeof(F), NULL);
clSetKernelArg(cache->common.kernel, 4, sizeof(ncl), &ncl);
clEnqueueNDRangeKernel(cache->common.command_queue, cache->common.kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
clFlush(cache->common.command_queue);
clFinish(cache->common.command_queue);
/* Blocking read: C is complete when this returns. */
clEnqueueReadBuffer(cache->common.command_queue, cache->buf_c, CL_TRUE, 0, mat_sizeof, C, 0, NULL, NULL);
}
开发者ID:cirosantilli,项目名称:cpp-cheat,代码行数:28,代码来源:matmul.c
示例10: mat_mul_cl_row_priv_col_local
/* Like row private, but to reduce global memory accesses,
 * we copy only once per work group to local memory.
 *
 * Each work group contains a few rows of A.
 *
 * We load the first column, multiply all rows by that column, synrhconize,
 * load another column, and so on.
 *
 * This leads to a thread blockage / memory access tradeoff.
 *
 * We make work groups as large as possible to reload memory less times. */
void mat_mul_cl_row_priv_col_local(const F *A, const F *B, F *C, size_t n, Cache *cache) {
    char options[256];
    cl_uint ncl;
    size_t global_work_size, local_work_size, mat_sizeof;
    /* Setup variables. */
    global_work_size = n;
    mat_sizeof = n * n * sizeof(F);
    ncl = n;
    /* Run kernel. */
    /* BUGFIX: `n` is a size_t but was printed with "%ju", which expects a
     * uintmax_t argument -- undefined behavior wherever the two types
     * differ. "%zu" is the correct conversion for size_t. */
    snprintf(options, sizeof(options), "-DPRIV_ROW_SIZE=%zu", n);
    common_create_kernel_file(&cache->common, "matmul_row_priv_col_local.cl", options);
    /* Use the largest work-group size the device supports, capped at n. */
    local_work_size = 0;
    clGetDeviceInfo(cache->common.device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(local_work_size), &local_work_size, NULL);
    local_work_size = zmin(local_work_size, n);
    /* Blocking writes (CL_TRUE): host buffers are reusable on return. */
    clEnqueueWriteBuffer(cache->common.command_queue, cache->buf_a, CL_TRUE, 0, mat_sizeof, (F*)A, 0, NULL, NULL);
    clEnqueueWriteBuffer(cache->common.command_queue, cache->buf_b, CL_TRUE, 0, mat_sizeof, (F*)B, 0, NULL, NULL);
    clSetKernelArg(cache->common.kernel, 0, sizeof(cache->buf_a), &cache->buf_a);
    clSetKernelArg(cache->common.kernel, 1, sizeof(cache->buf_b), &cache->buf_b);
    clSetKernelArg(cache->common.kernel, 2, sizeof(cache->buf_c), &cache->buf_c);
    /* NULL data pointer = local-memory allocation of one column of F. */
    clSetKernelArg(cache->common.kernel, 3, n * sizeof(F), NULL);
    clSetKernelArg(cache->common.kernel, 4, sizeof(ncl), &ncl);
    clEnqueueNDRangeKernel(cache->common.command_queue, cache->common.kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
    clFlush(cache->common.command_queue);
    clFinish(cache->common.command_queue);
    /* Blocking read: C is complete when this returns. */
    clEnqueueReadBuffer(cache->common.command_queue, cache->buf_c, CL_TRUE, 0, mat_sizeof, C, 0, NULL, NULL);
}
开发者ID:cirosantilli,项目名称:cpp-cheat,代码行数:39,代码来源:matmul.c
示例11: main
/* Checks that a macro injected through the "-DX=..." build options reaches
 * the OpenCL compiler: the kernel stores X into out[0] and the host
 * asserts that the value made the round trip. */
int main(void) {
    char options[256];
    const char *source =
        "__kernel void kmain(__global int *out) {\n"
        " out[0] = X;\n"
        "}\n";
    cl_int host_io[] = {0};
    const cl_int X = 1;
    cl_mem dev_buf;
    Common common;
    const size_t global_work_size = sizeof(host_io) / sizeof(host_io[0]);

    /* Build the kernel with X defined on the compiler command line. */
    snprintf(options, sizeof(options), "-DX=%d", X);
    common_init_options(&common, source, options);
    dev_buf = clCreateBuffer(common.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(host_io), host_io, NULL);
    clSetKernelArg(common.kernel, 0, sizeof(dev_buf), &dev_buf);
    clEnqueueNDRangeKernel(common.command_queue, common.kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL);
    clFlush(common.command_queue);
    clFinish(common.command_queue);
    clEnqueueReadBuffer(common.command_queue, dev_buf, CL_TRUE, 0, sizeof(host_io), host_io, 0, NULL, NULL);

    /* The kernel must have written the macro's value back to the host. */
    assert(host_io[0] == X);

    /* Cleanup. */
    clReleaseMemObject(dev_buf);
    common_deinit(&common);
    return EXIT_SUCCESS;
}
开发者ID:cirosantilli,项目名称:cpp-cheat,代码行数:30,代码来源:preprocessor.c
示例12: MITK_ERROR
/**
 * Downloads the data set from GPU memory into a freshly allocated host
 * buffer and returns it; ownership of the buffer passes to the caller.
 *
 * @param gpuComQueue command queue used for the blocking read.
 * @return host buffer of m_bufferSize * m_BpE bytes, or nullptr when no
 *         GPU buffer exists.
 * @throws mitk::Exception when the OpenCL read fails.
 */
void* mitk::OclDataSet::TransferDataToCPU(cl_command_queue gpuComQueue)
{
  cl_int clErr = 0;

  // Nothing to download without a GPU-side buffer.
  if( m_gpuBuffer == nullptr ){
    MITK_ERROR("ocl.DataSet") << "(mitk) No buffer present!\n";
    return nullptr;
  }

  // Host buffer sized element-count * bytes-per-element.
  char* data = new char[m_bufferSize * (size_t)m_BpE];

  // debug info
#ifdef SHOW_MEM_INFO
  oclPrintMemObjectInfo( m_gpuBuffer );
#endif

  // Blocking read (CL_TRUE): `data` is valid as soon as this returns.
  clErr = clEnqueueReadBuffer( gpuComQueue, m_gpuBuffer, CL_TRUE, 0, m_bufferSize * (size_t)m_BpE, data ,0, nullptr, nullptr);
  CHECK_OCL_ERR(clErr);

  if(clErr != CL_SUCCESS)
  {
    // BUGFIX: the freshly allocated host buffer leaked when the read
    // failed and mitkThrow() unwound the stack.
    delete[] data;
    mitkThrow() << "openCL Error when reading Output Buffer";
  }

  clFlush( gpuComQueue );
  // the cpu data is same as gpu
  this->m_gpuModified = false;

  return (void*) data;
}
示例13: magma_ztrsm
// --------------------
// Triangular solve with multiple right-hand sides, in place in dB:
// delegates to clAmdBlasZtrsmEx on `queue` and returns its status to the
// caller (unlike sibling wrappers, no check_error here).
// NOTE(review): the large comment below is a retired CPU (cblas) fallback
// kept for reference; it is dead code.
magma_err_t
magma_ztrsm(
magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag,
magma_int_t m, magma_int_t n,
magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, size_t dA_offset, magma_int_t lda,
magmaDoubleComplex_ptr dB, size_t dB_offset, magma_int_t ldb,
magma_queue_t queue )
{
/*
magmaDoubleComplex *hA, *hB;
if(side==MagmaRight){
hA = (magmaDoubleComplex*)malloc(lda*n*sizeof(magmaDoubleComplex));
hB = (magmaDoubleComplex*)malloc(ldb*n*sizeof(magmaDoubleComplex));
magma_zgetmatrix(n, n, dA, dA_offset, lda, hA, 0, lda, queue);
magma_zgetmatrix(m, n, dB, dB_offset, ldb, hB, 0, ldb, queue);
#if defined(PRECISION_z) || defined(PRECISION_c)
cblas_ztrsm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
m, n,
&alpha, hA, lda, hB, ldb);
#else
cblas_ztrsm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
m, n,
alpha, hA, lda, hB, ldb);
#endif
magma_zsetmatrix(m, n, hB, 0, ldb, dB, dB_offset, ldb, queue);
free(hB);
free(hA);
}else{
hA = (magmaDoubleComplex*)malloc(lda*m*sizeof(magmaDoubleComplex));
hB = (magmaDoubleComplex*)malloc(ldb*n*sizeof(magmaDoubleComplex));
magma_zgetmatrix(m, m, dA, dA_offset, lda, hA, 0, lda, queue);
magma_zgetmatrix(m, n, dB, dB_offset, ldb, hB, 0, ldb, queue);
#if defined(PRECISION_z) || defined(PRECISION_c)
cblas_ztrsm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
m, n,
&alpha, hA, lda, hB, ldb);
#else
cblas_ztrsm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag,
m, n,
alpha, hA, lda, hB, ldb);
#endif
magma_zsetmatrix(m, n, hB, 0, ldb, dB, dB_offset, ldb, queue);
free(hB);
free(hA);
}
return CL_SUCCESS;
*/
// Dispatch the solve to clAmdBlas, then issue the queue before returning.
cl_int err = clAmdBlasZtrsmEx(
clAmdBlasColumnMajor,
amdblas_side_const( side ),
amdblas_uplo_const( uplo ),
amdblas_trans_const( trans ),
amdblas_diag_const( diag ),
m, n,
alpha, dA, dA_offset, lda,
dB, dB_offset, ldb,
1, &queue, 0, NULL, NULL );
clFlush(queue);
return err;
}
示例14: clFlush
/** Flushes a command queue, terminating the process when the flush fails.
 *
 * @param command_queue queue whose enqueued commands should be issued.
 */
void CLContext::flush( cl_command_queue& command_queue )
{
	cl_int status;
	// BUGFIX: the `command_queue` parameter was ignored and the member
	// `commandQueue` flushed instead, so flushing any other queue passed
	// in here silently did nothing to it.
	status = clFlush( command_queue );
	if(!checkVal( status, CL_SUCCESS, "clFlush failed."))
		exit(1);
}
示例15: clEnqueueReadBuffer
// Reads `size` elements of `size_of` bytes each from a device buffer into
// host memory `ptr`, then checks the OpenCL status.
// NOTE(review): `offset` is forwarded unscaled while `size` is multiplied
// by `size_of` -- confirm callers pass a byte offset, not an element index.
// NOTE(review): when blocking_read is CL_TRUE the clFlush below is
// redundant (the read has already completed); presumably it exists for the
// non-blocking path -- verify.
void OclHost::readFromDevice(cl_mem buffer, cl_bool blocking_read,
size_t offset, size_t size, void * ptr, size_t size_of) {
cl_int ciErrNum = clEnqueueReadBuffer(oclCommandQueue, buffer,
blocking_read, offset, size * size_of, ptr, 0, 0, 0);
clFlush(oclCommandQueue);
checkClError("Unable to read from device.", ciErrNum);
}
示例16: __accr_launchkernel
void __accr_launchkernel(char* szKernelName, char* szKernelLib, int async_expr)
{
//CL kernel file
cl_int cl_error_code;
staic size_t global_work_items[3];
global_work_items[0] = gangs[0] * vectors[0];
global_work_items[1] = gangs[1] * vectors[1];
global_work_items[2] = gangs[2] * vectors[2];
if(bIsAuto_cl_local_work_partition)
cl_error_code = clEnqueueNDRangeKernel(context->cl_cq, current_cl_kernel_handle, cl_work_dim,
NULL, global_work_items, NULL, 0, NULL, NULL);
else
cl_error_code = clEnqueueNDRangeKernel(context->cl_cq, current_cl_kernel_handle, cl_work_dim,
NULL, global_work_items, vectors, 0, NULL, NULL);
if(async_expr < 0)
{
cl_error_code = clFlush(context->cl_cq);
cl_error_code = clFinish(context->cl_cq);
}
else //async
{
}
}
开发者ID:prabindh,项目名称:openuh,代码行数:25,代码来源:acc_kernel.c
示例17: gettimeofday
// Runs the original (unoptimized) loop kernel over LOOP_SIZE items with a
// fixed work-group size of 64, blocking until the kernel completes.
// The commented-out gettimeofday pairs are retired manual timing code.
// NOTE(review): `clStatus` is assigned but never checked, so enqueue
// failures go unnoticed here.
void BeforeCheckingExample2::parallelExecuteOrigin() {
/*struct timeval tv1, tv2;
gettimeofday(&tv1, NULL);
*/
int p = 0;
int clStatus;
// Bind the device buffers and loop parameters as kernel arguments 0..6.
clSetKernelArg(loopKernelOrigin, p++, sizeof(cl_mem), &device_a );
clSetKernelArg(loopKernelOrigin, p++, sizeof(cl_mem), &device_b );
clSetKernelArg(loopKernelOrigin, p++, sizeof(cl_mem), &device_c );
clSetKernelArg(loopKernelOrigin, p++, sizeof(cl_mem), &device_Q );
clSetKernelArg(loopKernelOrigin, p++, sizeof(cl_mem), &device_P );
clSetKernelArg(loopKernelOrigin, p++, sizeof(cl_int), &LOOP_SIZE );
clSetKernelArg(loopKernelOrigin, p++, sizeof(cl_int), &CALC_SIZE );
size_t global_size0 = LOOP_SIZE;
size_t local_size = 64;
clStatus = clEnqueueNDRangeKernel(command_queue, loopKernelOrigin, 1, NULL, &global_size0, &local_size, 0, NULL, NULL);
// Block until the kernel has finished before returning.
clFlush(command_queue);
clFinish(command_queue);
/*
gettimeofday(&tv2, NULL);
double used_time = (double) (tv2.tv_usec - tv1.tv_usec) + (double) (tv2.tv_sec - tv1.tv_sec) * 1000000;
printf("time origin = %.2f\n", used_time);
*/
}
示例18: timedBufUnmap
/** Times an unmap of a previously mapped buffer and logs the result.
 *
 * @param queue  command queue the unmap is enqueued on.
 * @param buf    buffer object being unmapped.
 * @param ptr    host pointer obtained from the earlier map call.
 * @param quiet  when true, suppress the timing log line.
 */
void timedBufUnmap( cl_command_queue queue,
                    cl_mem buf,
                    void **ptr,
                    bool quiet )
{
    CPerfCounter timer;
    cl_event done;

    timer.Reset();
    timer.Start();

    cl_int status = clEnqueueUnmapMemObject( queue,
                                             buf,
                                             (void *) *ptr,
                                             0, NULL, &done );
    ASSERT_CL_RETURN( status );

    // Submit the command and spin until its event signals completion.
    clFlush( queue );
    spinForEventsComplete( 1, &done );

    timer.Stop();

    if( !quiet )
        tlog->Timer( "%32s %lf s [ %8.2lf GB/s ]\n", "clEnqueueUnmapMemObject():", timer.GetElapsedTime(), nBytes, 1 );
}
示例19: timedImageCLWrite
/** Times a non-blocking image write (host -> device) and logs the result.
 *
 * @param queue  command queue the write is enqueued on.
 * @param image  destination image object.
 * @param ptr    host source data.
 */
void timedImageCLWrite( cl_command_queue queue,
                        cl_mem image,
                        void *ptr )
{
    CPerfCounter timer;
    cl_event done;

    timer.Start();

    cl_int status = clEnqueueWriteImage( queue,
                                         image,
                                         CL_FALSE,
                                         imageOrigin, imageRegion,
                                         0, 0,
                                         ptr,
                                         0, NULL,
                                         &done );
    ASSERT_CL_RETURN( status );

    // Submit the command and spin until its event signals completion.
    clFlush( queue );
    spinForEventsComplete( 1, &done );

    timer.Stop();

    tlog->Timer( "%32s %lf s %8.2lf GB/s\n", "clEnqueueWriteImage():", timer.GetElapsedTime(), nBytesRegion, 1 );
}
示例20: timedBufferImageCLCopy
/** Times a device-side buffer-to-image copy and logs the result.
 *
 * @param queue   command queue the copy is enqueued on.
 * @param srcBuf  source buffer object.
 * @param dstImg  destination image object.
 */
void timedBufferImageCLCopy( cl_command_queue queue,
                             cl_mem srcBuf,
                             cl_mem dstImg )
{
    CPerfCounter timer;
    cl_event done;

    timer.Start();

    cl_int status = clEnqueueCopyBufferToImage( queue,
                                                srcBuf,
                                                dstImg,
                                                0,
                                                imageOrigin,
                                                imageRegion,
                                                0, NULL,
                                                &done );
    ASSERT_CL_RETURN( status );

    // Submit the command and spin until its event signals completion.
    clFlush( queue );
    spinForEventsComplete( 1, &done );

    timer.Stop();

    tlog->Timer( "%32s %lf s %8.2lf GB/s\n", "clEnqueueCopyBufferToImage():", timer.GetElapsedTime(), nBytesRegion, 1 );
}
注:本文中的clFlush函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论