• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

C++ rtclock函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了C++中rtclock函数的典型用法代码示例。如果您正苦于以下问题：C++ rtclock函数的具体用法？C++ rtclock怎么用？C++ rtclock使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了rtclock函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: main

/*
 * 1-D FDTD benchmark driver.
 *
 * Initialises the arrays, runs T sweeps of the e/h update inside the scop
 * region, prints the elapsed time through the IF_TIME wrapper (macro defined
 * elsewhere) and dumps the arrays when a ".test" file can be opened.
 *
 * NOTE(review): the h update reads e[i+1] for i == N-1, so e is assumed to
 * hold at least N+1 elements -- confirm against its declaration.
 *
 * Returns 0.
 */
int main()
{
    int t, i, j, k, l;

    double t_start, t_end;
    FILE *test_marker;

    init_array();

	IF_TIME(t_start = rtclock());

#pragma scop
    for (t=1; t<=T; t++){
	    for (i=1; i<=N-1; i++)
		    e[i] = e[i] - coeff1*(h[i]-h[i-1]);
	    for (i=0; i<=N-1; i++)
		    h[i] = h[i] - coeff2*(e[i+1]-e[i]);
    }
#pragma endscop

    IF_TIME(t_end = rtclock());
    IF_TIME(fprintf(stderr, "%0.6lfs\n", t_end - t_start));

    /* ".test" is only an on/off switch for the dump; close the probe handle
     * right away instead of leaking it. */
    test_marker = fopen(".test", "r");
    if (test_marker != NULL) {
        fclose(test_marker);
        print_array();
    }

    return 0;
}
开发者ID:SameerAsal,项目名称:energy_predictor,代码行数:28,代码来源:fdtd-1d.c


示例2: main

/*
 * Template benchmark driver.
 *
 * Initialises the data, runs the f[i][j] = f[j][i] + f[i][j-1] sweep over the
 * interior indices 1..N-2 inside the scop region, prints the elapsed time
 * through IF_TIME and dumps the array when a ".test" file can be opened.
 *
 * t_start / t_end are not declared in this function; they are expected to be
 * provided at file scope.
 *
 * Returns 0.
 */
int main()
{
    int i, j, k, t;
    FILE *test_marker;

    init_array() ;

#ifdef PERFCTR
    PERF_INIT; 
#endif

    IF_TIME(t_start = rtclock());

    /* pluto start (N) */
#pragma scop
    for (i=1; i<=N-2; i++)  {
        for (j=1; j<=N-2; j++)  {
            f[i][j] = f[j][i] + f[i][j-1];
        }
    }
#pragma endscop
    /* pluto end */

    IF_TIME(t_end = rtclock());
    IF_TIME(fprintf(stderr, "%0.6lfs\n", t_end - t_start));

    /* ".test" is only an on/off switch for the dump; close the probe handle
     * right away instead of leaking it. */
    test_marker = fopen(".test", "r");
    if (test_marker != NULL) {
        fclose(test_marker);
        print_array();
    }

    return 0;
}
开发者ID:SameerAsal,项目名称:energy_predictor,代码行数:31,代码来源:template.c


示例3: main

/*
 * SYRK benchmark driver.
 *
 * Allocates three N*M buffers, initialises them, runs the offloaded version
 * (syrkGPU, which prints its own timing) into D, then times the host version
 * (syrk) into C and compares C with D.
 *
 * Returns 0 on success, 1 if any buffer could not be allocated.
 */
int main() {
  double t_start, t_end;

  DATA_TYPE* A;
  DATA_TYPE* C;
  DATA_TYPE* D;

  /* sizeof first so the product is evaluated in size_t, not int. */
  A = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*N*M);
  C = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*N*M);
  D = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*N*M);

  if (A == NULL || C == NULL || D == NULL) {
    fprintf(stderr, "Error: out of memory while allocating matrices\n");
    /* free(NULL) is a no-op, so releasing all three is safe. */
    free(A);
    free(C);
    free(D);
    return 1;
  }

  fprintf(stdout, "<< Symmetric rank-k operations >>\n");

  init_arrays(A, C, D);	
  syrkGPU(A, D);

  t_start = rtclock();
  syrk(A, C);
  t_end = rtclock();
  fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);

  compareResults(C, D);

  free(A);
  free(C);
  free(D);
  return 0;
}
开发者ID:rcfsousa,项目名称:Polybench_OpenMP,代码行数:28,代码来源:syrk.c


示例4: syrkGPU

/*
 * Offloaded SYRK: scales every D[i*M+j] by beta, then adds
 * alpha * sum_k A[i*M+k] * A[j*M+k] to it, and prints the wall-clock time
 * of the whole target construct as "GPU Runtime".
 *
 * A: input buffer, mapped to the device (N*M elements).
 * D: in/out buffer, mapped to and from the device (N*M elements).
 */
void syrkGPU(DATA_TYPE* A, DATA_TYPE* D) {
  int i, j;
  double t_start, t_end;

  t_start = rtclock();

  #pragma omp target  device (GPU_DEVICE)
  #pragma omp target map(to: A[:N*M]) map(tofrom: D[:N*M])
  {
    /* Only the index of the associated loop (i) is privatised automatically.
     * j is declared outside the construct, so it must be made private
     * explicitly or all threads race on a single shared j. */
    #pragma omp parallel for private(j)
    for (i = 0; i < N; i++) {
      for (j = 0; j < M; j++) {
	D[i * M + j] *= beta;
      }
    }
    
    /* collapse(2) associates both i and j with the construct, so both are
     * private here; k is declared inside the loop body. */
    #pragma omp parallel for collapse(2)
    for (i = 0; i < N; i++) {
      for (j = 0; j < M; j++) {
	int k;		
	for(k=0; k< M; k++) {
	  D[i * M + j] += alpha * A[i * M + k] * A[j * M + k];
	}
      }
    }
  }
  
  t_end = rtclock();
  fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start);

}
开发者ID:rcfsousa,项目名称:Polybench_OpenMP,代码行数:31,代码来源:syrk.c


示例5: cl_launch_kernel

/*
 * Binds the six arguments of clKernel1, enqueues it on a 2-D range and prints
 * the wall-clock time spent between the start of argument setup and the
 * return of the enqueue call.
 *
 * The global range in each dimension is the problem size rounded up to a
 * whole number of work-groups. The clFinish call is left disabled as in the
 * original, so the reported time does not wait for kernel completion.
 */
void cl_launch_kernel()
{
	int m_arg = M;
	int n_arg = N;
	size_t groups_x, groups_y;
	size_t local_dims[2], global_dims[2];
	double launch_begin, elapsed;

	local_dims[0] = DIM_LOCAL_WORK_GROUP_X;
	local_dims[1] = DIM_LOCAL_WORK_GROUP_Y;

	/* Number of work-groups needed per dimension, rounded up. */
	groups_x = (size_t)ceil(((float)N) / ((float)DIM_LOCAL_WORK_GROUP_X));
	groups_y = (size_t)ceil(((float)M) / ((float)DIM_LOCAL_WORK_GROUP_Y));
	global_dims[0] = groups_x * DIM_LOCAL_WORK_GROUP_X;
	global_dims[1] = groups_y * DIM_LOCAL_WORK_GROUP_Y;

	launch_begin = rtclock();

	/* Kernel arguments: buffers, scalars, then the two dimensions. */
	errcode =  clSetKernelArg(clKernel1, 0, sizeof(cl_mem), (void *)&a_mem_obj);
	errcode |= clSetKernelArg(clKernel1, 1, sizeof(cl_mem), (void *)&c_mem_obj);
	errcode |= clSetKernelArg(clKernel1, 2, sizeof(DATA_TYPE), (void *)&alpha);
	errcode |= clSetKernelArg(clKernel1, 3, sizeof(DATA_TYPE), (void *)&beta);
	errcode |= clSetKernelArg(clKernel1, 4, sizeof(int), (void *)&m_arg);
	errcode |= clSetKernelArg(clKernel1, 5, sizeof(int), (void *)&n_arg);
	if (errcode != CL_SUCCESS) {
		printf("Error in seting arguments1\n");
	}

	/* Launch through the project's "fusion" enqueue wrapper. */
	errcode = clEnqueueNDRangeKernel_fusion(clCommandQue, clKernel1, 2, NULL, global_dims, local_dims, 0, NULL, NULL);
	if (errcode != CL_SUCCESS) {
		printf("Error in launching kernel1\n");
	}
	/* clFinish(clCommandQue) intentionally not called here. */

	elapsed = rtclock() - launch_begin;
	fprintf(stdout, "GPU Runtime: %0.6lfs\n", elapsed);
	fprintf(stdout, "CAUTION:CPU offset %d %% GPU Runtime: %0.6lf s\n",cpu_offset, elapsed);
}
开发者ID:zhangfengthu,项目名称:CoRunBench,代码行数:35,代码来源:syrk.c


示例6: main

/*
 * MVT benchmark driver.
 *
 * After init_array(), the scop region performs two accumulations over
 * N x N data:
 *   x1[i] += a[i][j] * y_1[j]   (row i of a)
 *   x2[i] += a[j][i] * y_2[j]   (column i of a)
 * The elapsed time is printed through the IF_TIME wrapper (macro defined
 * elsewhere); the arrays are dumped only when TEST is defined.
 *
 * k, l and t are declared but not referenced in the visible body.
 * Returns 0.
 */
int main()
{
    int i, j, k, l, t;

    double t_start, t_end;

    init_array() ;

    IF_TIME(t_start = rtclock());

    /* The region between the scop pragmas is kept free of comments because
     * it is input for a source-to-source tool (presumably Pluto -- confirm). */
#pragma scop
    for (i=0; i<N; i++) {
        for (j=0; j<N; j++) {
            x1[i] = x1[i] + a[i][j] * y_1[j];
        }
    }

    for (i=0; i<N; i++) {
        for (j=0; j<N; j++) {
            x2[i] = x2[i] + a[j][i] * y_2[j];
        }
    }
#pragma endscop

    IF_TIME(t_end = rtclock());
    IF_TIME(printf("%0.6lfs\n", t_end - t_start));

#ifdef TEST
    print_array();
#endif
    return 0;
}
开发者ID:intersense,项目名称:pluto-gw,代码行数:32,代码来源:mvt.c


示例7: main

/*
 * Tensor-contraction timing driver.
 *
 * Repeats the update R[v1][v2][o1][o2] += T[v1][ox][o1][o2] * A2[v2][ox]
 * REPS times, accumulates the wall-clock time of each repetition and prints
 * the mean time per repetition.
 *
 * NOTE(review): the function returns 1, as the original does; confirm that
 * whatever launches this binary does not treat that as a failure.
 */
int main()
{
  int rep;
  int v1, v2, o1, o2, ox;
  double elapsed_sum = 0;

  init_arrays();

  for (rep = 0; rep < REPS; ++rep) {
    const double rep_begin = rtclock();

    for (v1 = 0; v1 <= V-1; ++v1) {
      for (v2 = 0; v2 <= V-1; ++v2) {
        for (o1 = 0; o1 <= O-1; ++o1) {
          for (o2 = 0; o2 <= O-1; ++o2) {
            for (ox = 0; ox <= O-1; ++ox) {
              R[v1][v2][o1][o2] += T[v1][ox][o1][o2] * A2[v2][ox];
            }
          }
        }
      }
    }

    elapsed_sum += rtclock() - rep_begin;
  }

  /* Mean time per repetition. */
  printf("%f\n", elapsed_sum / REPS);

  return 1;
}
开发者ID:brnorris03,项目名称:Orio,代码行数:30,代码来源:tensor.4d2d.base.c


示例8: main

/*
 * SYRK driver for the HMPP build.
 *
 * Runs runSyrk twice on the same input A: once at an HMPP callsite, writing
 * C_outputFromGpu and reported as "GPU Runtime", and once as a plain call,
 * writing C and reported as "CPU Runtime". The two results are then passed
 * to compareResults.
 *
 * argc/argv are not used. All matrices are automatic (stack) arrays.
 * Returns 0.
 */
int main(int argc, char** argv)
{
	double t_start, t_end;
	
	/* Array declaration */
	DATA_TYPE A[N][M];
	DATA_TYPE C[N][N];
	DATA_TYPE C_outputFromGpu[N][N];

	/* Initialize array. */
	init_arrays(A, C, C_outputFromGpu);

	/* Directives placed before t_start is taken, so they are not part of the
	 * timed section (presumably device allocation and upload -- confirm
	 * against the HMPP documentation). */
	#pragma hmpp syrk allocate
	#pragma hmpp syrk advancedload, args[a,c]

	/* Timed section: asynchronous callsite followed by its synchronize. */
	t_start = rtclock();
	#pragma hmpp syrk callsite, args[a,c].advancedload=true, asynchronous
	runSyrk(A, C_outputFromGpu);
	#pragma hmpp syrk synchronize

	t_end = rtclock();
	fprintf(stderr, "GPU Runtime: %0.6lfs\n", t_end - t_start);
	
	#pragma hmpp syrk delegatedstore, args[c]
	#pragma hmpp syrk release

	/* Reference run: the same function called without a callsite directive. */
	t_start = rtclock();
	runSyrk(A, C);
	t_end = rtclock();
	fprintf(stderr, "CPU Runtime: %0.6lfs\n", t_end - t_start);

	compareResults(C, C_outputFromGpu);

	return 0;
}
开发者ID:3upperm2n,项目名称:opt_polybench,代码行数:35,代码来源:syrk.c


示例9: foo

/*
 * Times ten repetitions of a two-stage three-point blur.
 *
 * Inside the scop region, for every (y, x):
 *   blurx[y][x] = in[x][y] + in[x+1][y] + in[x+2][y]
 * and, once y >= 2,
 *   out[x][y-2] = blurx[y-2][x] + blurx[y-1][x] + blurx[y][x]
 * After the trial loop finishes trial equals 10, so the printed value
 * (t_end - t_start)/trial is the mean time per repetition.
 *
 * With VERIFY defined, the result is recomputed through A and every
 * mismatching element of out is reported.
 *
 * t_start / t_end are not declared here; they are expected at file scope.
 * NOTE(review): in[x+2][y] is read for x up to N-1, so in is assumed to have
 * at least N+2 rows -- confirm against its declaration.
 */
void foo(){
	int y,x,trial;
    IF_TIME(t_start = rtclock());

for (trial=0;trial<10;++trial)
{
#pragma scop
	for (y = 0; y <= M-1; ++y)
   		for(x = 0; x <= N-1; ++x) {
	         	blurx[y][x]=in[x][y]+in[x+1][y]+in[x+2][y];
			if (y >= 2)
		 		out[x][y-2]=blurx[y-2][x]+blurx[y-1][x]+blurx[y][x];
   		}
#pragma endscop
}


    IF_TIME(t_end = rtclock());
    IF_TIME(fprintf(stdout, "%s\t\t(M=%d,N=%d) \t %0.6lfs\n", __FILE__, M, N, (t_end - t_start)/trial));

#ifdef VERIFY
// Stage 1: recompute the three-point sum along x into A
for(x = 0; x <= N-1; ++x)
	for(y = 0; y <= M-1; ++y)
		A[x][y]=in[x][y]+in[x+1][y]+in[x+2][y];
// Stage 2: vertical blur
for(x = 0; x <= N-1; ++x)
	for(y = 2; y <= M-1; ++y)
	{
		// Exact floating-point comparison against the reference sum.
		if(out[x][y-2] != A[x][y]+A[x][y-1]+A[x][y-2])
		{
			printf("blur-smo.c: Difference at (%d, %d) : %f versus %f\n", x, y, out[x][y-2], A[x][y]+A[x][y-1]+A[x][y-2]);
		}
	}
#endif
}
开发者ID:bondhugula,项目名称:smo,代码行数:35,代码来源:blur-interleaved.c


示例10: main

/*
 * 3MM driver for the OpenCL build.
 *
 * Optionally reads cpu_offset from the single command-line argument,
 * allocates the matrices, sets up OpenCL, launches the kernels, reads the
 * device result into G_outputFromGpu, then times the host reference
 * (mm3_cpu) and compares both results.
 *
 * Returns 0 on success, 1 if any host buffer could not be allocated.
 */
int main(int argc, char* argv[]) 
{
	double t_start, t_end;

	DATA_TYPE* A;
	DATA_TYPE* B;
	DATA_TYPE* C;
	DATA_TYPE* D;
	DATA_TYPE* E;
	DATA_TYPE* F;
	DATA_TYPE* G;
	DATA_TYPE* G_outputFromGpu;

	if(argc==2){
		printf("arg 1 = %s\narg 2 = %s\n", argv[0], argv[1]);
		cpu_offset = atoi(argv[1]);
	}

	/* sizeof first so each product is evaluated in size_t, not int. */
	A = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NI*NK);
	B = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NK*NJ);
	C = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NJ*NM);
	D = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NM*NL);
	E = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NI*NJ);
	F = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NJ*NL);
	G = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NI*NL);
	G_outputFromGpu = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NI*NL);

	/* Bail out before any OpenCL state exists; free(NULL) is a no-op. */
	if (A == NULL || B == NULL || C == NULL || D == NULL ||
	    E == NULL || F == NULL || G == NULL || G_outputFromGpu == NULL) {
		fprintf(stderr, "Error: out of memory while allocating matrices\n");
		free(A);
		free(B);
		free(C);
		free(D);
		free(E);
		free(F);
		free(G);
		free(G_outputFromGpu);
		return 1;
	}

	init_array(A, B, C, D);
	read_cl_file();
	cl_initialization_fusion();
	//cl_initialization();
	cl_mem_init(A, B, C, D, E, F, G);
	cl_load_prog();

	cl_launch_kernel();

	/* Blocking read (CL_TRUE) of the device result. */
	errcode = clEnqueueReadBuffer(clCommandQue[0], g_mem_obj, CL_TRUE, 0, sizeof(DATA_TYPE) * NI * NL, G_outputFromGpu, 0, NULL, NULL);
	if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");

	t_start = rtclock();
	mm3_cpu(A, B, C, D, E, F, G);
	t_end = rtclock(); 
	fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);   
	compareResults(G, G_outputFromGpu);
	cl_clean_up();

	free(A);
	free(B);
	free(C);
	free(D);
	free(E);
	free(F);
	free(G);
	free(G_outputFromGpu);

	return 0;
}
开发者ID:zhangfengthu,项目名称:CoRunBench,代码行数:59,代码来源:3mm.c


示例11: main

/*
 * 3MM driver for the HMPP build.
 *
 * Runs the three stages threeMMloopa / threeMMloopb / threeMMloopc twice:
 * first at HMPP callsites on the *_gpu arrays (reported as "GPU Runtime"),
 * then as plain calls on E, F, G (reported as "CPU Runtime"), and finally
 * compares G with G_outputFromGpu.
 *
 * argc/argv are not used. All matrices are automatic (stack) arrays.
 * Returns 0.
 */
int main(int argc, char** argv)
{
	double t_start, t_end;

	/* Array declaration */
	DATA_TYPE A[NI][NK];
	DATA_TYPE B[NK][NJ];
	DATA_TYPE C[NJ][NM];
	DATA_TYPE D[NM][NL];
	DATA_TYPE E[NI][NJ];
	DATA_TYPE E_gpu[NI][NJ];	
	DATA_TYPE F[NJ][NL];
	DATA_TYPE F_gpu[NJ][NL];
	DATA_TYPE G[NI][NL];
	DATA_TYPE G_outputFromGpu[NI][NL];

	/* Initialize array. */
	/* NOTE(review): the spelling "iNIt_array" looks like the result of a
	 * global ni -> NI replacement; it must match the definition elsewhere. */
	iNIt_array(A, B, C, D);
    
	/* Directives placed before t_start is taken, so they are outside the
	 * timed section (presumably device allocation and uploads -- confirm). */
	#pragma hmpp <group1> allocate

	#pragma hmpp <group1> loopa advancedload, args[a;b;e]
	#pragma hmpp <group1> loopb advancedload, args[f;c;d]
	#pragma hmpp <group1> loopc advancedload, args[g]

	/* Timed section: each stage is launched asynchronously and synchronized
	 * before the next one starts. */
	t_start = rtclock();
	#pragma hmpp <group1> loopa callsite, args[a;b;e].advancedload=true, asynchronous
	threeMMloopa(A, B, E_gpu);
	#pragma hmpp <group1> loopa synchronize
	#pragma hmpp <group1> loopb callsite, args[f;c;d].advancedload=true, asynchronous
	threeMMloopb(C, D, F_gpu);
	#pragma hmpp <group1> loopb synchronize
	#pragma hmpp <group1> loopc callsite, args[g;e;f].advancedload=true, asynchronous
	threeMMloopc(E_gpu, F_gpu, G_outputFromGpu);
	#pragma hmpp <group1> loopc synchronize

	t_end = rtclock();
	fprintf(stderr, "GPU Runtime: %0.6lfs\n", t_end - t_start);
    
	#pragma hmpp <group1> loopa delegatedstore, args[a;b]
	#pragma hmpp <group1> loopb delegatedstore, args[c;d]
	#pragma hmpp <group1> loopc delegatedstore, args[g;e;f]

	#pragma hmpp <group1> release
	
	/* Reference run: the same three stages without callsite directives. */
	t_start = rtclock();

	threeMMloopa(A, B, E);
	threeMMloopb(C, D, F);
	threeMMloopc(E, F, G);

	t_end = rtclock();
	fprintf(stderr, "CPU Runtime: %0.6lfs\n", t_end - t_start);

	compareResults(G, G_outputFromGpu);

	return 0;
}
开发者ID:lnangong,项目名称:polybenchGpu,代码行数:58,代码来源:threemm.c


示例12: main

/*
 * Covariance driver for the HMPP build.
 *
 * Runs covarLoopa / covarLoopb / covarLoopc twice: first at HMPP callsites
 * on the *_Gpu arrays (reported as "GPU Runtime"), then as plain calls on
 * mean, data and symmat (reported as "CPU Runtime"), and finally compares
 * symmat with symmat_outputFromGpu.
 *
 * argc/argv and the locals m and n are not referenced in the visible body.
 * All arrays are automatic (stack) arrays. Returns 0.
 */
int main(int argc, char** argv)
{
	int m = M;
	int n = N;
	double t_start, t_end;

	/* Array declaration */
	DATA_TYPE float_n = 321414134.01;
	DATA_TYPE data[M + 1][N + 1];
	DATA_TYPE data_Gpu[M + 1][N + 1];
	DATA_TYPE symmat[M + 1][M + 1];
	DATA_TYPE symmat_outputFromGpu[M + 1][M + 1];	
	DATA_TYPE mean[M + 1];
	DATA_TYPE mean_Gpu[M + 1];

	/* Initialize array. */
	init_arrays(data, data_Gpu);
    
	/* Directives placed before t_start is taken, so they are outside the
	 * timed section (presumably device allocation and uploads -- confirm). */
	#pragma hmpp <group1> allocate
	#pragma hmpp <group1> loopa advancedload, args[pmean;pdata;pfloat_n]
    
	#pragma hmpp <group1> loopc advancedload, args[psymmat]

	/* Timed section: each stage is launched asynchronously and synchronized
	 * before the next one starts. */
	t_start = rtclock();
	
	#pragma hmpp <group1> loopa callsite, args[pmean;pdata;pfloat_n].advancedload=true, asynchronous
	covarLoopa(mean_Gpu, data_Gpu, float_n);
	#pragma hmpp <group1> loopa synchronize
	#pragma hmpp <group1> loopb callsite, args[pdata;pmean].advancedload=true, asynchronous
	covarLoopb(data_Gpu, mean_Gpu);
	#pragma hmpp <group1> loopb synchronize
	#pragma hmpp <group1> loopc callsite, args[psymmat;pdata].advancedload=true, asynchronous
	covarLoopc(symmat_outputFromGpu, data_Gpu);
	#pragma hmpp <group1> loopc synchronize

	t_end = rtclock();
	fprintf(stderr, "GPU Runtime: %0.6lfs\n", t_end - t_start);
    
    
	#pragma hmpp <group1> loopb delegatedstore, args[pmean]
    
	#pragma hmpp <group1> loopc delegatedstore, args[psymmat;pdata]
	#pragma hmpp <group1> release
	
	/* Reference run: the same three stages without callsite directives. */
	t_start = rtclock();
	
	covarLoopa(mean, data, float_n);
	covarLoopb(data, mean);
	covarLoopc(symmat, data);
	
	t_end = rtclock();
	fprintf(stderr, "CPU Runtime: %0.6lfs\n", t_end - t_start);
	
	compareResults(symmat, symmat_outputFromGpu);

	return 0;
}
开发者ID:3upperm2n,项目名称:opt_polybench,代码行数:57,代码来源:covar.c


示例13: main

/*
 * Correlation driver for the OpenCL build.
 *
 * Optionally reads cpu_offset from the single command-line argument,
 * allocates the host buffers, sets up OpenCL, times cl_launch_kernel(),
 * reads the device result into symmat_outputFromGpu, then times the host
 * reference (correlation) and compares both results.
 *
 * NOTE(review): symmat buffers are sized (M+1)*(N+1), matching the size used
 * in the read-back below; confirm this is what cl_mem_init expects when
 * M != N.
 *
 * Returns 0 on success, 1 if any host buffer could not be allocated.
 */
int main(int argc, char* argv[]) 
{
	double t_start, t_end;
	double start, end;
	
	DATA_TYPE* data;
	DATA_TYPE* mean;
	DATA_TYPE* stddev;
	DATA_TYPE* symmat;
	DATA_TYPE* symmat_outputFromGpu;

	if(argc==2){
		printf("arg 1 = %s\narg 2 = %s\n", argv[0], argv[1]);
		cpu_offset = atoi(argv[1]);
	}

	/* sizeof first so each product is evaluated in size_t, not int. */
	data = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*(M + 1)*(N + 1));
	mean = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*(M + 1));
	stddev = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*(M + 1));
	symmat = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*(M + 1)*(N + 1));
	symmat_outputFromGpu = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*(M + 1)*(N + 1));

	/* Bail out before any OpenCL state exists; free(NULL) is a no-op. */
	if (data == NULL || mean == NULL || stddev == NULL ||
	    symmat == NULL || symmat_outputFromGpu == NULL) {
		fprintf(stderr, "Error: out of memory while allocating buffers\n");
		free(data);
		free(mean);
		free(stddev);
		free(symmat);
		free(symmat_outputFromGpu);
		return 1;
	}
	
	init_arrays(data);
	read_cl_file();
	cl_initialization_fusion();
	//cl_initialization();
	cl_mem_init(data, mean, stddev, symmat);
	cl_load_prog();

	start = rtclock();
	cl_launch_kernel();
	end = rtclock(); 
	fprintf(stdout, "CAUTION:CPU offset %d %% GPU Runtime: %0.6lf s\n",cpu_offset, (end - start));

	/* Blocking read (CL_TRUE) of the device result. */
	errcode = clEnqueueReadBuffer(clCommandQue[0], symmat_mem_obj, CL_TRUE, 0, (M+1) * (N+1) * sizeof(DATA_TYPE), symmat_outputFromGpu, 0, NULL, NULL);
	if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");

	t_start = rtclock();
	correlation(data, mean, stddev, symmat);
	t_end = rtclock(); 
	fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);   

	compareResults(symmat, symmat_outputFromGpu);
	cl_clean_up();
	
	free(data);
	free(mean);
	free(stddev);
	free(symmat);
	free(symmat_outputFromGpu);
	
	return 0;
}
开发者ID:zhangfengthu,项目名称:CoRunBench,代码行数:55,代码来源:correlation.c


示例14: main

/*
 * SYRK driver (argument-free variant): initialises the data, runs the
 * offloaded version, then times the host version and compares the results.
 * Returns 0.
 */
int main() {
  init_arrays();	
  syrkGPU();

  /* Time only the host computation. */
  const double cpu_begin = rtclock();
  syrk();
  const double cpu_elapsed = rtclock() - cpu_begin;

  fprintf(stdout, "CPU Runtime: %0.6lfs\n", cpu_elapsed);

  compareResults();
  return 0;
}
开发者ID:rcfsousa,项目名称:Polybench_OpenMP,代码行数:12,代码来源:syrk_m.c


示例15: SpMM

// Multiplies two CSR matrices (m1 * m2), accumulating each output row into a
// per-row FastHash, and prints the elapsed time of the multiplication.
//
//   m1, m2       input matrices; rows/cols/vals are read from both and
//                row_lens from m2.
//   num_buckets  bucket count passed to every per-row FastHash.
//
// The hash tables are created and destroyed inside this function, so the
// product itself is not returned; only the running sum `res` and the timing
// are printed.
//
// NOTE(review): ValueType is presumably double, since the values are
// multiplied as packed doubles (__m512d) -- confirm. The enclosing template
// header is not visible in this excerpt.
void SpMM(Csr<ValueType>* m1, Csr<ValueType>* m2, int num_buckets) {

  // One accumulator table per row of m1.
  vector<FastHash<int, ValueType>* > result_map(m1->num_rows);
  for (auto& v : result_map) {
    v = new FastHash<int, ValueType>(num_buckets);
  }

  cout << "Starting SpMM..." << endl;

  // Sum of every product that was reduced; accumulated in single precision.
  float res = 0;
  double before = rtclock();
  for(int i=0;i<m1->num_rows;i++) {
    for(int j=m1->rows[i];j<m1->rows[i+1];j++) {
      int cola = m1->cols[j];
      // Broadcast the m1 entry to all eight lanes.
      __m512d a = _mm512_set1_pd(m1->vals[j]);
      // Walk row `cola` of m2 sixteen entries at a time.
      // NOTE(review): this reads whole groups of 16 values/columns, so each
      // m2 row is assumed to be stored padded to a multiple of 16 and
      // suitably aligned for the vector loads -- confirm with the Csr layout.
      for(int k=m2->rows[cola];k<m2->rows[cola] + m2->row_lens[cola];k+=16) {
        __m512d *pb1 = (__m512d *)(&(m2->vals[k]));      // values k .. k+7
        __m512d *pb2 = (__m512d *)(&(m2->vals[k]) + 8);  // values k+8 .. k+15
        __m512i *pcols = (__m512i *)(&(m2->cols[k]));    // 16 column indices
        __m512d c1 = _mm512_mul_pd(a, *pb1);
        __m512d c2 = _mm512_mul_pd(a, *pb2);
        // First eight products, read back lane by lane.
        for(int x=0;x<8;x++) {
          int col = ((int *)pcols)[x];
          // Entries with column -1 are skipped (presumably padding -- confirm).
          if (col == -1) {
            continue;
          }
          ValueType val = ((ValueType *)(&c1))[x];
          result_map[i]->Reduce(col, val);
          res += val;
        }

        // Second eight products; their columns are at offset 8.
        for (int x = 0; x < 8; ++x) {
          int col = ((int *)pcols)[x+8];
          if (col == -1) {
            continue;
          }
          ValueType val = ((ValueType *)(&c2))[x];
          result_map[i]->Reduce(col, val);
          res += val;
        }
      }
    }
  }

  double after = rtclock();
  cout << "res: " << res << endl;

  cout << RED << "[****Result****] ========> *SIMD Naive* time: " << after - before << " secs." << RESET << endl;

  // Release the per-row tables allocated above.
  for (auto& v : result_map) {
    delete v;
  }
}
开发者ID:CheYulin,项目名称:irreg-simd,代码行数:53,代码来源:spmm.cpp


示例16: main

/*
 * 3MM driver for the OpenMP build.
 *
 * Allocates the matrices, initialises the inputs, times mm3_OMP (reported as
 * "GPU Runtime", result in G_outputFromGpu) and mm3_cpu (reported as
 * "CPU Runtime", result in G), then compares both results.
 *
 * argc/argv are not used.
 * Returns 0 on success, 1 if any buffer could not be allocated.
 */
int main(int argc, char** argv)
{
  double t_start, t_end;

  DATA_TYPE* A;
  DATA_TYPE* B;
  DATA_TYPE* C;
  DATA_TYPE* D;
  DATA_TYPE* E;
  DATA_TYPE* F;
  DATA_TYPE* G;
  DATA_TYPE* G_outputFromGpu;

  /* sizeof first so each product is evaluated in size_t, not int. */
  A = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NI*NK);
  B = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NK*NJ);
  C = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NJ*NM);
  D = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NM*NL);
  E = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NI*NJ);
  F = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NJ*NL);
  G = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NI*NL);
  G_outputFromGpu = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NI*NL);

  if (A == NULL || B == NULL || C == NULL || D == NULL ||
      E == NULL || F == NULL || G == NULL || G_outputFromGpu == NULL) {
    fprintf(stderr, "Error: out of memory while allocating matrices\n");
    /* free(NULL) is a no-op, so releasing everything is safe. */
    free(A);
    free(B);
    free(C);
    free(D);
    free(E);
    free(F);
    free(G);
    free(G_outputFromGpu);
    return 1;
  }

  fprintf(stdout, "<< Linear Algebra: 3 Matrix Multiplications (E=A.B; F=C.D; G=E.F) >>\n");

  init_array(A, B, C, D);

  t_start = rtclock();
  mm3_OMP(A, B, C, D, E, F, G_outputFromGpu);
  t_end = rtclock();	

  fprintf(stdout, "GPU Runtime: %0.6lfs\n", t_end - t_start);

  t_start = rtclock();
  mm3_cpu(A, B, C, D, E, F, G);
  t_end = rtclock();

  fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);

  compareResults(G, G_outputFromGpu);

  free(A);
  free(B);
  free(C);
  free(D);
  free(E);
  free(F);
  free(G);
  free(G_outputFromGpu);

  return 0;
}
开发者ID:rcfsousa,项目名称:Polybench_OpenMP,代码行数:51,代码来源:3mm.c


示例17: main

/*
 * Floyd-Warshall benchmark driver.
 *
 * After init_array(), the scop region relaxes every pair (y, x) through every
 * intermediate node k: pathDistanceMatrix[y][x] becomes the smaller of its
 * current value and pathDistanceMatrix[y][k] + pathDistanceMatrix[k][x].
 * The elapsed time is printed through IF_TIME and the matrix is dumped when
 * a ".test" file can be opened (only on rank 0 when MPI is defined).
 *
 * t_start / t_end are not declared here; they are expected at file scope.
 * Several locals (i, j, the distance* variables, width, yXwidth) are not
 * referenced in the visible body.
 *
 * Returns 0.
 */
int main()
{
    int i, j, k, x, y;
    unsigned int distanceYtoX, distanceYtoK, distanceKtoX;
    /*
     * pathDistanceMatrix is the adjacency matrix (square) with
     * dimension length equal to number of nodes in the graph.
     */
    unsigned int width = NUM_NODES;
    unsigned int yXwidth;
    FILE *test_marker;

    init_array();

#ifdef PERFCTR
    PERF_INIT; 
#endif

    IF_TIME(t_start = rtclock());

#pragma scop
    for(k=0; k < NUM_NODES; k++)
    {
        for(y=0; y < NUM_NODES; y++)
        {
            for(x=0; x < NUM_NODES; x++)
            {
                pathDistanceMatrix[y][x] = ((pathDistanceMatrix[y][k] + pathDistanceMatrix[k][x]) < pathDistanceMatrix[y][x]) ? (pathDistanceMatrix[y][k] + pathDistanceMatrix[k][x]):pathDistanceMatrix[y][x];
            }
        }
    }
#pragma endscop

    IF_TIME(t_end = rtclock());
    IF_TIME(fprintf(stdout, "time = %0.6lfs\n", t_end - t_start));

#ifdef PERFCTR
    PERF_EXIT; 
#endif

    /* ".test" is only an on/off switch for the dump; close the probe handle
     * right away instead of leaking it. */
    test_marker = fopen(".test", "r");
    if (test_marker != NULL) {
        fclose(test_marker);
#ifdef MPI
        if (my_rank == 0) {
            print_array();
        }
#else
        print_array();
#endif
    }

    return 0;
}
开发者ID:intersense,项目名称:pluto-gw,代码行数:51,代码来源:floyd.c


示例18: main

/*
 * 2MM driver for the HMPP build.
 *
 * Runs twoMMloopa / twoMMloopb twice: first at HMPP callsites on C_gpu and
 * E_outputFromGpu (reported as "GPU Runtime"), then as plain calls on C and
 * E (reported as "CPU Runtime"), and finally compares E with
 * E_outputFromGpu.
 *
 * argc/argv are not used. All matrices are automatic (stack) arrays.
 * Returns 0.
 */
int main(int argc, char** argv)
{
    double t_start, t_end;

    /* Array declaration */
    DATA_TYPE A[NI][NK];
    DATA_TYPE B[NK][NJ];
    DATA_TYPE C[NI][NJ];
    DATA_TYPE C_gpu[NI][NJ];
    DATA_TYPE D[NJ][NL];
    DATA_TYPE E[NI][NL];
    DATA_TYPE E_outputFromGpu[NI][NL];

    /* Initialize array. */
    init_array(A, B, C, C_gpu, D, E, E_outputFromGpu);

    /* Directives placed before t_start is taken, so they are outside the
     * timed section (presumably device allocation and uploads -- confirm). */
#pragma hmpp <group1> allocate

#pragma hmpp <group1> loopa advancedload, args[a;b;c]
#pragma hmpp <group1> loopb advancedload, args[d;e]

    /* Timed section: each stage is launched asynchronously and synchronized
     * before the next one starts. */
    t_start = rtclock();
#pragma hmpp <group1> loopa callsite, args[a;b;c].advancedload=true, asynchronous
    twoMMloopa(A, B, C_gpu);

#pragma hmpp <group1> loopa synchronize
#pragma hmpp <group1> loopb callsite, args[c;d;e].advancedload=true, asynchronous
    twoMMloopb(C_gpu, D, E_outputFromGpu);
#pragma hmpp <group1> loopb synchronize

    t_end = rtclock();
    fprintf(stderr, "GPU Runtime: %0.6lfs\n", t_end - t_start);

#pragma hmpp <group1> loopa delegatedstore, args[a;b]
#pragma hmpp <group1> loopb delegatedstore, args[c;d;e]

#pragma hmpp <group1> release

    /* Reference run: the same two stages without callsite directives. */
    t_start = rtclock();

    twoMMloopa(A, B, C);
    twoMMloopb(C, D, E);

    t_end = rtclock();
    fprintf(stderr, "CPU Runtime: %0.6lfs\n", t_end - t_start);

    compareResults(E, E_outputFromGpu);

    return 0;
}
开发者ID:lnangong,项目名称:polybenchGpu,代码行数:50,代码来源:twomm.c


示例19: main

/*
 * BiCG driver for the OpenCL build.
 *
 * Allocates the host buffers, sets up OpenCL, launches the kernels, reads
 * the two device results (s and q) back, then times the host reference
 * (bicg_cpu) and compares both pairs of results.
 *
 * Returns 0 on success, 1 if any host buffer could not be allocated.
 */
int main(void) 
{
	double t_start, t_end;
	
	DATA_TYPE* A;
	DATA_TYPE* r;
	DATA_TYPE* s;
	DATA_TYPE* p;
	DATA_TYPE* q;
	DATA_TYPE* s_outputFromGpu;
	DATA_TYPE* q_outputFromGpu;
 	
	/* sizeof first so each product is evaluated in size_t, not int. */
	A = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NX*NY);
	r = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NX);
	s = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NY);
	p = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NY);
	q = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NX);
	s_outputFromGpu = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NY);
	q_outputFromGpu = (DATA_TYPE*)malloc(sizeof(DATA_TYPE)*NX);

	/* Bail out before any OpenCL state exists; free(NULL) is a no-op. */
	if (A == NULL || r == NULL || s == NULL || p == NULL || q == NULL ||
	    s_outputFromGpu == NULL || q_outputFromGpu == NULL) {
		fprintf(stderr, "Error: out of memory while allocating buffers\n");
		free(A);
		free(r);
		free(s);
		free(p);
		free(q);
		free(s_outputFromGpu);
		free(q_outputFromGpu);
		return 1;
	}
	
	init_array(A, p, r);	
	read_cl_file();
	cl_initialization();
	cl_mem_init(A, r, s, p, q);
	cl_load_prog();

	cl_launch_kernel();

	/* Check each blocking read on its own: previously the status of the
	 * first read was overwritten by the second before being tested. */
	errcode = clEnqueueReadBuffer(clCommandQue, s_mem_obj, CL_TRUE, 0, NY*sizeof(DATA_TYPE), s_outputFromGpu, 0, NULL, NULL);
	if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");
	errcode = clEnqueueReadBuffer(clCommandQue, q_mem_obj, CL_TRUE, 0, NX*sizeof(DATA_TYPE), q_outputFromGpu, 0, NULL, NULL);
	if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");  

	t_start = rtclock();
	bicg_cpu(A, r, s, p, q);
	t_end = rtclock(); 
	fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);   
	compareResults(s, s_outputFromGpu, q, q_outputFromGpu);
	cl_clean_up();
	
	free(A);
	free(r);
	free(s);
	free(p);
	free(q);
	free(s_outputFromGpu);
	free(q_outputFromGpu);
	
	return 0;
}
开发者ID:3upperm2n,项目名称:opt_polybench,代码行数:49,代码来源:bicg.c


示例20: main

/*
 * Correlation driver for the HMPP build.
 *
 * Runs runCorr twice: once at an HMPP callsite on the *_Gpu arrays (reported
 * as "GPU Runtime") and once as a plain call on data, symmat, stddev and
 * mean (reported as "CPU Runtime"), then compares symmat with
 * symmat_outputFromGpu.
 *
 * argc/argv and the locals m and n are not referenced in the visible body.
 * All arrays are automatic (stack) arrays. Returns 0.
 */
int main(int argc, char** argv)
{
	int m = M;
	int n = N;
	double t_start, t_end;

	/* Array declaration */
	DATA_TYPE float_n = 321414134.01;
	DATA_TYPE eps = 0.005;
	DATA_TYPE data[M + 1][N + 1];
	DATA_TYPE data_Gpu[M + 1][N + 1];
	DATA_TYPE mean[M + 1];
	DATA_TYPE mean_Gpu[M + 1];
	DATA_TYPE stddev[M + 1];
	DATA_TYPE stddev_Gpu[M + 1];
	DATA_TYPE symmat[M + 1][M + 1];
	DATA_TYPE symmat_outputFromGpu[M + 1][M + 1];

	/* Initialize array. */
	init_arrays(data, data_Gpu);
	
	/* Directives placed before t_start is taken, so they are outside the
	 * timed section (presumably device allocation and upload -- confirm). */
	#pragma hmpp corr allocate
    
	#pragma hmpp corr advancedload, args[pdata;psymmat;pstddev;pmean;pfloat_n;peps]

	/* Timed section: asynchronous callsite followed by its synchronize. */
	t_start = rtclock();
	
	#pragma hmpp corr callsite, args[pdata;psymmat;pstddev;pmean;pfloat_n;peps].advancedload=true, asynchronous
	runCorr(data_Gpu, symmat_outputFromGpu, stddev_Gpu, mean_Gpu, float_n, eps);
    
	#pragma hmpp corr synchronize

	t_end = rtclock();
	fprintf(stderr, "GPU Runtime: %0.6lfs\n", t_end - t_start);
    
	#pragma hmpp corr delegatedstore, args[pdata;psymmat;pstddev;pmean]
	#pragma hmpp corr release
	
	/* Reference run: the same function called without a callsite directive. */
	t_start = rtclock();
	
	runCorr(data, symmat, stddev, mean, float_n, eps);
	
	t_end = rtclock();
	fprintf(stderr, "CPU Runtime: %0.6lfs\n", t_end - t_start);
	
	compareResults(symmat, symmat_outputFromGpu);

	return 0;
}
开发者ID:3upperm2n,项目名称:opt_polybench,代码行数:49,代码来源:corr.c



注:本文中的rtclock函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
C++ rtdm_lock_get_irqsave函数代码示例发布时间:2022-05-30
下一篇:
C++ rtc_valid_tm函数代码示例发布时间:2022-05-30
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap