This article collects typical usage examples of the C++ function cudaThreadSynchronize. If you have been wondering what cudaThreadSynchronize does, how to call it, or what real-world usage looks like, the hand-picked examples below should help.
Twenty code examples of cudaThreadSynchronize are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better C++ code examples.
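Before the examples, a brief orientation: cudaThreadSynchronize() blocks the calling host thread until all previously issued device work has completed, and returns any asynchronous error that work produced. It has been deprecated since CUDA 4.0 in favor of the identically behaving cudaDeviceSynchronize(), which is why it mostly appears in older code such as the snippets below. The following minimal sketch (our own illustration, not taken from any of the projects below) shows the canonical pattern: launch a kernel asynchronously, then synchronize before reading results.

#include <cstdio>
#include <cuda_runtime.h>

// Scale every element of the array in place.
__global__ void scaleKernel(float *data, float factor, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        data[i] *= factor;
}

int main()
{
    const int n = 1024;
    float h_data[n];
    for (int i = 0; i < n; ++i)
        h_data[i] = 1.0f;

    float *d_data = NULL;
    cudaMalloc((void**)&d_data, n * sizeof(float));
    cudaMemcpy(d_data, h_data, n * sizeof(float), cudaMemcpyHostToDevice);

    // Kernel launches are asynchronous: control returns to the host immediately.
    scaleKernel<<<(n + 255) / 256, 256>>>(d_data, 2.0f, n);

    // Block until the device has finished and pick up any asynchronous error.
    // Deprecated since CUDA 4.0; on current toolkits use cudaDeviceSynchronize().
    cudaError_t err = cudaThreadSynchronize();
    if (err != cudaSuccess)
        printf("kernel failed: %s\n", cudaGetErrorString(err));

    cudaMemcpy(h_data, d_data, n * sizeof(float), cudaMemcpyDeviceToHost);
    printf("h_data[0] = %f\n", h_data[0]); // expected: 2.000000
    cudaFree(d_data);
    return 0;
}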
Example 1: runCalcU
Fflcun2::ParticlesPosition Fflcun2::run(int nSteps) {
    for (int j = 0; j < nSteps; j++) {
        runCalcU(blocks, threads, devMatrixes);
        cudaThreadSynchronize();

        if (cPar->ft == ConstParams::ROTATING) {
            chPar->hExtX = cPar->hExtXInit * sin(2 * M_PI * cPar->rff * chPar->time);
            chPar->hExtY = cPar->hExtYInit * cos(2 * M_PI * cPar->rff * chPar->time);
        }
        fillGloabalChangable(chPar);
        cudaThreadSynchronize();

        runOneStep(blocks, threads, devMatrixes);
        cudaThreadSynchronize();

        runApplyDeltas(blocks, threads, devMatrixes);
        cudaThreadSynchronize();

        chPar->time += chPar->dTimeCurrent;
    }
    cudaMemcpy(partPos.x, devMatrixes.x, sizeof(float) * cPar->nPart, cudaMemcpyDeviceToHost);
    cudaMemcpy(partPos.y, devMatrixes.y, sizeof(float) * cPar->nPart, cudaMemcpyDeviceToHost);
    cudaMemcpy(partPos.z, devMatrixes.z, sizeof(float) * cPar->nPart, cudaMemcpyDeviceToHost);
    cudaMemcpy(partPos.theta, devMatrixes.theta, sizeof(float) * cPar->nPart, cudaMemcpyDeviceToHost);
    cudaMemcpy(partPos.phy, devMatrixes.phy, sizeof(float) * cPar->nPart, cudaMemcpyDeviceToHost);

    return partPos;
}
Author: psci2195, Project: fflcu, Lines: 34, Source: fflcun2.cpp
Example 2: benchmark
void
benchmark(int iterations)
{
    // allocate memory for result
    unsigned int *d_result;
    unsigned int size = width * height * sizeof(unsigned int);
    cutilSafeCall(cudaMalloc((void**)&d_result, size));

    // warm-up
    gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads);
    cutilSafeCall(cudaThreadSynchronize());
    cutilCheckError(cutStartTimer(timer));

    // execute the kernel
    for (int i = 0; i < iterations; i++) {
        gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads);
    }
    cutilSafeCall(cudaThreadSynchronize());
    cutilCheckError(cutStopTimer(timer));

    // check if kernel execution generated an error
    cutilCheckMsg("Kernel execution failed");

    printf("Processing time: %f (ms)\n", cutGetTimerValue(timer));
    printf("%.2f Mpixels/sec\n", (width*height*iterations / (cutGetTimerValue(timer) / 1000.0f)) / 1e6);

    cutilSafeCall(cudaFree(d_result));
}
Author: AnkurAnandapu, Project: ocelot-fork, Lines: 30, Source: recursiveGaussian-host.cpp
Example 3: device_X
void RadialBasisFunction::Train(HostMatrix<float> &Input, HostMatrix<float> &Target){
    //std::cout << "Training" << std::endl;
    //c_width = (float*) malloc(sizeof(float)*network_size);
    //memset(c_width,0,sizeof(float)*network_size);

    DeviceMatrix<float> device_X(Input);

    //std::cout << "KMeans" << std::endl;
    clock_t initialTime = clock();
    KMeans KM;
    KM.SetSeed(seed);
    dCenters = KM.Execute(device_X, network_size);
    cudaThreadSynchronize();
    times[0] = (clock() - initialTime);

    //std::cout << "Adjust Widths" << std::endl;
    /* Adjust width using mean of distance to neighbours */
    initialTime = clock();
    AdjustWidths(number_neighbours);
    cudaThreadSynchronize();
    times[1] = (clock() - initialTime);

    /* Training weights and scaling factor */
    HostMatrix<float> TargetArr(Target.Rows(), NumClasses);
    memset(TargetArr.Pointer(), 0, sizeof(float)*TargetArr.Elements());
    for (int i = 0; i < Target.Rows(); i++) {
        TargetArr(i, ((int)Target(i,0) - 1)) = 1;
    }
    DeviceMatrix<float> d_Target(TargetArr);

    //std::cout << "Calculating Weights" << std::endl;
    initialTime = clock();
    DeviceMatrix<float> device_activ_matrix(device_X.Rows(), dCenters.Rows(), ColumnMajor);
    KernelActivationMatrix(device_activ_matrix.Pointer(), device_X.Pointer(), dCenters.Pointer(),
                           device_X.Columns(), dCenters.Columns(), device_activ_matrix.Columns(),
                           device_activ_matrix.Rows(), scaling_factor, device_c_width.Pointer());
    DeviceMatrix<float> d_Aplus = UTILS::pseudoinverse(device_activ_matrix);
    dWeights = DeviceMatrix<float>(d_Aplus.Rows(), d_Target.Columns());
    d_Aplus.Multiply(d_Aplus, d_Target, dWeights);

    /* Return Weights and Centers */
    cudaThreadSynchronize();
    times[2] = (clock() - initialTime);

    //cudaMemcpy(c_width,device_c_width.Pointer(),sizeof(float)*device_c_width.Length(),cudaMemcpyDeviceToHost);
    //this->Weights = HostMatrix<float>(dWeights);
    //this->Centers = HostMatrix<float>(dCenters);
}
Author: kallaballa, Project: Neurocid, Lines: 60, Source: RadialBasisFunction.cpp
Example 4: cudaGraphicsMapResources
//-----------------------------------------------------------------------------
void QGLImageGpuWidget::fillPbo(iu::ImageGpu_8u_C4* output)
{
    // map GL <-> CUDA resource
    uchar4 *d_dst = NULL;
    size_t start;
    cudaGraphicsMapResources(1, &cuda_pbo_resource_, 0);
    cudaGraphicsResourceGetMappedPointer((void**)&d_dst, &start, cuda_pbo_resource_);

    // get image data
    iuprivate::cuCopyImageToPbo(image_, num_channels_, bit_depth_, d_dst, min_, max_);
    cudaThreadSynchronize();

    // get overlays
    iuprivate::OverlayList::iterator it;
    for (it = overlay_list_.begin(); it != overlay_list_.end(); it++)
        if ((*it)->isActive())
            cuCopyOverlayToPbo((*it), d_dst, image_->size());
    cudaThreadSynchronize();

    if (output != NULL)
    {
        // copy final pbo to output
        iu::ImageGpu_8u_C4 temp(d_dst, image_->width(), image_->height(),
                                image_->width()*sizeof(uchar4), true);
        iu::copy(&temp, output);
    }

    // unmap GL <-> CUDA resource
    cudaGraphicsUnmapResources(1, &cuda_pbo_resource_, 0);
}
Author: ankurhanda, Project: imageutilities, Lines: 31, Source: qglimagegpuwidget.cpp
Example 5: cudaThreadSynchronize
OsdCudaGLVertexBuffer::~OsdCudaGLVertexBuffer() {
    cudaThreadSynchronize();
    unmap();
    cudaGraphicsUnregisterResource(_cudaResource);
    cudaThreadSynchronize();
    glDeleteBuffers(1, &_vbo);
}
Author: chrislowe5, Project: OpenSubdiv-dev, Lines: 8, Source: cudaGLVertexBuffer.cpp
Example 6: main
int main(int argc, char** argv)
{
    float fTotalTime = 0;
    //int TARGET_WIDTH=atoi(argv[2]);
    //int TARGET_HEIGHT=atoi(argv[3]);
    //bool visualize_results=atoi(argv[4]);
    //unsigned int kernel_size=atoi(argv[2]);
    int gpuNr = atoi(argv[2]);
    checkCudaErrors(cudaSetDevice(gpuNr));

    IplImage* gray_image = cvLoadImage(argv[1], CV_LOAD_IMAGE_GRAYSCALE);
    unsigned char * d_input_image;
    unsigned char * d_output_image;
    int widthImage = gray_image->width;
    int heightImage = gray_image->height;

    IplImage *output_image = cvCreateImage(cvSize(widthImage, heightImage), IPL_DEPTH_8U, 1);
    for (int i = 0; i < heightImage; i++)
        for (int j = 0; j < widthImage; j++)
            output_image->imageData[i*widthImage+j] = 255;

    unsigned int * d_histogram;
    int total_threads = 256;
    cudaMalloc(&d_histogram, sizeof(unsigned int)*256*total_threads);
    checkCudaErrors(cudaMalloc(&d_input_image, widthImage*heightImage*sizeof(unsigned char)));
    checkCudaErrors(cudaMalloc(&d_output_image, widthImage*heightImage*sizeof(unsigned char)));
    checkCudaErrors(cudaMemcpy(d_input_image, gray_image->imageData, widthImage*heightImage*sizeof(unsigned char), cudaMemcpyHostToDevice));

    unsigned int windows_array[4] = {15, 17, 25, 31};
    int total_implementations = 4;
    double elapsed_time;
    for (int i = 1; i <= total_implementations; i++)
    {
        for (int j = 0; j < 4; j++)
        {
            timer my_timer;
            MedianFilterUcharCUDA(d_input_image, d_output_image, d_histogram, widthImage, heightImage, windows_array[j], 16, 16, i);
            cudaThreadSynchronize();
            elapsed_time = my_timer.elapsed();
            printf("elapsed_time for implementation %d for window size %u was %f \n", i, windows_array[j], elapsed_time);
        }
    }

    timer array_timer;
    arrayFireRows(d_input_image, d_output_image, widthImage, heightImage, 3, 16, 16);
    cudaThreadSynchronize();
    elapsed_time = array_timer.elapsed();
    printf("elapsed_time for array fire was %f \n", elapsed_time);

    checkCudaErrors(cudaMemcpy(output_image->imageData, d_output_image, widthImage*heightImage*sizeof(unsigned char), cudaMemcpyDeviceToHost));
    //_medianfilter((unsigned char *)gray_image->imageData, (unsigned char *)output_image->imageData, widthImage, heightImage);
    cvSaveImage("output.jpg", output_image);
    return 0;
}
Author: aglenis, Project: gpu_medfilter, Lines: 58, Source: main_large_windows.cpp
Example 7: checkCudaErrors
void
CudaInterface::fillParamMem(ParamMem_t& pmem, int byteVal) {
    checkCudaErrors(cudaSetDevice(mDevID));
    checkCudaErrors(cudaGetDevice(&mDevID));
    std::cout << " setting " << pmem.totalSize * sizeof(float) << " bytes to " << pmem.base << "\n";
    if (pmem.device) {
        checkCudaErrors(cudaThreadSynchronize());
        checkCudaErrors(cudaMemset(pmem.base, byteVal, pmem.totalSize * sizeof(float)));
        checkCudaErrors(cudaThreadSynchronize());
    } else
        memset(pmem.base, byteVal, pmem.totalSize * sizeof(float));
}
Author: ethancaballero, Project: Multiplicative-Recursive-Neural-Net, Lines: 12, Source: host-device_interface.cpp
Example 8: time_ongpu
void time_ongpu(int TA, int TB, int m, int k, int n)
{
    int iter = 10;
    float *a = random_matrix(m,k);
    float *b = random_matrix(k,n);

    int lda = (!TA)?k:m;
    int ldb = (!TB)?n:k;

    float *c = random_matrix(m,n);

    float *a_cl = cuda_make_array(a, m*k);
    float *b_cl = cuda_make_array(b, k*n);
    float *c_cl = cuda_make_array(c, m*n);

    int i;
    clock_t start = clock(), end;
    for (i = 0; i < iter; ++i) {
        gemm_ongpu(TA,TB,m,n,k,1,a_cl,lda,b_cl,ldb,1,c_cl,n);
        cudaThreadSynchronize();
    }
    double flop = ((double)m)*n*(2.*k + 2.)*iter;
    double gflop = flop/pow(10., 9);
    end = clock();
    double seconds = sec(end-start);
    printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s, %lf GFLOPS\n", m,k,k,n, TA, TB, seconds, gflop/seconds);

    cuda_free(a_cl);
    cuda_free(b_cl);
    cuda_free(c_cl);
    free(a);
    free(b);
    free(c);
}
Author: hyperchris, Project: Yolo, Lines: 33, Source: gemm.c
Example 9: cudaGetDeviceProperties
void Fflcun2::setConfig(std::string fname) { // WARNING - test it
    this->freeMemory();
    cPar = new ConstParams;
    chPar = new ChangableParams;
    configFileName = fname;
    this->setSettings();

    cudaDeviceProp deviceProp;
    cudaGetDeviceProperties(&deviceProp, 0);
    // TODO - throw exception if no device found
    blocks = ceil((float)cPar->nPart / SHARED_ARRAY);
    blocks += blocks % deviceProp.multiProcessorCount;
    threads = cPar->nPart / blocks;
    cPar->nPart = threads * blocks;
    std::cout << "After correction number of particles = " << cPar->nPart << std::endl;

    srand(time(NULL));
    this->allocMemory();
    this->initDevMatrixes();
    fillGloabalChangable(chPar);
    fillGloabalConstant(cPar);
    runSetupKernel(blocks, threads, devMatrixes);
    cudaThreadSynchronize();
}
Author: psci2195, Project: fflcu, Lines: 28, Source: fflcun2.cpp
Example 10: cufftDestroy
void gpuNUFFT::GpuNUFFTOperator::freeDeviceMemory(int n_coils)
{
    if (!gpuMemAllocated)
        return;

    // Destroy the cuFFT plan.
    cufftDestroy(fft_plan);
    if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
        printf("error at thread synchronization 9: %s\n", cudaGetErrorString(cudaGetLastError()));
    freeLookupTable();

    freeTotalDeviceMemory(data_indices_d, data_sorted_d, crds_d, gdata_d, sectors_d, sector_centers_d, NULL); // NULL marks the end of the list
    if (n_coils > 1 && deapo_d != NULL)
        cudaFree(deapo_d);
    if (this->applySensData())
        cudaFree(sens_d);
    if (this->applyDensComp())
        cudaFree(density_comp_d);

    showMemoryInfo();
    gpuMemAllocated = false;
}
Author: davidssmith, Project: TRON, Lines: 25, Source: gpuNUFFT_operator.cpp
Example 11: mpla_generic_dgemv
void mpla_generic_dgemv(struct mpla_vector* b, struct mpla_generic_matrix* A, struct mpla_vector* x, void (*mpla_dgemv_core)(struct mpla_vector*, struct mpla_generic_matrix*, struct mpla_vector*, struct mpla_instance*), struct mpla_instance* instance)
{
    // allocate redistributed vector
    struct mpla_vector x_redist;
    mpla_init_vector_for_block_rows(&x_redist, instance, x->vec_row_count);

    // redistribute input vector from row-block to column-block parallel distribution
    mpla_redistribute_vector_for_generic_dgesv(&x_redist, x, A, instance);

    // generic computation core: matrix-vector product
    mpla_dgemv_core(b, A, &x_redist, instance);

    // create sub-communicator for each process row
    int remain_dims[2];
    remain_dims[0] = 0;
    remain_dims[1] = 1;
    MPI_Comm row_comm;
    MPI_Cart_sub(instance->comm, remain_dims, &row_comm);

    // summation of block row results
    double* sum;
    cudaMalloc((void**)&sum, sizeof(double)*b->cur_proc_row_count);
    cudaThreadSynchronize();
    checkCUDAError("cudaMalloc");
    MPI_Allreduce(b->data, sum, b->cur_proc_row_count, MPI_DOUBLE, MPI_SUM, row_comm);
    cudaMemcpy(b->data, sum, sizeof(double)*b->cur_proc_row_count, cudaMemcpyDeviceToDevice);

    // cleanup
    cudaFree(sum);
    mpla_free_vector(&x_redist, instance);
    MPI_Comm_free(&row_comm);
}
Author: zaspel, Project: MPLA, Lines: 33, Source: mpla.cpp
Example 12: cudaLaunch
cudaError_t cudaLaunch(const void *entry)
{
    static cudaError_t (*nv_cudaLaunch)(const void *) = NULL;
    cudaError_t ret;
    struct timeval t;

    if (!nv_cudaLaunch) {
        nv_cudaLaunch = dlsym(RTLD_NEXT, "cudaLaunch");
        if (!nv_cudaLaunch) {
            fprintf(stderr, "failed to find symbol cudaLaunch: %s\n", dlerror());
            return cudaErrorSharedObjectSymbolNotFound;
        }
    }

    gettimeofday(&t, NULL);
    printf("[gvm] %lf intercepting cudaLaunch\n", t.tv_sec + t.tv_usec / 1000000.0);
    ret = nv_cudaLaunch(entry);
    cudaThreadSynchronize();
    gettimeofday(&t, NULL);
    printf("[gvm] %lf intercepted cudaLaunch\n", t.tv_sec + t.tv_usec / 1000000.0);
    return ret;
}
Author: arnaudperin, Project: gpudb, Lines: 27, Source: intercept.c
Example 13: synchronize
/**
 * Synchronize with device.
 *
 * @param sync True to synchronize, false if not.
 */
inline void synchronize(const bool sync = true) {
#ifdef ENABLE_CUDA
    if (sync) {
        CUDA_CHECKED_CALL(cudaThreadSynchronize());
    }
#endif
}
Author: JohannesBuchner, Project: LibBi, Lines: 12, Source: cuda.hpp
Example 14: mvReductArraysToHost
void
mvReductArraysToHost(int reduct_bytes)
{
    cutilSafeCall(cudaMemcpy(OP_reduct_h, OP_reduct_d, reduct_bytes,
                             cudaMemcpyDeviceToHost));
    cutilSafeCall(cudaThreadSynchronize());
}
Author: doru1004, Project: OP2-Common, Lines: 7, Source: op_cuda_rt_support.c
Example 15: mvConstArraysToDevice
void
mvConstArraysToDevice(int consts_bytes)
{
    cutilSafeCall(cudaMemcpy(OP_consts_d, OP_consts_h, consts_bytes,
                             cudaMemcpyHostToDevice));
    cutilSafeCall(cudaThreadSynchronize());
}
Author: doru1004, Project: OP2-Common, Lines: 7, Source: op_cuda_rt_support.c
Example 16: cudaMemcpy
cudaError_t cudaMemcpy(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind)
{
    static cudaError_t (*nv_cudaMemcpy)(void *, const void *, size_t, enum cudaMemcpyKind) = NULL;
    cudaError_t ret;
    struct timeval t;

    if (!nv_cudaMemcpy) {
        nv_cudaMemcpy = dlsym(RTLD_NEXT, "cudaMemcpy");
        if (!nv_cudaMemcpy) {
            fprintf(stderr, "failed to find symbol cudaMemcpy: %s\n", dlerror());
            return cudaErrorSharedObjectSymbolNotFound;
        }
    }

    gettimeofday(&t, NULL);
    printf("[gvm] %lf intercepting cudaMemcpy\n", t.tv_sec + t.tv_usec / 1000000.0);
    ret = nv_cudaMemcpy(dst, src, count, kind);
    cudaThreadSynchronize();
    gettimeofday(&t, NULL);
    printf("[gvm] %lf intercepted cudaMemcpy( %lx %lx %ld %d ) = %d\n", t.tv_sec + t.tv_usec / 1000000.0, (unsigned long)dst, (unsigned long)src, count, kind, (int)ret);
    return ret;
}
Author: arnaudperin, Project: gpudb, Lines: 26, Source: intercept.c
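A note on Examples 12 and 16, both from the same intercept.c: they rely on dynamic-linker interposition. dlsym(RTLD_NEXT, "cudaLaunch") resolves the next definition of the symbol in the library search order, so the wrapper can log a timestamp, forward the call to the real CUDA runtime, and log again; the cudaThreadSynchronize() after the forwarded call makes the second timestamp reflect completion of the device work rather than just the asynchronous launch (at the cost of serializing the application). Such a shim would typically be compiled into a shared object and injected via LD_PRELOAD, along these lines (file and binary names here are illustrative, not taken from the source project): gcc -shared -fPIC intercept.c -o libintercept.so -ldl, then LD_PRELOAD=./libintercept.so ./my_cuda_app.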
Example 17: time_invocation_cuda
template<typename Function, typename Arg1, typename Arg2, typename Arg3>
double time_invocation_cuda(std::size_t num_trials, Function f, Arg1 arg1, Arg2 arg2, Arg3 arg3)
{
    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    cudaEventRecord(start);
    for (std::size_t i = 0; i < num_trials; ++i)
    {
        f(arg1, arg2, arg3);
    }
    cudaEventRecord(stop);
    cudaThreadSynchronize();

    float msecs = 0;
    cudaEventElapsedTime(&msecs, start, stop);

    cudaEventDestroy(start);
    cudaEventDestroy(stop);

    // return mean msecs
    return msecs / num_trials;
}
Author: egaburov, Project: bulk, Lines: 25, Source: time_invocation_cuda.hpp
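A note on Example 17: the cudaThreadSynchronize() after cudaEventRecord(stop) guarantees that the stop event has completed before cudaEventElapsedTime reads it; cudaEventSynchronize(stop) would be the more narrowly targeted alternative. The function returns the mean time per invocation over num_trials runs, which amortizes launch overhead across the loop.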
Example 18: savegpuann
void savegpuann(const gpuann& nn, fann *ann, unsigned int instanceIndex)
{
    unsigned int neuronCount = ann->total_neurons;
    unsigned int weightsCount = ((ann->last_layer - 1)->last_neuron - 1)->last_con;

    chekedcudaMemcpyAsync(nn.h_tmp_sumArray,    nn.d_sumArray     + neuronCount  * instanceIndex, neuronCount  * sizeof(fann_type), cudaMemcpyDeviceToHost);
    chekedcudaMemcpyAsync(nn.h_tmp_valuesArray, nn.d_valuesArray  + neuronCount  * instanceIndex, neuronCount  * sizeof(fann_type), cudaMemcpyDeviceToHost);
    chekedcudaMemcpyAsync(ann->weights,         nn.d_weightsArray + weightsCount * instanceIndex, weightsCount * sizeof(fann_type), cudaMemcpyDeviceToHost);
    cudaThreadSynchronize();

    struct fann_neuron *neuronsArray = ann->first_layer->first_neuron;
    struct fann_layer *last_layer = ann->last_layer;
    struct fann_layer *layer_it = ann->first_layer;

    for (; layer_it != last_layer; layer_it++)
    {
        struct fann_neuron * last_neuron = layer_it->last_neuron;
        struct fann_neuron * neuron_it = layer_it->first_neuron;
        for (; neuron_it != last_neuron; neuron_it++)
        {
            unsigned int currentNeuronShift = neuron_it - neuronsArray;
            neuron_it->value = nn.h_tmp_valuesArray[currentNeuronShift];
            neuron_it->sum = nn.h_tmp_sumArray[currentNeuronShift];
        }
    }
}
Author: verybigbadboy, Project: gpuann, Lines: 26, Source: gpuannDataCreator.cpp
Example 19: keplereq_wrapper_c
// keplereq_wrapper_c:
// C wrapper function that solves Kepler's equation num times.
// inputs:
//   ph_ma:      pointer to first element of an array of doubles containing mean anomaly in radians
//   ph_ecc:     pointer to first element of an array of doubles containing eccentricity
//   num:        integer size of input arrays
//   ph_eccanom: pointer to first element of an array of doubles for eccentric anomaly in radians
// outputs:
//   ph_eccanom: values overwritten with eccentric anomaly
// assumptions:
//   input mean anomalies between 0 and 2pi
//   input eccentricities between 0 and 1
//   all three arrays have at least num elements
//
void keplereq_wrapper_c(double *ph_ma, double *ph_ecc, int num, double *ph_eccanom)
{
    int gpuid = init_cuda();

    // put vectors in thrust format from raw pointers
    thrust::host_vector<double> h_ecc(ph_ecc, ph_ecc + num);
    thrust::host_vector<double> h_ma(ph_ma, ph_ma + num);

    cutCreateTimer(&memoryTime); cutCreateTimer(&kernelTime);
    cutResetTimer(memoryTime);   cutResetTimer(kernelTime);

    if (gpuid >= 0)
    {
        cutStartTimer(memoryTime);
        // transfer input params to GPU
        thrust::device_vector<double> d_ecc = h_ecc;
        thrust::device_vector<double> d_ma = h_ma;
        // allocate mem on GPU
        thrust::device_vector<double> d_eccanom(num);
        cudaThreadSynchronize();
        cutStopTimer(memoryTime);

        // distribute the computation to the GPU
        cutStartTimer(kernelTime);
        thrust::for_each(
            thrust::make_zip_iterator(thrust::make_tuple(d_ma.begin(), d_ecc.begin(), d_eccanom.begin())),
            thrust::make_zip_iterator(thrust::make_tuple(d_ma.end(),   d_ecc.end(),   d_eccanom.end())),
            keplereq_functor());
        cudaThreadSynchronize();
        cutStopTimer(kernelTime);

        // transfer results back to host
        cutStartTimer(memoryTime);
        thrust::copy(d_eccanom.begin(), d_eccanom.end(), ph_eccanom);
        cudaThreadSynchronize();
        cutStopTimer(memoryTime);
    }
    else
    {
        // distribute the computation to the CPU
        cutStartTimer(kernelTime);
        thrust::for_each(
            thrust::make_zip_iterator(thrust::make_tuple(h_ma.begin(), h_ecc.begin(), ph_eccanom)),
            thrust::make_zip_iterator(thrust::make_tuple(h_ma.end(),   h_ecc.end(),   ph_eccanom + num)),
            keplereq_functor());
        cutStopTimer(kernelTime);
    }
}
Author: jkrick, Project: idlbin, Lines: 61, Source: keplereq.c
Example 20: op_fetch_data
void
op_fetch_data(op_dat dat)
{
    cutilSafeCall(cudaMemcpy(dat->data, dat->data_d,
                             dat->size * dat->set->size,
                             cudaMemcpyDeviceToHost));
    cutilSafeCall(cudaThreadSynchronize());
}
Author: doru1004, Project: OP2-Common, Lines: 8, Source: op_cuda_rt_support.c
Note: The cudaThreadSynchronize examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their respective authors; copyright in the source code remains with the original authors, and distribution and use are subject to each project's license. Do not republish without permission.