本文整理汇总了C++中cudaEventRecord函数的典型用法代码示例。如果您正苦于以下问题：C++ cudaEventRecord函数的具体用法？C++ cudaEventRecord怎么用？C++ cudaEventRecord使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了cudaEventRecord函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: cudaEventRecord
/**
 * Records the stop event on the timer's stream, blocks until that event has
 * completed, and reports the time elapsed since the start event.
 *
 * @return Elapsed time between start_ and stop_ as reported by
 *         cudaEventElapsedTime() (milliseconds); 0.0f if the query fails
 *         without writing a result.
 */
float TimerGPU::read() {
    cudaEventRecord(stop_, stream_);
    cudaEventSynchronize(stop_);

    // Initialised so that a failing cudaEventElapsedTime() cannot make this
    // function return an indeterminate value.
    float time = 0.0f;
    cudaEventElapsedTime(&time, start_, stop_);
    return time;
}
开发者ID:bbferka,项目名称:simtrack,代码行数:7,代码来源:utilities.cpp
示例2: runBenchmark
/**
 * Runs `iterations` simulation updates between two CUDA events and prints the
 * measured time together with the derived performance figures.
 *
 * Relies on names defined outside this block: nbody, activeParams,
 * startEvent, stopEvent, numBodies, computePerfStats and cutilSafeCall.
 *
 * @param iterations Number of timed nbody->update() calls.
 */
void runBenchmark(int iterations)
{
    // once without timing to prime the GPU
    nbody->update(activeParams.m_timestep);

    cutilSafeCall(cudaEventRecord(startEvent, 0));

    for (int i = 0; i < iterations; ++i)
    {
        nbody->update(activeParams.m_timestep);
    }

    cutilSafeCall(cudaEventRecord(stopEvent, 0));
    // Checked like every other CUDA call here: errors from the asynchronous
    // work in the loop are reported by this synchronising call.
    cutilSafeCall(cudaEventSynchronize(stopEvent));

    float milliseconds = 0;
    cutilSafeCall( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent));

    double interactionsPerSecond = 0;
    double gflops = 0;
    computePerfStats(interactionsPerSecond, gflops, milliseconds, iterations);

    printf("%d bodies, total time for %d iterations: %0.3f ms\n",
           numBodies, iterations, milliseconds);
    printf("= %0.3f billion interactions per second\n", interactionsPerSecond);
    printf("= %0.3f GFLOP/s at %d flops per interaction\n", gflops, 20);
}
开发者ID:AnkurAnandapu,项目名称:ocelot-fork,代码行数:26,代码来源:nbody.cpp
示例3: TEST
// Records an event after it has been destroyed. With CUDART_VERSION >= 5000
// the call is expected to return cudaErrorUnknown; with older runtimes the
// process is expected to die with SIGSEGV.
TEST(EventRecord, RecordAfterDestroy) {
    ::testing::FLAGS_gtest_death_test_style = "threadsafe";

    // Create the event and destroy it straight away.
    cudaEvent_t event;
    ASSERT_EQ(cudaSuccess, cudaEventCreate(&event));
    EXPECT_EQ(cudaSuccess, cudaEventDestroy(event));

    cudaStream_t stream;
    ASSERT_EQ(cudaSuccess, cudaStreamCreate(&stream));

#if CUDART_VERSION >= 5000
    EXPECT_EQ(cudaErrorUnknown, cudaEventRecord(event));
#else
    EXPECT_EXIT(
        cudaEventRecord(event, stream),
        ::testing::KilledBySignal(SIGSEGV), "");
#endif

    EXPECT_EQ(cudaSuccess, cudaStreamDestroy(stream));
}
开发者ID:ckennelly,项目名称:panoptes,代码行数:28,代码来源:test_eventrecord.cpp
示例4: time_invocation_cuda
double time_invocation_cuda(std::size_t num_trials, Function f, Arg1 arg1, Arg2 arg2, Arg3 arg3)
{
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start);
for(std::size_t i = 0;
i < num_trials;
++i)
{
f(arg1,arg2,arg3);
}
cudaEventRecord(stop);
cudaThreadSynchronize();
float msecs = 0;
cudaEventElapsedTime(&msecs, start, stop);
cudaEventDestroy(start);
cudaEventDestroy(stop);
// return mean msecs
return msecs / num_trials;
}
开发者ID:egaburov,项目名称:bulk,代码行数:25,代码来源:time_invocation_cuda.hpp
示例5: trainMethodsSpeedTestGPU
// Trains a copy of `ann` with the requested training algorithm through
// gpuann_fann_parallel_train_on_data(), measures the call with CUDA events
// and prints the elapsed time. The copy is destroyed before returning.
void trainMethodsSpeedTestGPU(fann *ann, fann_train_data* train, unsigned int trainingAlgorithm, unsigned int epochCount)
{
    fann *gpuNet = fann_copy(ann);
    gpuNet->training_algorithm = static_cast<fann_train_enum>(trainingAlgorithm);

    cudaEvent_t evBegin, evEnd;
    cudaEventCreate(&evBegin);
    cudaEventCreate(&evEnd);

    // Timed region: the training call only.
    cudaEventRecord(evBegin, 0);
    gpuann_fann_parallel_train_on_data(gpuNet, train, epochCount);
    cudaEventRecord(evEnd, 0);
    cudaEventSynchronize(evEnd);

    float elapsedMs;
    cudaEventElapsedTime(&elapsedMs, evBegin, evEnd);

    cudaEventDestroy(evBegin);
    cudaEventDestroy(evEnd);

    printf("%10.5f ", elapsedMs);

    fann_destroy(gpuNet);
}
开发者ID:verybigbadboy,项目名称:gpuann,代码行数:26,代码来源:trainSpeed.cpp
示例6: main
int main()
{
cudaEvent_t start;
cudaEvent_t end;
float duration;
const float overestimateRate = 0.01f;
const float errorRate = 0.01f;
Tokenizer tokenizer( overestimateRate, errorRate );
/************** Test counting string tokens *************/
TextReader reader;
cudaEventCreate( &start );
cudaEventRecord( start, 0 );
reader.Read();
tokenizer.StartTokenizing(
reader.GetCharBuffer(),
reader.GetOffsetBuffer(),
reader.GetCharBufferSize(),
reader.GetOffsetBufferSize() );
cudaEventCreate( &end );
cudaEventRecord( end, 0 );
cudaEventSynchronize( end );
cudaEventElapsedTime( &duration, start, end );
printf( "Time taken: %.3lf milliseconds\n", duration );
tokenizer.GetFrequency( "a" );
}
开发者ID:YSZhuoyang,项目名称:CountMinParallel,代码行数:32,代码来源:Main.cpp
示例7: check
float bench::ClockBenchmark::_determineCycleTime() {
cudaEvent_t start, end;
check( cudaEventCreate(&start) );
check( cudaEventCreate(&end) );
unsigned long long elapsedCycles;
unsigned long long* deviceElapsedCycles;
long long int* deviceDummyMem;
const dim3 grid(1,1,1), block(1,1,1);
check( cudaMalloc((void**)&deviceElapsedCycles, sizeof(unsigned long long)) );
check( cudaMalloc((void**)&deviceDummyMem, sizeof(long long int)) );
check( cudaEventRecord(start) );
cudaDetermineCycleTimeWrapper(deviceElapsedCycles, deviceDummyMem, grid, block);
check( cudaEventRecord(end) );
check( cudaDeviceSynchronize() );
check( cudaMemcpy(&elapsedCycles, deviceElapsedCycles, sizeof(unsigned long long), cudaMemcpyDeviceToHost) );
float elapsedTime = 0;
check( cudaEventElapsedTime(&elapsedTime, start, end) );
report(util::Indents(2) << "elapsed time: " << elapsedTime << "ms");
report(util::Indents(2) << "elapsed cycles: " << elapsedCycles);
return elapsedTime * 1000000.0 / (float)elapsedCycles;
}
开发者ID:wangbiaouestc,项目名称:clpeak,代码行数:30,代码来源:ClockBenchmark.cpp
示例8: runCuda
void runCuda()
{
//////////////////////
// Timing cuda call //
//////////////////////
float time;
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
// Map OpenGL buffer object for writing from CUDA on a single GPU
// No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer
dptr=NULL;
vbo = mesh->getVBO();
vbosize = mesh->getVBOsize();
nbo = mesh->getNBO();
nbosize = mesh->getNBOsize();
#if RGBONLY == 1
float newcbo[] = {0.0, 1.0, 0.0,
0.0, 0.0, 1.0,
1.0, 0.0, 0.0};
cbo = newcbo;
cbosize = 9;
#elif RGBONLY == 0
vec3 defaultColor(0.5f, 0.5f, 0.5f);
mesh->changeColor(defaultColor);
cbo = mesh->getCBO();
cbosize = mesh->getCBOsize();
#endif
ibo = mesh->getIBO();
ibosize = mesh->getIBOsize();
cudaGLMapBufferObject((void**)&dptr, pbo);
updateCamera();
cudaRasterizeCore(cam, dptr, glm::vec2(width, height), frame, vbo, vbosize, cbo, cbosize, ibo, ibosize, nbo, nbosize, lights, lightsize, alpha, beta, displayMode);
cudaGLUnmapBufferObject(pbo);
vbo = NULL;
cbo = NULL;
ibo = NULL;
frame++;
fpstracker++;
//////////////////////
// Timing cuda call //
//////////////////////
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
printf("runCuda runtime: %3.1f ms \n", time);
}
开发者ID:mchen15,项目名称:Project4-Rasterizer,代码行数:59,代码来源:main.cpp
示例9: contractTT
// Contracts the two tensor trains TT1 and TT2 (n cores each) and accumulates
// one value per (a, b) combination of the first cores' leading dimension.
// Prints timing and throughput figures, then resets the device and
// terminates the process with exit(0).
// `type` and `bops` are defined outside this block.
void contractTT(sTensorGPU *TT1, sTensorGPU *TT2, const int n, const int size)
{
    cublasHandle_t blas;
    cublasCreate(&blas);
    type total=0;

    // Two scratch tensors that the contraction chain alternates between.
    sTensorGPU scratchA = emptyTensor(size*size,2);
    sTensorGPU scratchB = emptyTensor(size*size*2,3);

    cudaEvent_t evBegin;
    cudaEventCreate(&evBegin);
    cudaEvent_t evEnd;
    cudaEventCreate(&evEnd);

    //printf("Start contractTT\n");
    cudaEventRecord(evBegin, NULL);

    const int extentA = TT1[0].size[0];
    const int extentB = TT2[0].size[0];

    // Host copies of the boundary cores; the device-side boundary cores are
    // overwritten inside the loops below.
    sTensorCPU firstCoreA = copyToCPU(TT1[0]);
    sTensorCPU firstCoreB = copyToCPU(TT2[0]);
    sTensorCPU lastCoreA = copyToCPU(TT1[n - 1]);
    sTensorCPU lastCoreB = copyToCPU( TT2[n - 1]);

    for (int a = 0; a < extentA; ++a){
        TT1[0] = prepareTensorStart(firstCoreA, a);
        TT1[n - 1] = prepareTensorEnd(lastCoreA, a);

        for (int b = 0; b < extentB; ++b){
            TT2[0] = prepareTensorStart(firstCoreB, b);
            TT2[n - 1] = prepareTensorEnd(lastCoreB, b);

            contractTensor(blas, TT1[0], TT2[0], scratchA);
            for (int core = 1; core < n; ++core){
                contractTensor(blas, scratchA, TT1[core], scratchB);
                contractTensor(blas, scratchB, TT2[core], scratchA, 2);
            }

            // Fetch the first element of the final scratch tensor and add it
            // to the running total.
            type partial = 0;
            cudaMemcpy(&partial, scratchA.deviceData, sizeof(type), cudaMemcpyDeviceToHost);
            //printf("%e ", partial);
            total += partial;
        }
    }

    cudaEventRecord(evEnd, NULL);
    cudaEventSynchronize(evEnd);

    float msecTotal = 0.0f;
    cudaEventElapsedTime(&msecTotal, evBegin, evEnd);

    printf("Time: %.3fms\n", msecTotal);
    printf("Ops: %.0f\n", bops);
    double gigaFlops = (bops * 1.0e-9f) / (msecTotal / 1000.0f);
    printf("Perf= %.2f GFlop/s\n", gigaFlops);

    cublasDestroy(blas);
    cudaDeviceReset();

    printf("%.5e \n", total);
    exit(0);
}
开发者ID:thomas-hoer,项目名称:cuTT,代码行数:58,代码来源:bigSizeTensors.cpp
示例10: cudaEventRecord
// Runs this filter on one input frame by delegating to run_filter(), and
// optionally measures the call with a pair of CUDA events.
//
// pInputFrame      frame to process; may be nullptr (handled explicitly below).
// ppOutputFrames   array receiving the output frame pointers.
// pOutputFrameNum  receives the number of output frames.
//
// Returns the status produced by run_filter(), or NV_ENC_ERR_INVALID_CALL when
// timestamp pass-through is requested but the filter did not produce exactly
// one frame. CUDA event errors are only logged; they do not change the result.
NVENCSTATUS NVEncFilter::filter(FrameInfo *pInputFrame, FrameInfo **ppOutputFrames, int *pOutputFrameNum) {
cudaError_t cudaerr = cudaSuccess;
// Start-of-filter timestamp, only when performance checking is enabled.
if (m_bCheckPerformance) {
cudaerr = cudaEventRecord(*m_peFilterStart.get());
if (cudaerr != cudaSuccess) {
AddMessage(RGY_LOG_ERROR, _T("failed cudaEventRecord(m_peFilterStart): %s.\n"), char_to_tstring(cudaGetErrorString(cudaerr)).c_str());
}
}
// No input frame: reset the output slot before calling run_filter().
if (pInputFrame == nullptr) {
*pOutputFrameNum = 0;
ppOutputFrames[0] = nullptr;
}
// In overwrite mode the input frame itself is used as the single output frame.
if (m_pParam
&& m_pParam->bOutOverwrite // overwrite mode?
&& pInputFrame != nullptr && pInputFrame->ptr != nullptr // does the input exist?
&& ppOutputFrames != nullptr && ppOutputFrames[0] == nullptr) { // can the output destination be set?
ppOutputFrames[0] = pInputFrame;
*pOutputFrameNum = 1;
}
const auto ret = run_filter(pInputFrame, ppOutputFrames, pOutputFrameNum);
const int nOutFrame = *pOutputFrameNum;
// NOTE(review): m_pParam is dereferenced here without the null check used
// above, and pInputFrame is dereferenced below even though it may be nullptr
// when the filter still emits frames - confirm callers rule both cases out.
if (!m_pParam->bOutOverwrite && nOutFrame > 0) {
if (m_nPathThrough & FILTER_PATHTHROUGH_TIMESTAMP) {
if (nOutFrame != 1) {
AddMessage(RGY_LOG_ERROR, _T("timestamp path through can only be applied to 1-in/1-out filter.\n"));
return NV_ENC_ERR_INVALID_CALL;
} else {
ppOutputFrames[0]->timestamp = pInputFrame->timestamp;
ppOutputFrames[0]->duration = pInputFrame->duration;
}
}
// Copy the per-frame attributes selected by the pass-through mask.
for (int i = 0; i < nOutFrame; i++) {
if (m_nPathThrough & FILTER_PATHTHROUGH_FLAGS) ppOutputFrames[i]->flags = pInputFrame->flags;
if (m_nPathThrough & FILTER_PATHTHROUGH_PICSTRUCT) ppOutputFrames[i]->picstruct = pInputFrame->picstruct;
}
}
// End-of-filter timestamp; the elapsed time is added to the running totals.
if (m_bCheckPerformance) {
cudaerr = cudaEventRecord(*m_peFilterFin.get());
if (cudaerr != cudaSuccess) {
AddMessage(RGY_LOG_ERROR, _T("failed cudaEventRecord(m_peFilterFin): %s.\n"), char_to_tstring(cudaGetErrorString(cudaerr)).c_str());
}
cudaerr = cudaEventSynchronize(*m_peFilterFin.get());
if (cudaerr != cudaSuccess) {
AddMessage(RGY_LOG_ERROR, _T("failed cudaEventSynchronize(m_peFilterFin): %s.\n"), char_to_tstring(cudaGetErrorString(cudaerr)).c_str());
}
float time_ms = 0.0f;
cudaerr = cudaEventElapsedTime(&time_ms, *m_peFilterStart.get(), *m_peFilterFin.get());
if (cudaerr != cudaSuccess) {
AddMessage(RGY_LOG_ERROR, _T("failed cudaEventElapsedTime(m_peFilterStart - m_peFilterFin): %s.\n"), char_to_tstring(cudaGetErrorString(cudaerr)).c_str());
}
m_dFilterTimeMs += time_ms;
m_nFilterRunCount++;
}
return ret;
}
开发者ID:ming-hai,项目名称:NVEnc,代码行数:56,代码来源:NVEncFilter.cpp
示例11: dslashCUDA
// execute kernel
double dslashCUDA() {
printfQuda("Executing %d kernel loops...\n", loops);
fflush(stdout);
if (test_type < 2)
dirac->Tune(*cudaSpinorOut, *cudaSpinor, *tmp);
else
dirac->Tune(cudaSpinorOut->Even(), cudaSpinor->Even(), *tmp);
cudaEvent_t start, end;
cudaEventCreate(&start);
cudaEventRecord(start, 0);
cudaEventSynchronize(start);
for (int i = 0; i < loops; i++) {
switch (test_type) {
case 0:
if (transfer) {
dslashQuda(spinorOut->V(), spinor->V(), &inv_param, parity);
} else {
dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity);
}
break;
case 1:
case 2:
if (transfer) {
MatQuda(spinorOut->V(), spinor->V(), &inv_param);
} else {
dirac->M(*cudaSpinorOut, *cudaSpinor);
}
break;
}
}
cudaEventCreate(&end);
cudaEventRecord(end, 0);
cudaEventSynchronize(end);
float runTime;
cudaEventElapsedTime(&runTime, start, end);
cudaEventDestroy(start);
cudaEventDestroy(end);
double secs = runTime / 1000; //stopwatchReadSeconds();
// check for errors
cudaError_t stat = cudaGetLastError();
if (stat != cudaSuccess)
printf("with ERROR: %s\n", cudaGetErrorString(stat));
printf("done.\n\n");
return secs;
}
开发者ID:fwinter,项目名称:quda,代码行数:55,代码来源:domain_wall_dslash_test.cpp
示例12: dslashCUDA
// execute kernel
double dslashCUDA(int niter) {
cudaEvent_t start, end;
cudaEventCreate(&start);
cudaEventCreate(&end);
cudaEventRecord(start, 0);
for (int i = 0; i < niter; i++) {
switch (test_type) {
case 0:
if (transfer) {
dslashQuda(spinorOut->V(), spinor->V(), &inv_param, parity);
} else {
//inv_param.input_location = QUDA_CUDA_FIELD_LOCATION;
//inv_param.output_location = QUDA_CUDA_FIELD_LOCATION;
//dslashQuda(cudaSpinorOut->V(), cudaSpinor->V(), &inv_param, parity);
dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity);
}
break;
case 1:
case 2:
if (transfer) {
MatQuda(spinorOut->V(), spinor->V(), &inv_param);
} else {
dirac->M(*cudaSpinorOut, *cudaSpinor);
}
break;
case 3:
case 4:
if (transfer) {
MatDagMatQuda(spinorOut->V(), spinor->V(), &inv_param);
} else {
dirac->MdagM(*cudaSpinorOut, *cudaSpinor);
}
break;
}
}
cudaEventRecord(end, 0);
cudaEventSynchronize(end);
float runTime;
cudaEventElapsedTime(&runTime, start, end);
cudaEventDestroy(start);
cudaEventDestroy(end);
double secs = runTime / 1000; //stopwatchReadSeconds();
// check for errors
cudaError_t stat = cudaGetLastError();
if (stat != cudaSuccess)
printfQuda("with ERROR: %s\n", cudaGetErrorString(stat));
return secs;
}
开发者ID:kpetrov,项目名称:quda,代码行数:55,代码来源:dslash_test.cpp
示例13: main
/*
 * Allocates and initialises the grid on the device, times one call to
 * cuPoisson() with CUDA events, writes the result to "data.dat" and releases
 * the device resources.
 *
 * Returns EXIT_SUCCESS.
 */
int main(int argc, char **argv)
{
    // device memory
    real *psi_d, *z_d;

    size_t fSize = sizeof(real);

    /* grid dimensions */
    unsigned int Nx = 513, Ny = 513;

    // omitting boundaries
    unsigned int nGridPoints = (Nx-2)*(Ny-2);

    cudaMalloc((void **) &psi_d, (nGridPoints+1)*fSize);
    cudaMalloc((void **) &z_d, (nGridPoints+1)*fSize);

    /* initialization */
    fillArray(psi_d, 0.0, nGridPoints+1);
    fillArray(z_d, 1.0, nGridPoints+1);
    checkCudaError("Initialization of grid");

    // for timing purposes
    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    // start timer
    cudaEventRecord(start,0);

    /* Call the poisson solver, right hand side
     * is stored on the device in z_d (make sure the data
     * is copied from CPU to GPU!), result is stored in
     * psi_d (on the GPU/device).
     * Here NX-2 is the width of the grid's interior
     * (without the boundaries).
     */
    cuPoisson((Nx-2), psi_d, z_d);

    // stop timer
    cudaEventRecord(stop,0);
    cudaEventSynchronize(stop);

    /* Initialised so a failed query cannot print an indeterminate value. */
    float computationTime = 0.0f;
    cudaEventElapsedTime(&computationTime, start, stop);

    printf("Computation time was %.5f seconds.\n\n", computationTime/1000.0);

    printf("Writing result to disk...\n");
    // write result to file
    writeBinaryFile(Nx, Ny, psi_d, "data.dat");
    printf("done\n");

    /* Release the timing events and device buffers that were previously
     * left allocated until process exit. */
    cudaEventDestroy(start);
    cudaEventDestroy(stop);
    cudaFree(psi_d);
    cudaFree(z_d);

    return EXIT_SUCCESS;
}
开发者ID:JackeryShh,项目名称:cupoisson,代码行数:51,代码来源:main.c
示例14: dslashCUDA
double dslashCUDA(int niter) {
cudaEvent_t start, end;
cudaEventCreate(&start);
cudaEventRecord(start, 0);
cudaEventSynchronize(start);
for (int i = 0; i < niter; i++) {
switch (test_type) {
case 0:
parity = QUDA_EVEN_PARITY;
if (transfer){
//dslashQuda(spinorOdd, spinorEven, &inv_param, parity);
} else {
dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity);
}
break;
case 1:
parity = QUDA_ODD_PARITY;
if (transfer){
//MatPCQuda(spinorOdd, spinorEven, &inv_param);
} else {
dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity);
}
break;
case 2:
if (transfer){
//MatQuda(spinorGPU, spinor, &inv_param);
} else {
dirac->M(*cudaSpinorOut, *cudaSpinor);
}
}
}
cudaEventCreate(&end);
cudaEventRecord(end, 0);
cudaEventSynchronize(end);
float runTime;
cudaEventElapsedTime(&runTime, start, end);
cudaEventDestroy(start);
cudaEventDestroy(end);
double secs = runTime / 1000; //stopwatchReadSeconds();
// check for errors
cudaError_t stat = cudaGetLastError();
if (stat != cudaSuccess)
errorQuda("with ERROR: %s\n", cudaGetErrorString(stat));
return secs;
}
开发者ID:kpetrov,项目名称:quda,代码行数:51,代码来源:staggered_dslash_test.cpp
示例15: test_2gpu
// use_cuda_time = 1: use cudaEventElapsedTime()
// or use getSystemTime()
void test_2gpu(float *d_send_data, float *d_recv_data, int size, int id0, int id1, bool use_cuda_time)
{
if(use_cuda_time) {
cudaEvent_t start_event, stop_event;
float time_memcpy;
// version I
//cudaEventCreate(&start_event);
//cudaEventCreate(&stop_event);
//cudaEventRecord(start_event, 0);
// version II
int eventflags = cudaEventBlockingSync;
cudaEventCreateWithFlags(&start_event, eventflags);
cudaEventCreateWithFlags(&stop_event, eventflags);
cudaEventRecord(start_event, 0);
for(int i=0; i<CNT; i++) {
cudaMemcpy(d_recv_data, d_send_data, size*sizeof(float), cudaMemcpyDeviceToDevice);
}
std::cout << "hello, use_cuda_time" << std::endl;
cudaEventRecord(stop_event, 0);
cudaEventSynchronize(stop_event);
cudaEventElapsedTime(&time_memcpy, start_event, stop_event); // ms
std::cout << "Time is " << time_memcpy/1000. << "s" << std::endl;
std::cout << "GPU" << id0 << " ---> GPU" << id1 << " :" <<
WIDTH*HEIGHT*sizeof(float)*CNT*1000./(1024*1024*time_memcpy) << "MB/s" << std::endl;
cudaEventDestroy(start_event);
cudaEventDestroy(stop_event);
} else {
//cudaEvent_t start_event;
//cudaEventCreate(&start_event);
long long start = getSystemTime();
for(int i=0; i<CNT; i++) {
cudaMemcpy(d_recv_data, d_send_data, size*sizeof(float), cudaMemcpyDeviceToDevice);
//cudaMemcpyPeer(d_recv_data, id1, d_send_data, id0, size*sizeof(float));
}
//cudaEventRecord(start_event, 0);
//cudaEventSynchronize(start_event);
long long end = getSystemTime();
std::cout << "Time is " << (end-start)/1000. << "s" << std::endl;
std::cout << "GPU" << id0 << " ---> GPU" << id1 << " :" <<
WIDTH*HEIGHT*sizeof(float)*CNT*1000./(1024*1024*(end - start+1)) << "MB/s" << std::endl;
} //WIDTH*HEIGHT*4.*CNT/(1000*(end - start)) << "Mb/s" << std::endl;
}
开发者ID:tornadomeet,项目名称:test_code,代码行数:51,代码来源:bandwidth_test_between2gpu.cpp
示例16: cudaCheckError
// Records the stop event on the timer's stream, waits for it, and returns the
// elapsed time since the start event as reported by cudaEventElapsedTime().
// When bResetStart is true the start event is recorded again afterwards, so
// the next measurement begins at this point.
float CCudaTimeMeasure::GetTimeout(bool bResetStart/* = false*/)
{
    float fElapsedMs = 0.0f;

    cudaCheckError(cudaEventRecord(m_ceStopEvent, m_csStreamID));
    cudaCheckError(cudaEventSynchronize(m_ceStopEvent));
    cudaCheckError(cudaEventElapsedTime(&fElapsedMs, m_ceStartEvent, m_ceStopEvent));

    if (!bResetStart)
    {
        return fElapsedMs;
    }

    cudaCheckError(cudaEventRecord(m_ceStartEvent, m_csStreamID));
    return fElapsedMs;
}
开发者ID:starand,项目名称:cpp,代码行数:15,代码来源:CudaTimeMeasure.cpp
示例17: _runBenchmark
// Times `iterations` calls to m_nbody->update() and prints the total, the
// mean and the derived throughput. The CPU path uses the sdk timer; the
// device path uses the startEvent/stopEvent pair.
void _runBenchmark(int iterations)
{
    // once without timing to prime the device
    if (!useCpu)
    {
        m_nbody->update(activeParams.m_timestep);
    }

    // Start whichever clock matches the execution path.
    if (!useCpu)
    {
        checkCudaErrors(cudaEventRecord(startEvent, 0));
    }
    else
    {
        sdkCreateTimer(&timer);
        sdkStartTimer(&timer);
    }

    for (int remaining = iterations; remaining > 0; --remaining)
    {
        m_nbody->update(activeParams.m_timestep);
    }

    float elapsedMs = 0;

    if (!useCpu)
    {
        checkCudaErrors(cudaEventRecord(stopEvent, 0));
        checkCudaErrors(cudaEventSynchronize(stopEvent));
        checkCudaErrors(cudaEventElapsedTime(&elapsedMs, startEvent, stopEvent));
    }
    else
    {
        sdkStopTimer(&timer);
        elapsedMs = sdkGetTimerValue(&timer);
        sdkStartTimer(&timer);
    }

    double interactionsPerSecond = 0;
    double gflops = 0;
    computePerfStats(interactionsPerSecond, gflops, elapsedMs, iterations);

    printf("%d bodies, total time for %d iterations: %.3f ms, mean %f\n",
           numBodies, iterations, elapsedMs, elapsedMs/iterations);
    printf("= %.3f billion interactions per second\n", interactionsPerSecond);
    printf("= %.3f %s-precision GFLOP/s at %d flops per interaction\n", gflops,
           (sizeof(T) > 4) ? "double" : "single", flopsPerInteraction);
}
开发者ID:benl23x5,项目名称:papers,代码行数:48,代码来源:nbody.cpp
示例18: one_body_gradient_PBC
// Launches the one-body gradient computation for particle `iat` over every
// center group that has a GPU spline, then starts an asynchronous copy of the
// result to OneGradHost and records gradientSyncOneBodyEvent on the memory
// stream. `grad` is not written here.
void OneBodyJastrowOrbitalBspline::calcGradient
(MCWalkerConfiguration &W, int iat, vector<GradType> &grad)
{
    vector<Walker_t*> &walkerList = W.WalkerList;
    CudaReal cellRadius = W.Lattice.SimulationCellRadius;

    // Grow the host and device buffers when there are more walkers than before.
    if (OneGradHost.size() < OHMMS_DIM*walkerList.size())
    {
        OneGradHost.resize (walkerList.size()*OHMMS_DIM);
        OneGradGPU.resize (walkerList.size()*OHMMS_DIM, 1.25);
    }

    // True only for the first launched group; passed to the kernel wrappers
    // as their `zero` argument.
    bool isFirstLaunch = true;
    for (int group=0; group<NumCenterGroups; group++)
    {
        int first = CenterFirst[group];
        int last = CenterLast[group];

        if (!GPUSplines[group])
            continue;

        CudaSpline<CudaReal> &groupSpline = *(GPUSplines[group]);
        if (!UsePBC)
        {
            one_body_gradient (W.RList_GPU.data(), iat, C.data(), first, last,
                               groupSpline.coefs.data(), groupSpline.coefs.size(),
                               groupSpline.rMax, isFirstLaunch, OneGradGPU.data(), walkerList.size());
        }
        else
        {
            one_body_gradient_PBC (W.RList_GPU.data(), iat, C.data(), first, last,
                                   groupSpline.coefs.data(), groupSpline.coefs.size(),
                                   groupSpline.rMax, L.data(), Linv.data(), cellRadius,
                                   isFirstLaunch, OneGradGPU.data(), walkerList.size());
        }
        isFirstLaunch = false;
    }

    // Copy data back to CPU memory
    gpu::streamsSynchronize();
    OneGradHost.asyncCopy(OneGradGPU);
    cudaEventRecord(gpu::gradientSyncOneBodyEvent, gpu::memoryStream);
}
开发者ID:digideskio,项目名称:qmcpack,代码行数:35,代码来源:OneBodyJastrowOrbitalBspline.cpp
示例19: start_cuda_timer_ev
/*
 * Starts the CUDA timer for the given CUDA event.
 *
 * When profiling is compiled in, waits for all outstanding device work and
 * then records timing_event on stream 0; otherwise does nothing.
 *
 * Returns EXIT_SUCCESS or EXIT_FAILURE.
 */
int start_cuda_timer_ev( cudaEvent_t timing_event )
{

	#if NMFGPU_PROFILING_TRANSF || NMFGPU_PROFILING_KERNELS

		/* Waits for *ALL* operations.
		 * NOTE: The CPU thread will block or spin according to flags
		 *	specified in init_GPU().
		 */
		cudaError_t status = cudaDeviceSynchronize();
		if ( status != cudaSuccess ) {
			print_error( sys_error_shown_by_all, "CUDA Error detected: %s\n", cudaGetErrorString(status) );
			return EXIT_FAILURE;
		}

		// Registers the current "timestamp".
		status = cudaEventRecord( timing_event, 0 );
		if ( status != cudaSuccess ) {
			print_error( sys_error_shown_by_all, "Error recording a CUDA event: %s\n", cudaGetErrorString(status) );
			return EXIT_FAILURE;
		}

	#endif /* if NMFGPU_PROFILING_TRANSF || NMFGPU_PROFILING_KERNELS */

	return EXIT_SUCCESS;

} // start_cuda_timer_ev
开发者ID:BioinformaticsArchive,项目名称:bionmf-gpu,代码行数:36,代码来源:timing.c
示例20: stop
// Records the stop event if the timer is running; otherwise prints an error
// message to std::cerr and does nothing else.
void stop() {
    if(is_running_) {
        cudaEventRecord(custop_);
        return;
    } // if
    std::cerr << "error: timer is not running" << std::endl;
} // stop()
开发者ID:mywoodstock,项目名称:HipGISAXS,代码行数:7,代码来源:woo_cudatimers.hpp
注：本文中的cudaEventRecord函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论