本文整理汇总了C++中cudaEventElapsedTime函数的典型用法代码示例。如果您正苦于以下问题：C++ cudaEventElapsedTime函数的具体用法？C++ cudaEventElapsedTime怎么用？C++ cudaEventElapsedTime使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了cudaEventElapsedTime函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: cudaEventRecord
/**
 * Records the stop event on the timer's stream, blocks until that event has
 * completed, and returns the time between the start and stop events as
 * reported by cudaEventElapsedTime (milliseconds).
 */
float TimerGPU::read() {
  float elapsed_ms;

  cudaEventRecord(stop_, stream_);
  cudaEventSynchronize(stop_);
  cudaEventElapsedTime(&elapsed_ms, start_, stop_);

  return elapsed_ms;
}
开发者ID:bbferka,项目名称:simtrack,代码行数:7,代码来源:utilities.cpp
示例2: runBenchmark
/**
 * Times `iterations` calls of nbody->update() with CUDA events and prints the
 * total elapsed time together with the statistics filled in by
 * computePerfStats().
 *
 * @param iterations  number of timed update() calls (one extra, untimed call
 *                    is issued first as a warm-up).
 */
void runBenchmark(int iterations)
{
    // once without timing to prime the GPU
    nbody->update(activeParams.m_timestep);

    cutilSafeCall(cudaEventRecord(startEvent, 0));

    for (int i = 0; i < iterations; ++i)
    {
        nbody->update(activeParams.m_timestep);
    }

    cutilSafeCall(cudaEventRecord(stopEvent, 0));
    // Checked like the neighbouring CUDA calls: a failed synchronize would
    // otherwise go unnoticed and the elapsed-time query below would be
    // reading an event that has not completed.
    cutilSafeCall(cudaEventSynchronize(stopEvent));

    float milliseconds = 0;
    cutilSafeCall(cudaEventElapsedTime(&milliseconds, startEvent, stopEvent));

    double interactionsPerSecond = 0;
    double gflops = 0;
    computePerfStats(interactionsPerSecond, gflops, milliseconds, iterations);

    printf("%d bodies, total time for %d iterations: %0.3f ms\n",
           numBodies, iterations, milliseconds);
    printf("= %0.3f billion interactions per second\n", interactionsPerSecond);
    printf("= %0.3f GFLOP/s at %d flops per interaction\n", gflops, 20);
}
开发者ID:AnkurAnandapu,项目名称:ocelot-fork,代码行数:26,代码来源:nbody.cpp
示例3: cudaEventRecord
//-----------------------------------------------------------------------------
/**
 * Stops the timer: records the stop event on stream 0, waits for it to
 * complete, stores the start-to-stop time in mTime and marks the timer as
 * CT_STOPPED.
 */
void CUDA::Timer::Stop ()
{
    // Enqueue the stop marker and block the host until it has been reached.
    cudaEventRecord(mStop, 0);
    cudaEventSynchronize(mStop);

    // Elapsed time between the two events is written straight into mTime.
    cudaEventElapsedTime(&mTime, mStart, mStop);

    mState = CT_STOPPED;
}
开发者ID:segfault11,项目名称:TwoScale2D,代码行数:8,代码来源:cuda.cpp
示例4: record_async_times
/*
 * Walks the list of asynchronous markers in `tset` and, for every pair of
 * consecutive markers up to the one returned by get_last_async(), adds the
 * time between their CUDA events to the timer identified by the first
 * marker's timerID. When that marker carries a label, the sub-timer with the
 * same label (if one exists) is credited as well. Every visited marker, and
 * the marker the walk stops on when it is non-NULL, has its timerID reset to
 * INVALID_TIMERID.
 *
 * Assumes that all recorded events have completed.
 *
 * Returns the sum of all credited intervals (milliseconds reported by
 * cudaEventElapsedTime multiplied by 1e3).
 */
static pb_Timestamp record_async_times(struct pb_TimerSet* tset)
{
  struct pb_async_time_marker_list * next_interval = NULL;
  struct pb_async_time_marker_list * last_marker = get_last_async(tset);
  pb_Timestamp total_async_time = 0;

  for(next_interval = tset->async_markers; next_interval != last_marker;
      next_interval = next_interval->next) {
    /* Zero-initialised so that a failed query contributes nothing instead of
     * an indeterminate value. */
    float interval_time_ms = 0.0f;
    cudaEventElapsedTime(&interval_time_ms, *((cudaEvent_t *)next_interval->marker),
                         *((cudaEvent_t *)next_interval->next->marker));
    pb_Timestamp interval = (pb_Timestamp) (interval_time_ms * 1e3);
    tset->timers[next_interval->timerID].elapsed += interval;

    if (next_interval->label != NULL) {
      /* Credit the first sub-timer whose label matches this marker's label. */
      struct pb_SubTimer *subtimer = tset->sub_timer_list[next_interval->timerID]->subtimer_list;
      while (subtimer != NULL) {
        if ( strcmp(subtimer->label, next_interval->label) == 0) {
          subtimer->timer.elapsed += interval;
          break;
        }
        subtimer = subtimer->next;
      }
    }

    total_async_time += interval;
    next_interval->timerID = INVALID_TIMERID;
  }

  /* The loop exits on last_marker without processing it; invalidate it too. */
  if(next_interval != NULL)
    next_interval->timerID = INVALID_TIMERID;

  return total_async_time;
}
开发者ID:anshumang,项目名称:lammps-analytics,代码行数:35,代码来源:parboil_cuda.c
示例5: LOG
// Returns the last measured interval in microseconds.
// If the timer has never been run, logs a warning and returns 0.
// If the timer is still running, it is stopped first.
float Timer::MicroSeconds() {
if (!has_run_at_least_once()) {
LOG(WARNING)<< "Timer has never been run before reading time.";
return 0;
}
if (running()) {
Stop();
}
// GPU path is taken only when Caffe is in GPU mode and the default device
// uses the CUDA backend; everything else falls through to the CPU clock.
if (Caffe::mode() == Caffe::GPU
&& Caffe::GetDefaultDevice()->backend() == BACKEND_CUDA) {
#ifndef CPU_ONLY
#ifdef USE_CUDA
CUDA_CHECK(cudaEventElapsedTime(&elapsed_milliseconds_, start_gpu_,
stop_gpu_));
// cudaEventElapsedTime reports milliseconds; scale to microseconds.
elapsed_microseconds_ = elapsed_milliseconds_ * 1000;
#endif // USE_CUDA
// NOTE(review): when neither CPU_ONLY nor USE_CUDA is defined this branch
// compiles to nothing and elapsed_microseconds_ keeps its previous value.
#else
NO_GPU;
#endif
} else {
// CPU path: difference between the stored stop and start timestamps.
elapsed_microseconds_ = (stop_cpu_ - start_cpu_).total_microseconds();
}
return elapsed_microseconds_;
}
开发者ID:strin,项目名称:caffe-opencl,代码行数:25,代码来源:benchmark.cpp
示例6: GetTimeMillis
/*
 * Records timerStop on stream 0, waits for it, and returns the time elapsed
 * since timerStart in whole milliseconds (the fractional part is discarded by
 * the cast to unsigned int).
 */
unsigned int GetTimeMillis () {
    float ms;

    cudaEventRecord(timerStop, 0);
    cudaEventSynchronize(timerStop);
    cudaEventElapsedTime(&ms, timerStart, timerStop);

    return (unsigned int)(ms);
}
开发者ID:smallGum,项目名称:gpuocelot,代码行数:7,代码来源:tictoc.c
示例7: time_invocation_cuda
double time_invocation_cuda(std::size_t num_trials, Function f, Arg1 arg1, Arg2 arg2, Arg3 arg3)
{
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start);
for(std::size_t i = 0;
i < num_trials;
++i)
{
f(arg1,arg2,arg3);
}
cudaEventRecord(stop);
cudaThreadSynchronize();
float msecs = 0;
cudaEventElapsedTime(&msecs, start, stop);
cudaEventDestroy(start);
cudaEventDestroy(stop);
// return mean msecs
return msecs / num_trials;
}
开发者ID:egaburov,项目名称:bulk,代码行数:25,代码来源:time_invocation_cuda.hpp
示例8: main
int main()
{
cudaEvent_t start;
cudaEvent_t end;
float duration;
const float overestimateRate = 0.01f;
const float errorRate = 0.01f;
Tokenizer tokenizer( overestimateRate, errorRate );
/************** Test counting string tokens *************/
TextReader reader;
cudaEventCreate( &start );
cudaEventRecord( start, 0 );
reader.Read();
tokenizer.StartTokenizing(
reader.GetCharBuffer(),
reader.GetOffsetBuffer(),
reader.GetCharBufferSize(),
reader.GetOffsetBufferSize() );
cudaEventCreate( &end );
cudaEventRecord( end, 0 );
cudaEventSynchronize( end );
cudaEventElapsedTime( &duration, start, end );
printf( "Time taken: %.3lf milliseconds\n", duration );
tokenizer.GetFrequency( "a" );
}
开发者ID:YSZhuoyang,项目名称:CountMinParallel,代码行数:32,代码来源:Main.cpp
示例9: Elapsed
/**
 * Waits for the stop event to complete and returns the time between the
 * start and stop events as reported by cudaEventElapsedTime (milliseconds).
 * The stop event is not recorded here.
 */
float Elapsed()
{
    cudaEventSynchronize(stop);

    float ms;
    cudaEventElapsedTime(&ms, start, stop);
    return ms;
}
开发者ID:tmquan,项目名称:hetero,代码行数:7,代码来源:timer.hpp
示例10: runCuda
void runCuda()
{
//////////////////////
// Timing cuda call //
//////////////////////
float time;
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
// Map OpenGL buffer object for writing from CUDA on a single GPU
// No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer
dptr=NULL;
vbo = mesh->getVBO();
vbosize = mesh->getVBOsize();
nbo = mesh->getNBO();
nbosize = mesh->getNBOsize();
#if RGBONLY == 1
float newcbo[] = {0.0, 1.0, 0.0,
0.0, 0.0, 1.0,
1.0, 0.0, 0.0};
cbo = newcbo;
cbosize = 9;
#elif RGBONLY == 0
vec3 defaultColor(0.5f, 0.5f, 0.5f);
mesh->changeColor(defaultColor);
cbo = mesh->getCBO();
cbosize = mesh->getCBOsize();
#endif
ibo = mesh->getIBO();
ibosize = mesh->getIBOsize();
cudaGLMapBufferObject((void**)&dptr, pbo);
updateCamera();
cudaRasterizeCore(cam, dptr, glm::vec2(width, height), frame, vbo, vbosize, cbo, cbosize, ibo, ibosize, nbo, nbosize, lights, lightsize, alpha, beta, displayMode);
cudaGLUnmapBufferObject(pbo);
vbo = NULL;
cbo = NULL;
ibo = NULL;
frame++;
fpstracker++;
//////////////////////
// Timing cuda call //
//////////////////////
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
printf("runCuda runtime: %3.1f ms \n", time);
}
开发者ID:mchen15,项目名称:Project4-Rasterizer,代码行数:59,代码来源:main.cpp
示例11: cudaEventSynchronize
/**
 * Blocks until the stop event has completed, then returns the time between
 * the start and stop events in milliseconds.
 */
float libcgt::cuda::Event::synchronizeAndGetMillisecondsElapsed()
{
    float elapsedMs;

    cudaEventSynchronize( m_stop );
    cudaEventElapsedTime( &elapsedMs, m_start, m_stop );

    return elapsedMs;
}
开发者ID:zxwglzi,项目名称:libcgt,代码行数:8,代码来源:Event.cpp
示例12: cudaEventRecord
//----------------------------------------------------------------------------//
/**
 * Records the stop event on stream 0, waits for it, and returns the time
 * elapsed since the start event (milliseconds, widened from float to double).
 */
double CUDAImpl::_StopTimer()
{
    float elapsedMs;

    cudaEventRecord(_stop, 0);
    cudaEventSynchronize(_stop);
    cudaEventElapsedTime(&elapsedMs, _start, _stop);

    return elapsedMs;
}
开发者ID:karlssonper,项目名称:gpuip,代码行数:9,代码来源:cuda.cpp
示例13: cudaEventRecord
/**
 * Returns the time in seconds between the start event and now, then swaps the
 * start and end events so that the event just recorded becomes the start of
 * the next measurement.
 */
double CudaTimer::Split() {
    float elapsedMs;

    // Mark "now" and wait for the whole device to reach it.
    cudaEventRecord(end);
    cudaDeviceSynchronize();
    cudaEventElapsedTime(&elapsedMs, start, end);

    // The event recorded above becomes the new start.
    start.Swap(end);

    // Milliseconds -> seconds.
    return (elapsedMs / 1000.0);
}
开发者ID:BillOmg,项目名称:moderngpu,代码行数:8,代码来源:mgpucontext.cpp
示例14: contractTT
/**
 * Contracts the two tensor chains TT1 and TT2 (n tensors each), accumulating
 * one scalar per (i, j) combination of the first tensors' leading dimension,
 * and prints timing, operation count and the accumulated result.
 *
 * The first and last tensors of both chains are overwritten during the run.
 * This function does not return: it resets the device and calls exit(0).
 * `bops` is not declared in this function; it is read from an outer scope.
 *
 * @param TT1, TT2  tensor chains of length n (device tensors).
 * @param n         number of tensors per chain.
 * @param size      used to size the two scratch tensors.
 */
void contractTT(sTensorGPU *TT1, sTensorGPU *TT2, const int n, const int size)
{
    cublasHandle_t blas;
    cublasCreate(&blas);

    type total = 0;

    // Scratch tensors that the contraction ping-pongs between.
    sTensorGPU scratchA = emptyTensor(size*size, 2);
    sTensorGPU scratchB = emptyTensor(size*size*2, 3);

    cudaEvent_t evStart, evStop;
    cudaEventCreate(&evStart);
    cudaEventCreate(&evStop);

    //printf("Start contractTT\n");
    cudaEventRecord(evStart, NULL);

    const int countA = TT1[0].size[0];
    const int countB = TT2[0].size[0];

    // Host copies of the boundary tensors, taken before they are overwritten.
    sTensorCPU firstA = copyToCPU(TT1[0]);
    sTensorCPU firstB = copyToCPU(TT2[0]);
    sTensorCPU lastA  = copyToCPU(TT1[n - 1]);
    sTensorCPU lastB  = copyToCPU(TT2[n - 1]);

    for (int a = 0; a < countA; a++) {
        TT1[0]     = prepareTensorStart(firstA, a);
        TT1[n - 1] = prepareTensorEnd(lastA, a);

        for (int b = 0; b < countB; b++) {
            TT2[0]     = prepareTensorStart(firstB, b);
            TT2[n - 1] = prepareTensorEnd(lastB, b);

            contractTensor(blas, TT1[0], TT2[0], scratchA);
            for (int k = 1; k < n; k++) {
                contractTensor(blas, scratchA, TT1[k], scratchB);
                contractTensor(blas, scratchB, TT2[k], scratchA, 2);
            }

            // Pull the single scalar of this combination back to the host.
            type partial = 0;
            cudaMemcpy(&partial, scratchA.deviceData, sizeof(type), cudaMemcpyDeviceToHost);
            //printf("%e ", partial);
            total += partial;
        }
    }

    cudaEventRecord(evStop, NULL);
    cudaEventSynchronize(evStop);

    float msecTotal = 0.0f;
    cudaEventElapsedTime(&msecTotal, evStart, evStop);

    printf("Time: %.3fms\n", msecTotal);
    printf("Ops: %.0f\n", bops);
    double gigaFlops = (bops * 1.0e-9f) / (msecTotal / 1000.0f);
    printf("Perf= %.2f GFlop/s\n", gigaFlops);

    cublasDestroy(blas);
    cudaDeviceReset();

    printf("%.5e \n", total);
    exit(0);
}
开发者ID:thomas-hoer,项目名称:cuTT,代码行数:58,代码来源:bigSizeTensors.cpp
示例15: HANDLE_ERROR
/**
 * Records the stop event on stream 0, waits for it, and returns the time
 * elapsed since the start event in milliseconds. Every CUDA call is routed
 * through HANDLE_ERROR.
 */
float gpuNUFFT::GpuNUFFTOperator::stopTiming()
{
  HANDLE_ERROR( cudaEventRecord(stop, 0) );
  HANDLE_ERROR( cudaEventSynchronize(stop) );

  float elapsedMs;
  HANDLE_ERROR( cudaEventElapsedTime(&elapsedMs, start, stop) );
  return elapsedMs;
}
开发者ID:davidssmith,项目名称:TRON,代码行数:9,代码来源:gpuNUFFT_operator.cpp
示例16: cudaEventRecord
// Runs this filter on one input frame via run_filter(), then copies the
// pass-through fields selected by m_nPathThrough (timestamp/duration, flags,
// picstruct) from the input frame to the output frames. When
// m_bCheckPerformance is set, the call is bracketed by two CUDA events and the
// elapsed time is accumulated into m_dFilterTimeMs / m_nFilterRunCount.
//
// pInputFrame      input frame; may be nullptr.
// ppOutputFrames   array receiving the output frame pointers.
// pOutputFrameNum  receives the number of output frames.
// Returns the status from run_filter(), or NV_ENC_ERR_INVALID_CALL when
// timestamp pass-through is requested but the filter produced other than
// exactly one frame.
NVENCSTATUS NVEncFilter::filter(FrameInfo *pInputFrame, FrameInfo **ppOutputFrames, int *pOutputFrameNum) {
cudaError_t cudaerr = cudaSuccess;
// Performance measurement: mark the start of the filter. Failures are logged
// but do not abort the filter.
if (m_bCheckPerformance) {
cudaerr = cudaEventRecord(*m_peFilterStart.get());
if (cudaerr != cudaSuccess) {
AddMessage(RGY_LOG_ERROR, _T("failed cudaEventRecord(m_peFilterStart): %s.\n"), char_to_tstring(cudaGetErrorString(cudaerr)).c_str());
}
}
// No input frame: start from an empty output set.
if (pInputFrame == nullptr) {
*pOutputFrameNum = 0;
ppOutputFrames[0] = nullptr;
}
// In-place mode: when no output frame has been supplied, the input frame is
// used as the single output frame.
if (m_pParam
&& m_pParam->bOutOverwrite // overwrite mode?
&& pInputFrame != nullptr && pInputFrame->ptr != nullptr // does an input frame exist?
&& ppOutputFrames != nullptr && ppOutputFrames[0] == nullptr) { // can the output slot be set?
ppOutputFrames[0] = pInputFrame;
*pOutputFrameNum = 1;
}
const auto ret = run_filter(pInputFrame, ppOutputFrames, pOutputFrameNum);
const int nOutFrame = *pOutputFrameNum;
// NOTE(review): m_pParam is dereferenced here without the null check used
// above, and pInputFrame is dereferenced below without a null check — confirm
// neither can be null when nOutFrame > 0.
if (!m_pParam->bOutOverwrite && nOutFrame > 0) {
if (m_nPathThrough & FILTER_PATHTHROUGH_TIMESTAMP) {
if (nOutFrame != 1) {
AddMessage(RGY_LOG_ERROR, _T("timestamp path through can only be applied to 1-in/1-out filter.\n"));
return NV_ENC_ERR_INVALID_CALL;
} else {
ppOutputFrames[0]->timestamp = pInputFrame->timestamp;
ppOutputFrames[0]->duration = pInputFrame->duration;
}
}
// Flags and picture structure are propagated to every output frame.
for (int i = 0; i < nOutFrame; i++) {
if (m_nPathThrough & FILTER_PATHTHROUGH_FLAGS) ppOutputFrames[i]->flags = pInputFrame->flags;
if (m_nPathThrough & FILTER_PATHTHROUGH_PICSTRUCT) ppOutputFrames[i]->picstruct = pInputFrame->picstruct;
}
}
// Performance measurement: mark the end, wait for it, and accumulate the
// elapsed milliseconds. Each failure is logged and execution continues.
if (m_bCheckPerformance) {
cudaerr = cudaEventRecord(*m_peFilterFin.get());
if (cudaerr != cudaSuccess) {
AddMessage(RGY_LOG_ERROR, _T("failed cudaEventRecord(m_peFilterFin): %s.\n"), char_to_tstring(cudaGetErrorString(cudaerr)).c_str());
}
cudaerr = cudaEventSynchronize(*m_peFilterFin.get());
if (cudaerr != cudaSuccess) {
AddMessage(RGY_LOG_ERROR, _T("failed cudaEventSynchronize(m_peFilterFin): %s.\n"), char_to_tstring(cudaGetErrorString(cudaerr)).c_str());
}
float time_ms = 0.0f;
cudaerr = cudaEventElapsedTime(&time_ms, *m_peFilterStart.get(), *m_peFilterFin.get());
if (cudaerr != cudaSuccess) {
AddMessage(RGY_LOG_ERROR, _T("failed cudaEventElapsedTime(m_peFilterStart - m_peFilterFin): %s.\n"), char_to_tstring(cudaGetErrorString(cudaerr)).c_str());
}
m_dFilterTimeMs += time_ms;
m_nFilterRunCount++;
}
return ret;
}
开发者ID:ming-hai,项目名称:NVEnc,代码行数:56,代码来源:NVEncFilter.cpp
示例17: sobel1
void sobel1(int *h_result, unsigned int *h_pic, int xsize, int ysize, int thresh)
{
int *d_result;
unsigned int *d_pic;
int resultSize = xsize * ysize * 3 * sizeof(int);
int picSize = xsize * ysize * sizeof(int);
cudaMalloc( (void**)&d_result, resultSize);
if( !d_result) {
exit(-1);
}
cudaMalloc( (void**)&d_pic, picSize);
if( !d_pic) {
exit(-1);
}
cudaMemcpy(d_result, h_result, resultSize, cudaMemcpyHostToDevice);
cudaMemcpy(d_pic, h_pic, picSize, cudaMemcpyHostToDevice);
dim3 threadsPerBlock(BLOCKSIZE, BLOCKSIZE);
dim3 numBlocks(ceil((float)ysize/(float)threadsPerBlock.x), ceil((float)xsize/(float)threadsPerBlock.y));
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
{ __set_CUDAConfig(numBlocks, threadsPerBlock );
d_sobel1 (d_result, d_pic, xsize, ysize, thresh);}
cudaEventSynchronize(stop);
float elapsedTime;
cudaEventElapsedTime(&elapsedTime, start, stop);
cudaEventDestroy(start);
cudaEventDestroy(stop);
cudaMemcpy(h_result, d_result, resultSize, cudaMemcpyDeviceToHost);
cudaMemcpy(h_pic, d_pic, picSize, cudaMemcpyDeviceToHost);
cudaFree(d_result);
cudaFree(d_pic);
}
开发者ID:drolfe00,项目名称:CUDAVerificationkernels,代码行数:56,代码来源:cudaSobel.cpp
示例18: dslashCUDA
// execute kernel
double dslashCUDA(int niter) {
cudaEvent_t start, end;
cudaEventCreate(&start);
cudaEventCreate(&end);
cudaEventRecord(start, 0);
for (int i = 0; i < niter; i++) {
switch (test_type) {
case 0:
if (transfer) {
dslashQuda(spinorOut->V(), spinor->V(), &inv_param, parity);
} else {
//inv_param.input_location = QUDA_CUDA_FIELD_LOCATION;
//inv_param.output_location = QUDA_CUDA_FIELD_LOCATION;
//dslashQuda(cudaSpinorOut->V(), cudaSpinor->V(), &inv_param, parity);
dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity);
}
break;
case 1:
case 2:
if (transfer) {
MatQuda(spinorOut->V(), spinor->V(), &inv_param);
} else {
dirac->M(*cudaSpinorOut, *cudaSpinor);
}
break;
case 3:
case 4:
if (transfer) {
MatDagMatQuda(spinorOut->V(), spinor->V(), &inv_param);
} else {
dirac->MdagM(*cudaSpinorOut, *cudaSpinor);
}
break;
}
}
cudaEventRecord(end, 0);
cudaEventSynchronize(end);
float runTime;
cudaEventElapsedTime(&runTime, start, end);
cudaEventDestroy(start);
cudaEventDestroy(end);
double secs = runTime / 1000; //stopwatchReadSeconds();
// check for errors
cudaError_t stat = cudaGetLastError();
if (stat != cudaSuccess)
printfQuda("with ERROR: %s\n", cudaGetErrorString(stat));
return secs;
}
开发者ID:kpetrov,项目名称:quda,代码行数:55,代码来源:dslash_test.cpp
示例19: stop_timing_cuda
/*
 * Records and waits for *stop, prints the time elapsed since *start prefixed
 * by info_str, and destroys both events.
 *
 * start, stop  events created by the caller; both are destroyed here and must
 *              not be used afterwards.
 * info_str     label printed in front of the timing.
 */
void stop_timing_cuda(cudaEvent_t* start,cudaEvent_t* stop, char* info_str) {
  /* cudaEventElapsedTime writes through a float*, so the local is a plain
   * float rather than the project's realw typedef, which is not guaranteed
   * here to be float. Zero-initialised in case the query fails. */
  float time = 0.0f;

  // stops events
  cudaEventRecord( *stop, 0 );
  cudaEventSynchronize( *stop );
  cudaEventElapsedTime( &time, *start, *stop );
  cudaEventDestroy( *start );
  cudaEventDestroy( *stop );

  // user output
  printf("%s: Execution Time = %f ms\n",info_str,time);
}
开发者ID:Kerilk,项目名称:specfem3d_globe,代码行数:11,代码来源:helper_functions_gpu.c
示例20: dslashCUDA
// execute kernel
double dslashCUDA() {
printfQuda("Executing %d kernel loops...\n", loops);
fflush(stdout);
if (test_type < 2)
dirac->Tune(*cudaSpinorOut, *cudaSpinor, *tmp);
else
dirac->Tune(cudaSpinorOut->Even(), cudaSpinor->Even(), *tmp);
cudaEvent_t start, end;
cudaEventCreate(&start);
cudaEventRecord(start, 0);
cudaEventSynchronize(start);
for (int i = 0; i < loops; i++) {
switch (test_type) {
case 0:
if (transfer) {
dslashQuda(spinorOut->V(), spinor->V(), &inv_param, parity);
} else {
dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity);
}
break;
case 1:
case 2:
if (transfer) {
MatQuda(spinorOut->V(), spinor->V(), &inv_param);
} else {
dirac->M(*cudaSpinorOut, *cudaSpinor);
}
break;
}
}
cudaEventCreate(&end);
cudaEventRecord(end, 0);
cudaEventSynchronize(end);
float runTime;
cudaEventElapsedTime(&runTime, start, end);
cudaEventDestroy(start);
cudaEventDestroy(end);
double secs = runTime / 1000; //stopwatchReadSeconds();
// check for errors
cudaError_t stat = cudaGetLastError();
if (stat != cudaSuccess)
printf("with ERROR: %s\n", cudaGetErrorString(stat));
printf("done.\n\n");
return secs;
}
开发者ID:fwinter,项目名称:quda,代码行数:55,代码来源:domain_wall_dslash_test.cpp
注：本文中的cudaEventElapsedTime函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论