本文整理汇总了C#中GASS.CUDA.Types.CUfunction类的典型用法代码示例。如果您正苦于以下问题:C# CUfunction类的具体用法?C# CUfunction怎么用?C# CUfunction使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
CUfunction类属于GASS.CUDA.Types命名空间,在下文中一共展示了CUfunction类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C#代码示例。
示例1: cuParamSetv
public static extern CUResult cuParamSetv(CUfunction hfunc, int offset, [In] Short1[] ptr, uint numbytes);
开发者ID:constructor-igor,项目名称:cudafy,代码行数:1,代码来源:CUDADriver.cs
示例2: cuFuncGetAttribute
public static extern CUResult cuFuncGetAttribute(ref int pi, CUFunctionAttribute attrib, CUfunction hfunc);
开发者ID:constructor-igor,项目名称:cudafy,代码行数:1,代码来源:CUDADriver.cs
示例3: cuParamSetTexRef
public static extern CUResult cuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
开发者ID:constructor-igor,项目名称:cudafy,代码行数:1,代码来源:CUDADriver.cs
示例4: Main
static void Main(string[] args)
{
// Init and select 1st device.
CUDA cuda = new CUDA(0, true);
// load module
//cuda.LoadModule(Path.Combine(Environment.CurrentDirectory, "simpleCUFFT.ptx"));
CUfunction func = new CUfunction();// cuda.GetModuleFunction("ComplexPointwiseMulAndScale");
// The filter size is assumed to be a number smaller than the signal size
const int SIGNAL_SIZE = 50;
const int FILTER_KERNEL_SIZE = 11;
// Allocate host memory for the signal
Float2[] h_signal = new Float2[SIGNAL_SIZE];
// Initalize the memory for the signal
Random r = new Random();
for (int i = 0; i < SIGNAL_SIZE; ++i)
{
h_signal[i].x = r.Next() / (float)int.MaxValue;
h_signal[i].y = 0;
}
// Allocate host memory for the filter
Float2[] h_filter_kernel = new Float2[FILTER_KERNEL_SIZE];
// Initalize the memory for the filter
for (int i = 0; i < FILTER_KERNEL_SIZE; ++i)
{
h_filter_kernel[i].x = r.Next() / (float)int.MaxValue;
h_filter_kernel[i].y = 0;
}
// Pad signal and filter kernel
Float2[] h_padded_signal;
Float2[] h_padded_filter_kernel;
int new_size = PadData(h_signal, out h_padded_signal, SIGNAL_SIZE,
h_filter_kernel, out h_padded_filter_kernel, FILTER_KERNEL_SIZE);
// Allocate device memory for signal
// Copy host memory to device
CUdeviceptr d_signal = cuda.CopyHostToDevice<Float2>(h_padded_signal);
// Allocate device memory for filter kernel
// Copy host memory to device
CUdeviceptr d_filter_kernel = cuda.CopyHostToDevice<Float2>(h_padded_filter_kernel);
// CUFFT plan
CUFFT fft = new CUFFT(cuda);
cufftHandle handle = new cufftHandle();
CUFFTResult fftres = CUFFTDriver.cufftPlan1d(ref handle, new_size, CUFFTType.C2C, 1);
//fft.Plan1D(new_size, CUFFTType.C2C, 1);
return;
// Transform signal and kernel
fft.ExecuteComplexToComplex(d_signal, d_signal, CUFFTDirection.Forward);
fft.ExecuteComplexToComplex(d_filter_kernel, d_filter_kernel, CUFFTDirection.Forward);
// Multiply the coefficients together and normalize the result
// ComplexPointwiseMulAndScale<<<32, 256>>>(d_signal, d_filter_kernel, new_size, 1.0f / new_size);
cuda.SetFunctionBlockShape(func, 256, 1, 1);
cuda.SetParameter(func, 0, (uint)d_signal.Pointer);
cuda.SetParameter(func, IntPtr.Size, (uint)d_filter_kernel.Pointer);
cuda.SetParameter(func, IntPtr.Size * 2, (uint)new_size);
cuda.SetParameter(func, IntPtr.Size * 2 + 4, 1.0f / new_size);
cuda.SetParameterSize(func, (uint)(IntPtr.Size * 2 + 8));
cuda.Launch(func, 32, 1);
// Transform signal back
fft.ExecuteComplexToComplex(d_signal, d_signal, CUFFTDirection.Inverse);
// Copy device memory to host
Float2[] h_convolved_signal = h_padded_signal;
cuda.CopyDeviceToHost<Float2>(d_signal, h_convolved_signal);
// Allocate host memory for the convolution result
Float2[] h_convolved_signal_ref = new Float2[SIGNAL_SIZE];
// Convolve on the host
Convolve(h_signal, SIGNAL_SIZE,
h_filter_kernel, FILTER_KERNEL_SIZE,
h_convolved_signal_ref);
// check result
bool res = cutCompareL2fe(h_convolved_signal_ref, h_convolved_signal, 2 * SIGNAL_SIZE, 1e-5f);
Console.WriteLine("Test {0}", (true == res) ? "PASSED" : "FAILED");
//Destroy CUFFT context
fft.Destroy();
// cleanup memory
cuda.Free(d_signal);
cuda.Free(d_filter_kernel);
}
开发者ID:JustasB,项目名称:cudafy,代码行数:95,代码来源:Program.cs
示例5: cuLaunchGridAsync
public static extern CUResult cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
开发者ID:constructor-igor,项目名称:cudafy,代码行数:1,代码来源:CUDADriver.cs
示例6: cuModuleGetFunction
public static extern CUResult cuModuleGetFunction(ref CUfunction hfunc, CUmodule hmod, string name);
开发者ID:constructor-igor,项目名称:cudafy,代码行数:1,代码来源:CUDADriver.cs
示例7: cuLaunch
public static extern CUResult cuLaunch(CUfunction f);
开发者ID:constructor-igor,项目名称:cudafy,代码行数:1,代码来源:CUDADriver.cs
示例8: cuLaunchGrid
public static extern CUResult cuLaunchGrid(CUfunction f, int grid_width, int grid_height);
开发者ID:constructor-igor,项目名称:cudafy,代码行数:1,代码来源:CUDADriver.cs
示例9: cuFuncSetSharedSize
public static extern CUResult cuFuncSetSharedSize(CUfunction hfunc, uint bytes);
开发者ID:constructor-igor,项目名称:cudafy,代码行数:1,代码来源:CUDADriver.cs
示例10: cuFuncSetCacheConfig
public static extern CUResult cuFuncSetCacheConfig(CUfunction hfunc, CUFunctionCache config);
开发者ID:constructor-igor,项目名称:cudafy,代码行数:1,代码来源:CUDADriver.cs
示例11: cuFuncSetBlockShape
public static extern CUResult cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
开发者ID:constructor-igor,项目名称:cudafy,代码行数:1,代码来源:CUDADriver.cs
示例12: cuParamSetf
public static extern CUResult cuParamSetf(CUfunction hfunc, int offset, float value);
开发者ID:constructor-igor,项目名称:cudafy,代码行数:1,代码来源:CUDADriver.cs
示例13: cuParamSeti
public static extern CUResult cuParamSeti(CUfunction hfunc, int offset, uint value);
开发者ID:constructor-igor,项目名称:cudafy,代码行数:1,代码来源:CUDADriver.cs
示例14: DoLayout
public override void DoLayout()
{
CUdeviceptr p1 = new CUdeviceptr();
CUDADriver.cuMemAlloc(ref p1, 1 <<10);
byte[] b = new byte[1<<10];
CUDADriver.cuMemcpyHtoD(p1, b, (uint) b.Length);
CUfunction func = new CUfunction();
CUResult res;
int nnodes = (int) Network.VertexCount*2;
int blocks = 32;
if (nnodes < 1024*blocks) nnodes = 1024*blocks;
while ((nnodes & (prop.SIMDWidth-1)) != 0) nnodes++;
nnodes--;
//float dtime = 0.025f; float dthf = dtime * 0.5f;
//float epssq = 0.05f * 0.05f;
//float itolsq = 1.0f / (0.5f * 0.5f);
CUDADriver.cuModuleGetFunction(ref func, mod, "dummy");
// Float4[] data = new Float4[100];
CUdeviceptr ptr = new CUdeviceptr();
//CUDADriver.cuMemAlloc(ref ptr, (uint) 100 * System.Runtime.InteropServices.Marshal.SizeOf(Float4));
CUDADriver.cuParamSeti(func, 0, (uint) ptr.Pointer);
CUDADriver.cuParamSetSize(func, 4);
res = CUDADriver.cuLaunch(func);
if(res != CUResult.Success)
Logger.AddMessage(LogEntryType.Warning, "CUDA Error in dummy function: " +res.ToString());
// InitializationKernel<<<1, 1>>>();
CUDADriver.cuModuleGetFunction(ref func, mod, "InitializationKernel");
res = CUDADriver.cuLaunch(func);
if(res != CUResult.Success)
Logger.AddMessage(LogEntryType.Warning, "CUDA Error in InitializationKernel: " +res.ToString());
// BoundingBoxKernel<<<blocks * FACTOR1, THREADS1>>>();
CUDADriver.cuModuleGetFunction(ref func, mod, "BoundingBoxKernel: "+res.ToString());
CUDADriver.cuLaunch(func);
if(res != CUResult.Success)
Logger.AddMessage(LogEntryType.Warning, "CUDA Error in BoundingBoxKernel: "+res.ToString());
// TreeBuildingKernel<<<blocks * FACTOR2, THREADS2>>>();
CUDADriver.cuModuleGetFunction(ref func, mod, "TreeBuildingKernel: "+res.ToString());
CUDADriver.cuLaunch(func);
if(res != CUResult.Success)
Logger.AddMessage(LogEntryType.Warning, "CUDA Error in TreeBuildingKernel: "+res.ToString());
// SummarizationKernel<<<blocks * FACTOR3, THREADS3>>>();
CUDADriver.cuModuleGetFunction(ref func, mod, "SummarizationKernel: "+res.ToString());
CUDADriver.cuLaunch(func);
if(res != CUResult.Success)
Logger.AddMessage(LogEntryType.Warning, "CUDA Error in SummarizationKernel: "+res.ToString());
// ForceCalculationKernel<<<blocks * FACTOR5, THREADS5>>>();
CUDADriver.cuModuleGetFunction(ref func, mod, "ForceCalculationKernel: "+res.ToString());
CUDADriver.cuLaunch(func);
if(res != CUResult.Success)
Logger.AddMessage(LogEntryType.Warning, "CUDA Error in ForceCalculationKernel: "+res.ToString());
// IntegrationKernel<<<blocks * FACTOR6, THREADS6>>>();
CUDADriver.cuModuleGetFunction(ref func, mod, "IntegrationKernel");
CUDADriver.cuLaunch(func);
if(res != CUResult.Success)
Logger.AddMessage(LogEntryType.Warning, "CUDA Error in IntegrationKernel: "+res.ToString());
}
开发者ID:schwarzertod,项目名称:NETGen,代码行数:70,代码来源:HuForceDirectedLayout.cs
示例15: cuParamSetSize
public static extern CUResult cuParamSetSize(CUfunction hfunc, uint numbytes);
开发者ID:constructor-igor,项目名称:cudafy,代码行数:1,代码来源:CUDADriver.cs
示例16: Initialize
public void Initialize()
{
float softeningSquared = 0.00125f;
Random random = new Random();
m_CUDA.LoadModule(m_nbody_kernel);
m_IntegrateBodies = m_CUDA.GetModuleFunction("IntegrateBodies");
m_SofteningSquared = m_CUDA.GetModuleGlobal("softeningSquared");
h_Pos = new Float4[2][] { new Float4[m_NumBodies], new Float4[m_NumBodies] };
h_Vel = new Float4[2][] { new Float4[m_NumBodies], new Float4[m_NumBodies] };
d_Pos = new CUdeviceptr[2] { m_CUDA.Allocate<Float4>(HostOldPos), m_CUDA.Allocate<Float4>(HostNewPos) };
d_Vel = new CUdeviceptr[2] { m_CUDA.Allocate<Float4>(HostOldVel), m_CUDA.Allocate<Float4>(HostNewVel) };
float scale = 3.0f;
float vscale = scale * 1.0f;
for (int i = 0; i < HostOldPos.Length; i++)
{
recalc:
HostOldPos[i].x = (float)(random.NextDouble() * 2 - 1.0);
HostOldPos[i].y = (float)(random.NextDouble() * 2 - 1.0);
HostOldPos[i].z = (float)(random.NextDouble() * 2 - 1.0);
HostOldPos[i].w = 1.0f;
if (dot(HostOldPos[i], HostOldPos[i]) > 1.0f)
goto recalc;
HostOldPos[i].x *= scale;
HostOldPos[i].y *= scale;
HostOldPos[i].z *= scale;
}
for (int i = 0; i < HostOldVel.Length; i++)
{
recalc:
HostOldVel[i].x = (float)(random.NextDouble() * 2 - 1.0);
HostOldVel[i].y = (float)(random.NextDouble() * 2 - 1.0);
HostOldVel[i].z = (float)(random.NextDouble() * 2 - 1.0);
HostOldVel[i].w = 1.0f;
if (dot(HostOldVel[i], HostOldVel[i]) > 1.0f)
goto recalc;
HostOldPos[i].x *= vscale;
HostOldPos[i].y *= vscale;
HostOldPos[i].z *= vscale;
}
m_CUDA.CopyHostToDevice<Float4>(DeviceOldPos, HostOldPos);
m_CUDA.CopyHostToDevice<Float4>(DeviceOldVel, HostOldVel);
m_CUDA.CopyHostToDevice<float>(m_SofteningSquared, new float[] { softeningSquared });
}
开发者ID:takayuki,项目名称:opensim-pcproject,代码行数:52,代码来源:NBody.cs
示例17: InitCudaModule
//private double ComputeObj(float[] w, float[] alpha, Problem<SparseVec> sub_prob, float[] diag)
//{
// double v = 0, v1=0;
// int nSV = 0;
// for (int i = 0; i < w.Length; i++)
// {
// v += w[i] * w[i];
// v1 += 0.5*w[i] * w[i];
// }
// for (int i = 0; i < alpha.Length; i++)
// {
// sbyte y_i = (sbyte)sub_prob.Y[i];
// //original line
// //v += alpha[i] * (alpha[i] * diag[GETI(y_i, i)] - 2);
// v += alpha[i] * (alpha[i] * diag[y_i + 1] - 2);
// v1 += 0.5* alpha[i] * (alpha[i] * diag[y_i + 1] - 2);
// if (alpha[i] > 0) ++nSV;
// }
// v = v / 2;
// // Debug.WriteLine("Objective value = {0}", v);
// // Debug.WriteLine("nSV = {0}", nSV);
// return v;
//}
protected void InitCudaModule()
{
cuda = new CUDA(0, true);
cuModule = cuda.LoadModule(Path.Combine(Environment.CurrentDirectory, cudaModuleName));
cuFuncDotProd = cuda.GetModuleFunction(cudaProductKernelName);
cuFuncSolver = cuda.GetModuleFunction(cudaSolveL2SVM);
cuFuncUpdateW = cuda.GetModuleFunction(cudaUpdateW);
}
开发者ID:maotong,项目名称:KMLib,代码行数:37,代码来源:CUDALinSolver.cs
示例18: InitCudaModule
private void InitCudaModule()
{
string modluePath = Path.Combine(Environment.CurrentDirectory, cudaModuleName);
if (!File.Exists(modluePath))
throw new ArgumentException("Failed access to cuda module" + modluePath);
cuModule = cuda.LoadModule(modluePath);
cuFuncDense = cuda.GetModuleFunction(funcName);
}
开发者ID:maotong,项目名称:KMLib,代码行数:9,代码来源:EllpackDenseVectorBuilder.cs
示例19: InitCudaModule
private void InitCudaModule()
{
cuda = new CUDA(0, true);
cuModule = cuda.LoadModule(Path.Combine(Environment.CurrentDirectory, cudaModuleName));
cuFuncDotProd = cuda.GetModuleFunction(cudaProductKernelName);
cuFuncGradFinalize = cuda.GetModuleFunction(cudaGradFinalizeName);
cuFuncComputeBBstep = cuda.GetModuleFunction(cudaComputeBBStepName);
cuFuncObjSquareW = cuda.GetModuleFunction(cudaObjWName);
cuFuncObjSquareAlpha = cuda.GetModuleFunction(cudaObjAlphaName);
cuFuncUpdateW = cuda.GetModuleFunction(cudaUpdateW);
cuFuncUpdateAlpha = cuda.GetModuleFunction(cudaUpdateAlphaName);
cuFuncMaxNorm = cuda.GetModuleFunction(cudaMaxNormName);
}
开发者ID:maotong,项目名称:KMLib,代码行数:20,代码来源:GPUstdBBLinSolver.cs
示例20: FlaCudaTask
unsafe public FlaCudaTask(CUDA _cuda, int channelCount, int channels, uint bits_per_sample, int max_frame_size, bool do_verify)
{
cuda = _cuda;
residualTasksLen = sizeof(FlaCudaSubframeTask) * channelCount * (lpc.MAX_LPC_ORDER * lpc.MAX_LPC_WINDOWS + 8) * FlaCudaWriter.maxFrames;
bestResidualTasksLen = sizeof(FlaCudaSubframeTask) * channelCount * FlaCudaWriter.maxFrames;
samplesBufferLen = sizeof(int) * FlaCudaWriter.MAX_BLOCKSIZE * channelCount;
int partitionsLen = sizeof(int) * (30 << 8) * channelCount * FlaCudaWriter.maxFrames;
int riceParamsLen = sizeof(int) * (4 << 8) * channelCount * FlaCudaWriter.maxFrames;
int lpcDataLen = sizeof(float) * 32 * 33 * lpc.MAX_LPC_WINDOWS * channelCount * FlaCudaWriter.maxFrames;
cudaSamplesBytes = cuda.Allocate((uint)samplesBufferLen / 2);
cudaSamples = cuda.Allocate((uint)samplesBufferLen);
cudaResidual = cuda.Allocate((uint)samplesBufferLen);
cudaLPCData = cuda.Allocate((uint)lpcDataLen);
cudaPartitions = cuda.Allocate((uint)partitionsLen);
cudaRiceParams = cuda.Allocate((uint)riceParamsLen);
cudaBestRiceParams = cuda.Allocate((uint)riceParamsLen / 4);
cudaAutocorOutput = cuda.Allocate((uint)(sizeof(float) * channelCount * lpc.MAX_LPC_WINDOWS * (lpc.MAX_LPC_ORDER + 1) * (FlaCudaWriter.maxAutocorParts + FlaCudaWriter.maxFrames)));
cudaResidualTasks = cuda.Allocate((uint)residualTasksLen);
cudaBestResidualTasks = cuda.Allocate((uint)bestResidualTasksLen);
cudaResidualOutput = cuda.Allocate((uint)(sizeof(int) * channelCount * (lpc.MAX_LPC_WINDOWS * lpc.MAX_LPC_ORDER + 8) * 64 /*FlaCudaWriter.maxResidualParts*/ * FlaCudaWriter.maxFrames));
CUResult cuErr = CUResult.Success;
if (cuErr == CUResult.Success)
cuErr = CUDADriver.cuMemAllocHost(ref samplesBytesPtr, (uint)samplesBufferLen/2);
if (cuErr == CUResult.Success)
cuErr = CUDADriver.cuMemAllocHost(ref residualBufferPtr, (uint)samplesBufferLen);
if (cuErr == CUResult.Success)
cuErr = CUDADriver.cuMemAllocHost(ref bestRiceParamsPtr, (uint)riceParamsLen / 4);
if (cuErr == CUResult.Success)
cuErr = CUDADriver.cuMemAllocHost(ref residualTasksPtr, (uint)residualTasksLen);
if (cuErr == CUResult.Success)
cuErr = CUDADriver.cuMemAllocHost(ref bestResidualTasksPtr, (uint)bestResidualTasksLen);
if (cuErr != CUResult.Success)
{
if (samplesBytesPtr != IntPtr.Zero) CUDADriver.cuMemFreeHost(samplesBytesPtr); samplesBytesPtr = IntPtr.Zero;
if (residualBufferPtr != IntPtr.Zero) CUDADriver.cuMemFreeHost(residualBufferPtr); residualBufferPtr = IntPtr.Zero;
if (bestRiceParamsPtr != IntPtr.Zero) CUDADriver.cuMemFreeHost(bestRiceParamsPtr); bestRiceParamsPtr = IntPtr.Zero;
if (residualTasksPtr != IntPtr.Zero) CUDADriver.cuMemFreeHost(residualTasksPtr); residualTasksPtr = IntPtr.Zero;
if (bestResidualTasksPtr != IntPtr.Zero) CUDADriver.cuMemFreeHost(bestResidualTasksPtr); bestResidualTasksPtr = IntPtr.Zero;
throw new CUDAException(cuErr);
}
cudaComputeAutocor = cuda.GetModuleFunction("cudaComputeAutocor");
cudaStereoDecorr = cuda.GetModuleFunction("cudaStereoDecorr");
cudaChannelDecorr = cuda.GetModuleFunction("cudaChannelDecorr");
cudaChannelDecorr2 = cuda.GetModuleFunction("cudaChannelDecorr2");
cudaFindWastedBits = cuda.GetModuleFunction("cudaFindWastedBits");
cudaComputeLPC = cuda.GetModuleFunction("cudaComputeLPC");
cudaQuantizeLPC = cuda.GetModuleFunction("cudaQuantizeLPC");
cudaComputeLPCLattice = cuda.GetModuleFunction("cudaComputeLPCLattice");
cudaEstimateResidual = cuda.GetModuleFunction("cudaEstimateResidual");
cudaEstimateResidual8 = cuda.GetModuleFunction("cudaEstimateResidual8");
cudaEstimateResidual12 = cuda.GetModuleFunction("cudaEstimateResidual12");
cudaEstimateResidual1 = cuda.GetModuleFunction("cudaEstimateResidual1");
cudaChooseBestMethod = cuda.GetModuleFunction("cudaChooseBestMethod");
cudaCopyBestMethod = cuda.GetModuleFunction("cudaCopyBestMethod");
cudaCopyBestMethodStereo = cuda.GetModuleFunction("cudaCopyBestMethodStereo");
cudaEncodeResidual = cuda.GetModuleFunction("cudaEncodeResidual");
cudaCalcPartition = cuda.GetModuleFunction("cudaCalcPartition");
cudaCalcPartition16 = cuda.GetModuleFunction("cudaCalcPartition16");
cudaCalcLargePartition = cuda.GetModuleFunction("cudaCalcLargePartition");
cudaSumPartition = cuda.GetModuleFunction("cudaSumPartition");
cudaFindRiceParameter = cuda.GetModuleFunction("cudaFindRiceParameter");
cudaFindPartitionOrder = cuda.GetModuleFunction("cudaFindPartitionOrder");
stream = cuda.CreateStream();
samplesBuffer = new int[FlaCudaWriter.MAX_BLOCKSIZE * channelCount];
outputBuffer = new byte[max_frame_size * FlaCudaWriter.maxFrames + 1];
frame = new FlacFrame(channelCount);
frame.writer = new BitWriter(outputBuffer, 0, outputBuffer.Length);
if (do_verify)
{
verify = new FlakeReader(new AudioPCMConfig((int)bits_per_sample, channels, 44100));
verify.DoCRC = false;
}
}
开发者ID:androidhacker,项目名称:DotNetProjs,代码行数:78,代码来源:FlaCudaWriter.cs
注:本文中的GASS.CUDA.Types.CUfunction类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论