本文整理汇总了Python中scikits.cuda.fft.fft函数的典型用法代码示例。如果您正苦于以下问题:Python fft函数的具体用法?Python fft怎么用?Python fft使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了fft函数的19个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: thunk
def thunk():
    """Run a batched complex-to-complex forward FFT for this node.

    Reads the input array from ``inputs[0][0]``, writes the transform into
    the storage cell ``outputs[0]`` (allocated with the same shape as the
    input) and flags the node's first output as computed.  The FFT plan is
    cached in ``plan[0]`` and rebuilt only when the input shape differs
    from the one recorded in ``plan_input_shape[0]``.
    """
    in_shape = inputs[0][0].shape
    out_shape = in_shape
    out_cell = outputs[0]

    # Keep the previous output buffer unless it is missing or has the
    # wrong size.
    needs_alloc = out_cell[0] is None or out_cell[0].shape != out_shape
    if needs_alloc:
        out_cell[0] = CudaNdarray.zeros(out_shape)

    src_gpu = to_gpuarray(inputs[0][0])
    # Original author's note: no dtype change to complex64 is applied to
    # the output wrapper because scikits.cuda.fft treats the array as
    # complex64 anyway.
    dst_gpu = to_gpuarray(out_cell[0])

    # (Re)build the plan only on first use or when the input shape changed.
    plan_is_stale = plan[0] is None or plan_input_shape[0] != in_shape
    if plan_is_stale:
        plan_input_shape[0] = in_shape
        # The plan covers axes 1..-2; the leading axis is the batch size.
        plan[0] = fft.Plan(
            in_shape[1:-1], np.complex64, np.complex64, batch=in_shape[0]
        )

    fft.fft(src_gpu, dst_gpu, plan[0])
    compute_map[node.outputs[0]][0] = True
开发者ID:Thrandis,项目名称:complex_RNN,代码行数:26,代码来源:fftconv.py
示例2: thunk
def thunk():
    """Run a batched complex-to-complex forward FFT for this node.

    The input is taken from ``inputs[0][0]`` and the result is written
    into the storage cell ``outputs[0]``, whose shape equals the input
    shape.  The FFT plan is cached in ``plan[0]`` and only rebuilt when
    the input shape changes.
    """
    src = inputs[0][0]
    src_shape = src.shape
    # Output has exactly the input's shape (as a tuple).
    dst_shape = tuple(src_shape)

    cell = outputs[0]
    # Allocate only when there is no previous buffer of the right size.
    if cell[0] is None or cell[0].shape != dst_shape:
        cell[0] = CudaNdarray.zeros(dst_shape)

    gpu_in = to_gpuarray(inputs[0][0])
    # Original author's note: the output wrapper is not retyped to
    # complex64 because scikits.cuda.fft treats the array as complex64
    # anyway.
    gpu_out = to_gpuarray(cell[0])

    # Initialise the plan only if necessary.
    if plan[0] is None or plan_input_shape[0] != src_shape:
        plan_input_shape[0] = src_shape
        plan[0] = fft.Plan(
            shape=src_shape[1:-1],  # exclude batch dim and complex dim
            in_dtype=np.complex64,
            out_dtype=np.complex64,
            batch=src_shape[0],
        )

    fft.fft(gpu_in, gpu_out, plan[0])
开发者ID:soroushmehr,项目名称:BP-FFT,代码行数:32,代码来源:cuda_fft.py
示例3: fft_multiply_repeated
def fft_multiply_repeated(h_fft, x, cuda_dict=None):
    """Do FFT multiplication by a filter function (possibly using CUDA)

    Parameters
    ----------
    h_fft : 1-d array or gpuarray
        The filtering array to apply.
    x : 1-d array
        The array to filter.
    cuda_dict : dict | None
        Dictionary constructed using setup_cuda_multiply_repeated().
        ``None`` (the default) is equivalent to ``dict(use_cuda=False)``.

    Returns
    -------
    x : 1-d array
        Filtered version of x.
    """
    # A fresh dict per call avoids a shared mutable default argument.
    if cuda_dict is None:
        cuda_dict = dict(use_cuda=False)

    if not cuda_dict["use_cuda"]:
        # do the fourier-domain operations
        x = np.real(ifft(h_fft * fft(x), overwrite_x=True)).ravel()
    else:
        # do the fourier-domain operations, results in second param
        cuda_dict["x"].set(x.astype(np.float64))
        cudafft.fft(cuda_dict["x"], cuda_dict["x_fft"], cuda_dict["fft_plan"])
        cuda_multiply_inplace_c128(h_fft, cuda_dict["x_fft"])
        # The multiplication could also be done on the host instead of
        # with the custom kernel, at the cost of two extra transfers.
        cudafft.ifft(cuda_dict["x_fft"], cuda_dict["x"], cuda_dict["ifft_plan"], False)
        # asanyarray copies only when the dtype conversion requires it and
        # lets ndarray subclasses pass through; unlike
        # np.array(..., copy=False) it does not raise on NumPy >= 2 when a
        # copy is needed (e.g. float64 result, float32 input dtype).
        x = np.asanyarray(cuda_dict["x"].get(), dtype=x.dtype)
    return x
开发者ID:TanayGahlot,项目名称:mne-python,代码行数:30,代码来源:cuda.py
示例4: thunk
def thunk():
    """Run a batched real-to-complex forward FFT for this node.

    The input is read from ``inputs[0][0]``.  The output, stored in the
    cell ``outputs[0]``, keeps the input's leading axes, shrinks the last
    axis to ``n // 2 + 1`` entries and appends a trailing axis of length 2
    holding the real and imaginary parts.  The FFT plan is cached in
    ``plan[0]`` and rebuilt only when the input shape changes.
    """
    in_shape = inputs[0][0].shape

    # The DFT of a real input is symmetric, so only n // 2 + 1
    # coefficients of the last axis are stored; the extra axis of
    # length 2 carries real/imag.
    out_shape = tuple(in_shape[:-1]) + (in_shape[-1] // 2 + 1, 2)

    storage = outputs[0]
    # Allocate only when there is no previous buffer of the right size.
    if storage[0] is None or storage[0].shape != out_shape:
        storage[0] = CudaNdarray.zeros(out_shape)

    real_in = to_gpuarray(inputs[0][0])
    # Original author's note: the output wrapper is not retyped to
    # complex64 because scikits.cuda.fft treats the array as complex64
    # anyway.
    cplx_out = to_gpuarray(storage[0])

    # Initialise the plan only if necessary.
    stale = plan[0] is None or plan_input_shape[0] != in_shape
    if stale:
        plan_input_shape[0] = in_shape
        # All axes after the first are transformed; axis 0 is the batch.
        plan[0] = fft.Plan(
            in_shape[1:], np.float32, np.complex64, batch=in_shape[0]
        )

    fft.fft(real_in, cplx_out, plan[0])
开发者ID:Ambier,项目名称:Theano,代码行数:33,代码来源:fftconv.py
示例5: gpu_r2c_fft
def gpu_r2c_fft(in1, is_gpuarray=False, store_on_gpu=False):
    """
    This function makes use of the scikits implementation of the FFT for GPUs to take the real to complex FFT.

    The output shape is built from the first two axes of in1 only:
    (in1.shape[0], in1.shape[1] // 2 + 1).

    INPUTS:
    in1             (no default):       The array on which the FFT is to be performed.
    is_gpuarray     (default=False):    Boolean specifier for whether or not input is on the gpu.
    store_on_gpu    (default=False):    Boolean specifier for whether the result is to be left on the gpu or not.

    OUTPUTS:
    gpu_out1                            The gpu array containing the result.
    OR
    gpu_out1.get()                      The result from the gpu array.
    """
    if is_gpuarray:
        # NOTE(review): the plan below is built for float32 input; a GPU
        # array passed in here is used as-is and is presumably expected to
        # be float32 already -- confirm against callers.
        gpu_in1 = in1
    else:
        gpu_in1 = gpuarray.to_gpu_async(in1.astype(np.float32))

    # Integer arithmetic for the half-spectrum width.  The original
    # computed 0.5 * n + 1 and relied on implicit truncation when storing
    # into an integer array; n // 2 + 1 gives the same value for every
    # non-negative n without going through floating point.
    n_rows = int(in1.shape[0])
    n_cols = int(in1.shape[1]) // 2 + 1

    gpu_out1 = gpuarray.empty([n_rows, n_cols], np.complex64)
    gpu_plan = Plan(gpu_in1.shape, np.float32, np.complex64)
    fft(gpu_in1, gpu_out1, gpu_plan)

    if store_on_gpu:
        return gpu_out1
    else:
        return gpu_out1.get()
开发者ID:AstroChem,项目名称:PyMORESANE,代码行数:31,代码来源:iuwt_convolution.py
示例6: test_fft_float64_to_complex128
def test_fft_float64_to_complex128(self):
    """Compare a 1-D float64 -> complex128 GPU FFT with ``np.fft.fft``.

    Only the first ``N // 2 + 1`` coefficients of the NumPy result are
    compared, matching the length of the GPU output buffer.
    """
    x = np.asarray(np.random.rand(self.N), np.float64)
    xf = np.fft.fft(x)
    x_gpu = gpuarray.to_gpu(x)
    # Floor division keeps the length an int under true division too
    # (Python 3 or ``from __future__ import division``).
    n_out = self.N // 2 + 1
    xf_gpu = gpuarray.empty(n_out, np.complex128)
    plan = fft.Plan(x.shape, np.float64, np.complex128)
    fft.fft(x_gpu, xf_gpu, plan)
    assert np.allclose(xf[0:n_out], xf_gpu.get(), atol=atol_float64)
开发者ID:jfrelinger,项目名称:scikits.cuda,代码行数:8,代码来源:test_fft.py
示例7: test_batch_fft_float64_to_complex128_2d
def test_batch_fft_float64_to_complex128_2d(self):
    """Compare a batched 2-D float64 -> complex128 GPU FFT with ``np.fft.rfftn``.

    The batch axis is axis 0 (size ``self.B``); the transform runs over
    axes 1 and 2.
    """
    x = np.asarray(np.random.rand(self.B, self.N, self.M), np.float64)
    xf = np.fft.rfftn(x, axes=(1, 2))
    x_gpu = gpuarray.to_gpu(x)
    # Floor division keeps the shape entry an int under true division.
    out_shape = (self.B, self.N, self.M // 2 + 1)
    xf_gpu = gpuarray.empty(out_shape, np.complex128)
    plan = fft.Plan([self.N, self.M], np.float64, np.complex128, batch=self.B)
    fft.fft(x_gpu, xf_gpu, plan)
    assert np.allclose(xf, xf_gpu.get(), atol=atol_float64)
开发者ID:GiladAmar,项目名称:scikits.cuda,代码行数:8,代码来源:test_fft.py
示例8: test_batch_fft_float64_to_complex128_1d
def test_batch_fft_float64_to_complex128_1d(self):
    """Compare a batched 1-D float64 -> complex128 GPU FFT with ``np.fft.rfft``.

    The batch axis is axis 0 (size ``self.B``); the transform runs over
    axis 1.
    """
    x = np.asarray(np.random.rand(self.B, self.N), np.float64)
    xf = np.fft.rfft(x, axis=1)
    x_gpu = gpuarray.to_gpu(x)
    # Floor division keeps the shape entry an int under true division.
    out_shape = (self.B, self.N // 2 + 1)
    xf_gpu = gpuarray.empty(out_shape, np.complex128)
    plan = fft.Plan(x.shape[1], np.float64, np.complex128, batch=self.B)
    fft.fft(x_gpu, xf_gpu, plan)
    assert np.allclose(xf, xf_gpu.get(), atol=atol_float64)
开发者ID:GiladAmar,项目名称:scikits.cuda,代码行数:8,代码来源:test_fft.py
示例9: test_fft_float32_to_complex64_2d
def test_fft_float32_to_complex64_2d(self):
    """Compare a 2-D float32 -> complex64 GPU FFT with ``np.fft.rfftn``."""
    x = np.asarray(np.random.rand(self.N, self.M), np.float32)
    xf = np.fft.rfftn(x)
    x_gpu = gpuarray.to_gpu(x)
    # Floor division keeps the shape entry an int under true division.
    out_shape = (self.N, self.M // 2 + 1)
    xf_gpu = gpuarray.empty(out_shape, np.complex64)
    plan = fft.Plan(x.shape, np.float32, np.complex64)
    fft.fft(x_gpu, xf_gpu, plan)
    assert np.allclose(xf, xf_gpu.get(), atol=atol_float32)
开发者ID:GiladAmar,项目名称:scikits.cuda,代码行数:8,代码来源:test_fft.py
示例10: rfft2
def rfft2(self, i, o = None, cache = True):
    """Batched 2-D real-to-complex forward FFT over the last two axes of ``i``.

    Parameters
    ----------
    i : array
        Input array; every axis before the last two is folded into the
        batch count given to the plan.
    o : array, optional
        Output array.  When ``None`` one is allocated through
        ``self.context.empty`` with shape
        ``i.shape[:-2] + (rows, cols // 2 + 1)`` and dtype ``self.ctype``.
    cache : bool
        Forwarded as the first argument of ``self.get_plan``.

    Returns
    -------
    o : array
        The output array (the one passed in, or the newly allocated one).
    """
    shape = i.shape[:-2]
    rshape = i.shape[-2:]
    # Floor division: a float here would produce an invalid shape under
    # true division.
    cshape = (rshape[0], rshape[1] // 2 + 1)
    # ``np.int`` was an alias of the builtin ``int`` and has been removed
    # from NumPy; the builtin yields the same dtype.
    batch = np.prod(shape, dtype=int)
    plan = self.get_plan(cache, rshape, self.rtype, self.ctype, batch)
    if o is None:
        o = self.context.empty(shape + cshape, self.ctype)
    cu_fft.fft(i, o, plan, scale=False)
    return o
开发者ID:EelcoHoogendoorn,项目名称:ThreadPy,代码行数:10,代码来源:Context.py
示例11: test_multiple_streams
def test_multiple_streams(self):
    """Run two float32 -> complex64 FFTs, each through a plan bound to its own stream.

    Both GPU results are compared with the first ``N // 2 + 1``
    coefficients of ``np.fft.fft``.
    """
    x = np.asarray(np.random.rand(self.N), np.float32)
    xf = np.fft.fft(x)
    y = np.asarray(np.random.rand(self.N), np.float32)
    yf = np.fft.fft(y)

    x_gpu = gpuarray.to_gpu(x)
    y_gpu = gpuarray.to_gpu(y)

    # Floor division keeps the length an int under true division.
    n_out = self.N // 2 + 1
    xf_gpu = gpuarray.empty(n_out, np.complex64)
    yf_gpu = gpuarray.empty(n_out, np.complex64)

    stream0 = drv.Stream()
    stream1 = drv.Stream()
    plan1 = fft.Plan(x.shape, np.float32, np.complex64, stream=stream0)
    plan2 = fft.Plan(y.shape, np.float32, np.complex64, stream=stream1)

    fft.fft(x_gpu, xf_gpu, plan1)
    fft.fft(y_gpu, yf_gpu, plan2)

    assert np.allclose(xf[0:n_out], xf_gpu.get(), atol=atol_float32)
    assert np.allclose(yf[0:n_out], yf_gpu.get(), atol=atol_float32)
开发者ID:jfrelinger,项目名称:scikits.cuda,代码行数:17,代码来源:test_fft.py
示例12: convol
def convol(self, data1, data2):
    """Combine two arrays in Fourier space on the GPU and return the real part.

    Both inputs are cast to complex128, uploaded into the per-shape
    buffers cached on the class, and transformed in place.  They are then
    combined by ``self.multconj`` (presumably ``data1 *= conj(data2)``,
    going by the commented-out line in the original -- confirm).  The
    result is inverse transformed with ``cu_fft.ifft(..., True)``.

    Parameters
    ----------
    data1, data2 : array
        Host arrays; each must support ``astype``.

    Returns
    -------
    res : array
        Real part of the inverse transform, copied back to the host.
    """
    self.init()
    self.ctx.push()
    try:
        cls = self.__class__
        plan = cls.plans[self.shape]
        data1_gpu = cls.data1_gpus[self.shape]
        data2_gpu = cls.data2_gpus[self.shape]

        data1_gpu.set(data1.astype(numpy.complex128))
        cu_fft.fft(data1_gpu, data1_gpu, plan)
        data2_gpu.set(data2.astype(numpy.complex128))
        cu_fft.fft(data2_gpu, data2_gpu, plan)

        self.multconj(data1_gpu, data2_gpu)
        cu_fft.ifft(data1_gpu, data1_gpu, plan, True)
        res = data1_gpu.get().real
    finally:
        # Always pop the context pushed above, even when a GPU call
        # raises; otherwise it stays on the stack.
        self.ctx.pop()
    return res
开发者ID:pierrepaleo,项目名称:directConvolution,代码行数:17,代码来源:fft.py
示例13: cufft
def cufft(data,shape=None,inverse=False):
    """Forward or inverse complex64 FFT of ``data`` on the GPU.

    Parameters
    ----------
    data : array
        Input array; it is cast to complex64 before upload.
    shape : optional
        When truthy, ``data`` is first passed through ``pad2(data, shape)``.
    inverse : bool
        Selects ``cu_fft.ifft`` instead of ``cu_fft.fft``.  The inverse is
        called without a scale argument, so the library default applies.

    Returns
    -------
    r : array
        Host copy of the in-place transform result.
    """
    if shape:
        data = pad2(data, shape)

    # One plan per array shape, cached in the module-level CUFFT_PLANS.
    plan = CUFFT_PLANS.get(data.shape)
    if not plan:
        plan = cu_fft.Plan(data.shape, np.complex64, np.complex64)
        CUFFT_PLANS[data.shape] = plan

    # ``np.cast`` was removed in NumPy 2.0; asarray(...).astype(...) is
    # what ``np.cast[dtype](x)`` did.
    host_c64 = np.asarray(data).astype(np.complex64)
    gpu_data = gpuarray.to_gpu(host_c64)

    if inverse:
        cu_fft.ifft(gpu_data, gpu_data, plan)
    else:
        cu_fft.fft(gpu_data, gpu_data, plan)

    r = gpu_data.get()
    return r
开发者ID:yamins81,项目名称:v1framework,代码行数:18,代码来源:v1_pyfft.py
示例14: fft
def fft(invec,outvec,prec,itype,otype):
    """Forward FFT of ``invec`` into ``outvec`` through ``cu_fft.fft``.

    The plan comes from ``_get_fwd_plan`` and is selected by the input
    dtype, the output dtype and ``len(invec)``.  ``prec``, ``itype`` and
    ``otype`` are accepted but not used in this body.
    """
    n_points = len(invec)
    fwd_plan = _get_fwd_plan(invec.dtype, outvec.dtype, n_points)
    cu_fft.fft(invec.data, outvec.data, fwd_plan)
开发者ID:AbhayMK,项目名称:pycbc,代码行数:3,代码来源:cufft.py
示例15: sample_defrost_gpu
def sample_defrost_gpu(lat, func, gamma, m2_eff):
    """Calculate a sample of random values on the lattice.

    A 1-D kernel is built on the host and transformed in place with
    fftw3.  The CUDA kernel ``func`` then fills a real (n, n, n) array
    from it, and that array is transformed with scikits.cuda.  When
    ``lat.test`` is set, the spectrum is multiplied by complex Gaussian
    noise drawn from a fixed seed.  Finally the spectrum is transformed
    back.

    Parameters
    ----------
    lat : Lattice
        Lattice object.  This function reads ``mpl``, ``n``, ``nn``,
        ``dk``, ``dx``, ``prec_real``, ``prec_complex``, ``dimx``,
        ``dimy``, ``dimz``, ``cuda_block_1``, ``cuda_grid``, ``test`` and
        ``VL`` from it; ``lat.n`` is the size of the cubic lattice.
    func : callable
        Cuda kernel, called with ``block=`` and ``grid=`` keywords.
    gamma : float
        Exponent of the kernel (-0.25 or +0.25 per the original notes).
    m2_eff : float
        Effective mass term added to ``kk**2`` in the kernel.

    Returns
    -------
    res : ndarray
        Host array of dtype ``lat.prec_real``, scaled by ``1 / lat.VL``.
    """
    import scikits.cuda.fft as fft
    import fftw3

    # Various constants:
    mpl = lat.mpl
    n = lat.n
    nn = lat.nn
    os = 16
    nos = n*pow(os,2)

    dk = lat.dk
    dx = lat.dx
    dkos = dk/(2.*os)
    dxos = dx/os
    kcut = nn*dk/2.0
    norm = 0.5/(math.sqrt(2*pi*dk**3.)*mpl)*(dkos/dxos)

    ker = np.empty(nos,dtype = lat.prec_real)

    # NOTE: the plan is created before ``ker`` is filled and this order is
    # kept deliberately -- FFTW planning with the 'measure' flag may
    # overwrite the arrays it is given.
    fft1 = fftw3.Plan(ker,ker, direction='forward', flags=['measure'],
                      realtypes = ['realodd 10'])

    # ``range`` instead of ``xrange`` so the loops run on Python 2 and 3.
    for k in range(nos):
        kk = (k+0.5)*dkos
        ker[k]=kk*(kk**2. + m2_eff)**gamma*math.exp(-(kk/kcut)**2.)

    fft1.execute()
    fftw3.destroy_plan(fft1)

    for k in range(nos):
        ker[k] = norm*ker[k]/(k+1)

    # Floor division keeps the shape entry an int under true division.
    Fk_gpu = gpuarray.zeros((n//2+1,n,n), dtype = lat.prec_complex)

    ker_gpu = gpuarray.to_gpu(ker)
    tmp_gpu = gpuarray.zeros((n,n,n),dtype = lat.prec_real)

    plan = fft.Plan(tmp_gpu.shape, lat.prec_real, lat.prec_complex)
    plan2 = fft.Plan(tmp_gpu.shape, lat.prec_complex, lat.prec_real)

    func(tmp_gpu, ker_gpu, np.uint32(nn), np.float64(os),
         np.uint32(lat.dimx), np.uint32(lat.dimy), np.uint32(lat.dimz),
         block = lat.cuda_block_1, grid = lat.cuda_grid)

    fft.fft(tmp_gpu, Fk_gpu, plan)

    if lat.test==True:
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('Testing mode on! Set testQ to False to disable this.\n')
        # Fixed seed so test-mode runs are reproducible.
        np.random.seed(1)

    rr1 = (np.random.normal(size=Fk_gpu.shape)+
           np.random.normal(size=Fk_gpu.shape)*1j)

    # The noise is applied on the host, then the spectrum is uploaded again.
    Fk = Fk_gpu.get()
    Fk*= rr1
    Fk_gpu = gpuarray.to_gpu(Fk)

    fft.ifft(Fk_gpu, tmp_gpu, plan2)
    res = (tmp_gpu.get()).astype(lat.prec_real)

    res *= 1./lat.VL

    return res
开发者ID:jtksai,项目名称:PyCOOL,代码行数:71,代码来源:field_init.py
示例16: cuda_gridvis
#.........这里部分代码省略.........
d_re = gpu.to_gpu(h_re)
d_im = gpu.to_gpu(h_im)
d_cnt = gpu.zeros((np.int(nx),np.int(nx)),np.int32)
d_grd = gpu.zeros((np.int(nx),np.int(nx)),np.complex64)
d_ngrd = gpu.zeros_like(d_grd)
d_bm = gpu.zeros_like(d_grd)
d_nbm = gpu.zeros_like(d_grd)
d_fim = gpu.zeros((np.int(imsize),np.int(imsize)),np.float32)
## define kernel parameters
blocksize2D = (8,16,1)
gridsize2D = (np.int(np.ceil(1.*nx/blocksize2D[0])),np.int(np.ceil(1.*nx/blocksize2D[1])))
blocksizeF2D = (16,16,1)
gridsizeF2D = (np.int(np.ceil(1.*imsize/blocksizeF2D[0])),np.int(np.ceil(1.*imsize/blocksizeF2D[1])))
blocksize1D = (256,1,1)
gridsize1D = (np.int(np.ceil(1.*gcount/blocksize1D[0])),1)
# ------------------------
# make gridding kernels
# ------------------------
## make spheroidal convolution kernel (don't mess with these!)
width = 6.
ngcf = 24.
h_cgf = gcf(ngcf,width)
## make grid correction
h_corr = corrfun(nx,width)
d_cgf = module.get_global('cgf')[0]
d_corr = gpu.to_gpu(h_corr)
cu.memcpy_htod(d_cgf,h_cgf)
# ------------------------
# grid it up
# ------------------------
d_umax = gpu.max(cumath.fabs(d_u))
d_vmax = gpu.max(cumath.fabs(d_v))
umax = np.int32(np.ceil(d_umax.get()/du))
vmax = np.int32(np.ceil(d_vmax.get()/du))
## grid ($$)
# This should be improvable via:
# - shared memory solution? I tried...
# - better coalesced memory access? I tried...
# - reorganzing and indexing UV data beforehand?
# (i.e. http://www.nvidia.com/docs/IO/47905/ECE757_Project_Report_Gregerson.pdf)
# - storing V(u,v) in texture memory?
gridVis_wBM_kernel(d_grd,d_bm,d_cnt,d_u,d_v,d_re,d_im,nx,du,gcount,umax,vmax,\
block=blocksize2D,grid=gridsize2D)
## apply weights
wgtGrid_kernel(d_bm,d_cnt,briggs,nx,block=blocksize2D,grid=gridsize2D)
hfac = np.int32(1)
dblGrid_kernel(d_bm,nx,hfac,block=blocksize2D,grid=gridsize2D)
shiftGrid_kernel(d_bm,d_nbm,nx,block=blocksize2D,grid=gridsize2D)
## normalize
wgtGrid_kernel(d_grd,d_cnt,briggs,nx,block=blocksize2D,grid=gridsize2D)
## Reflect grid about v axis
hfac = np.int32(-1)
dblGrid_kernel(d_grd,nx,hfac,block=blocksize2D,grid=gridsize2D)
## Shift both
shiftGrid_kernel(d_grd,d_ngrd,nx,block=blocksize2D,grid=gridsize2D)
# ------------------------
# Make the beam
# ------------------------
## Transform to image plane
fft.fft(d_nbm,d_bm,plan)
## Shift
shiftGrid_kernel(d_bm,d_nbm,nx,block=blocksize2D,grid=gridsize2D)
## Correct for C
corrGrid_kernel(d_nbm,d_corr,nx,block=blocksize2D,grid=gridsize2D)
# Trim
trimIm_kernel(d_nbm,d_fim,noff,nx,imsize,block=blocksizeF2D,grid=gridsizeF2D)
## Normalize
d_bmax = gpu.max(d_fim)
bmax = d_bmax.get()
bmax = np.float32(1./bmax)
nrmBeam_kernel(d_fim,bmax,imsize,block=blocksizeF2D,grid=gridsizeF2D)
## Pull onto CPU
dpsf = d_fim.get()
# ------------------------
# Make the map
# ------------------------
## Transform to image plane
fft.fft(d_ngrd,d_grd,plan)
## Shift
shiftGrid_kernel(d_grd,d_ngrd,nx,block=blocksize2D,grid=gridsize2D)
## Correct for C
corrGrid_kernel(d_ngrd,d_corr,nx,block=blocksize2D,grid=gridsize2D)
## Trim
trimIm_kernel(d_ngrd,d_fim,noff,nx,imsize,block=blocksizeF2D,grid=gridsizeF2D)
## Normalize (Jy/beam)
nrmGrid_kernel(d_fim,bmax,imsize,block=blocksizeF2D,grid=gridsizeF2D)
## Finish timers
t_end=time.time()
t_full=t_end-t_start
print "Gridding execution time %0.5f"%t_full+' s'
print "\t%0.5f"%(t_full/gcount)+' s per visibility'
## Return dirty psf (CPU) and dirty image (GPU)
return dpsf,d_fim
开发者ID:shaoguangleo,项目名称:autoFits,代码行数:101,代码来源:gICLEAN.py
示例17: fft_resample
def fft_resample(x, W, new_len, npad, to_remove, cuda_dict=None):
    """Do FFT resampling with a filter function (possibly using CUDA)

    Parameters
    ----------
    x : 1-d array
        The array to resample.
    W : 1-d array or gpuarray
        The filtering function to apply.
    new_len : int
        The size of the output array (before removing padding).
    npad : int
        Amount of padding to apply before resampling.
    to_remove : int
        Number of samples to remove after resampling.
    cuda_dict : dict | None
        Dictionary constructed using setup_cuda_multiply_repeated().
        ``None`` (the default) is equivalent to ``dict(use_cuda=False)``.

    Returns
    -------
    y : 1-d array
        Resampled (and filtered) version of x, with ``to_remove`` samples
        trimmed from each end when ``to_remove > 0``.
    """
    # A fresh dict per call avoids a shared mutable default argument.
    if cuda_dict is None:
        cuda_dict = dict(use_cuda=False)

    # add some padding at beginning and end to make this work a little cleaner
    x = _smart_pad(x, npad)
    old_len = len(x)
    shorter = new_len < old_len
    if not cuda_dict["use_cuda"]:
        N = int(min(new_len, old_len))
        # Copy the low positive frequencies, then the matching negative
        # ones, into a spectrum of the new length.
        sl_1 = slice((N + 1) // 2)
        y_fft = np.zeros(new_len, np.complex128)
        x_fft = fft(x).ravel() * W
        y_fft[sl_1] = x_fft[sl_1]
        sl_2 = slice(-(N - 1) // 2, None)
        y_fft[sl_2] = x_fft[sl_2]
        y = np.real(ifft(y_fft, overwrite_x=True)).ravel()
    else:
        cuda_dict["x"].set(np.concatenate((x, np.zeros(max(new_len - old_len, 0), x.dtype))))
        # do the fourier-domain operations, results put in second param
        cudafft.fft(cuda_dict["x"], cuda_dict["x_fft"], cuda_dict["fft_plan"])
        cuda_multiply_inplace_c128(W, cuda_dict["x_fft"])
        # This is not straightforward, but because x_fft and y_fft share
        # the same data (and only one half of the full DFT is stored), we
        # don't have to transfer the slice like we do in scipy. All we
        # need to worry about is the Nyquist component, either halving it
        # or taking just the real component...
        use_len = new_len if shorter else old_len
        func = cuda_real_c128 if shorter else cuda_halve_c128
        if use_len % 2 == 0:
            # use_len is even in this branch, so the Nyquist bin is simply
            # use_len // 2.
            nyq = int(use_len // 2)
            func(cuda_dict["x_fft"], slice=slice(nyq, nyq + 1))
        cudafft.ifft(cuda_dict["x_fft"], cuda_dict["x"], cuda_dict["ifft_plan"], scale=False)
        y = cuda_dict["x"].get()[: new_len if shorter else None]

    # now let's trim it back to the correct size (if there was padding)
    if to_remove > 0:
        keep = np.ones((new_len), dtype="bool")
        keep[:to_remove] = False
        keep[-to_remove:] = False
        y = np.compress(keep, y)

    return y
开发者ID:TanayGahlot,项目名称:mne-python,代码行数:62,代码来源:cuda.py
示例18: int
# Fragment of a larger script: names such as n, m, k, sq, mag, indexp, N,
# sobject, sobm, beta, plan, ctx, time0, serr, nerr and the gpu_* buffers
# are defined outside the visible part.
ii = 0
tmpimg = numpy.zeros((n, m, k), dtype=numpy.float32)
ln = sq + 5
mags = mag[indexp].sum()
del indexp
s = 3
N2 = int(N * 0.7)
N3 = int(N * 0.7)

# Upload the starting object, keep a device-side copy of it in gpu_last,
# and upload the intensity and mask arrays.
gpu_data.set(sobject.astype(numpy.complex64))
pycuda.driver.memcpy_dtod(gpu_last.gpudata, gpu_data.gpudata, gpu_data.nbytes)
gpu_intensity.set(mag)
gpu_mask.set(sobm)

# NOTE(review): the original indentation was lost; every statement from
# ``t0 = time()`` through the timing print is assumed to be the loop body,
# because t0 is taken right after the ``for`` line -- confirm.
for i in range(N):
    t0 = time()
    cu_fft.fft(gpu_data, gpu_data, plan)
    constrains_fourier(gpu_data, gpu_intensity)
    cu_fft.ifft(gpu_data, gpu_data, plan, True)
    constrains_real(gpu_data, gpu_last, gpu_mask, beta)
    pycuda.driver.memcpy_dtod(gpu_last.gpudata, gpu_data.gpudata, gpu_data.nbytes)
    # t1 is taken before the synchronize and t2 after it, so the message
    # reports both timings.
    t1 = time()
    ctx.synchronize()
    t2 = time()
    print("With CUDA, the full loop took %.3fs but after sync %.3fs" % (t1 - t0, t2 - t0))

del tmpimg
# Single-string print(...) calls produce the same output as the original
# Python 2 print statements and also run on Python 3.
print("it took %s %s" % (time() - time0, N / (time() - time0)))
print("smallest error %s number %s" % (serr, nerr))
开发者ID:kif,项目名称:pycdi,代码行数:30,代码来源:pycid3D_cuda.py
示例19:
import numpy as np
import scikits.cuda.fft as cu_fft
# Demo: batched real-to-complex FFT and inverse, then an in-place
# complex-to-complex FFT.
# NOTE(review): ``gpuarray`` is used below but its import is not part of
# the visible snippet (presumably ``pycuda.gpuarray``, with a CUDA context
# already initialised) -- confirm.
print('Testing fft/ifft..')
N = 4096*16
batch_size = 16

# Host reference: forward FFT followed by the real part of the inverse.
x = np.asarray(np.random.rand(batch_size, N), np.float32)
xf = np.fft.fft(x)
y = np.real(np.fft.ifft(xf))

x_gpu = gpuarray.to_gpu(x)
# Floor division keeps the shape entry an int under true division.
xf_gpu = gpuarray.empty((batch_size, N // 2 + 1), np.complex64)
plan_forward = cu_fft.Plan(N, np.float32, np.complex64, batch_size)
cu_fft.fft(x_gpu, xf_gpu, plan_forward)

y_gpu = gpuarray.empty_like(x_gpu)
plan_inverse = cu_fft.Plan(N, np.complex64, np.float32, batch_size)
cu_fft.ifft(xf_gpu, y_gpu, plan_inverse, True)

# The two spaces after the colon reproduce the output of the original
# Python 2 statement (trailing space in the literal plus the separator).
print('Success status:  %s' % np.allclose(y, y_gpu.get(), atol=1e-6))

print('Testing in-place fft..')
x = np.asarray(np.random.rand(batch_size, N) +
               1j*np.random.rand(batch_size, N), np.complex64)
x_gpu = gpuarray.to_gpu(x)

plan = cu_fft.Plan(N, np.complex64, np.complex64, batch_size)
cu_fft.fft(x_gpu, x_gpu, plan)
开发者ID:Lurkman,项目名称:scikits.cuda,代码行数:30,代码来源:fft_batch_demo.py
注:本文中的scikits.cuda.fft.fft函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论