本文整理汇总了 Python 中 pycuda.gpuarray.empty_like 函数的典型用法代码示例。如果您正苦于以下问题:Python empty_like 函数的具体用法?Python empty_like 怎么用?Python empty_like 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了empty_like函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: calc_x_G
def calc_x_G(Kp1, C, Cm1, rp1, lm2, Am1, A, Ap1, lm1_s, lm1_si, r_s, r_si, Vsh, handle=None):
    """Build the GPU matrix ``x`` from up to two accumulated contributions.

    ``x`` starts as zeros of shape ``(Dm1, q * D - Dm1)`` where
    ``Dm1, D`` are the first two dimensions of ``A[0]`` and ``q = len(A)``.

    * If either ``C`` or ``Kp1`` is given (both must then be given), the sum
      over ``s`` of ``(eps_r(rp1, C[s], Ap1) + A[s].Kp1) . r_si . Vsh[s]`` is
      left-multiplied by ``lm1_s`` and added to ``x``.
    * If ``lm2`` is given, the sum over ``s`` of
      ``eps_l(lm2, Am1, Cm1[s]) . (r_s . Vsh[s])`` is left-multiplied by
      ``lm1_si`` and added to ``x``.

    ``handle`` is forwarded unchanged to every ``cla.dot`` / ``eps_*`` call.
    Returns the accumulated array ``x``.
    """
    first = A[0]
    D = first.shape[1]
    Dm1 = first.shape[0]
    q = len(A)

    x = garr.zeros((Dm1, q * D - Dm1), dtype=first.dtype)
    # Scratch buffers reused by both contributions.
    acc = garr.empty_like(x)
    scratch = garr.empty_like(first)

    if C is not None or Kp1 is not None:
        assert C is not None and Kp1 is not None
        acc.fill(0)
        for s in range(q):
            scratch = eps_r(rp1, C[s], Ap1, scratch, handle)  # ~1st line
            scratch += cla.dot(A[s], Kp1, handle=handle)  # ~3rd line
            right = cla.dot(scratch, r_si, handle=handle)
            acc += cla.dot(right, Vsh[s], handle=handle)
        x += cla.dot(lm1_s, acc, handle=handle)

    if lm2 is not None:
        acc.fill(0)
        for s in range(q):  # ~2nd line
            scratch = eps_l(lm2, Am1, Cm1[s], scratch, handle)
            right = cla.dot(r_s, Vsh[s], handle=handle)
            acc += cla.dot(scratch, right, handle=handle)
        x += cla.dot(lm1_si, acc, handle=handle)

    return x
开发者ID:amilsted,项目名称:evoMPS,代码行数:29,代码来源:cuda_alternatives.py
示例2: gpubarlinedata
def gpubarlinedata(xdata, ydata, bins, minval=None, maxval=None):
    """Bin ``xdata`` into ``bins`` equal-width bins and summarise each bin.

    Parameters
    ----------
    xdata, ydata : GPU arrays handed to the ``gpubarlinekern*`` kernels.
    bins : int
        Number of equal-width bins between ``minval`` and ``maxval``.
    minval, maxval : optional
        Range limits; when ``None`` they are taken from ``gpumin(xdata)`` /
        ``gpumax(xdata)``.

    Returns
    -------
    (xmeans, ymeans, errors) : tuple of lists
        One entry per bin whose count ``N`` (``gpusum`` of the bin mask)
        exceeds 1: the mean of x, the mean of y, and
        ``sqrt(gpusum(select) / (N * (N - 1)))`` after ``gpubarlinekernc``.
        Bins with ``N <= 1`` are skipped entirely.
    """
    # Identity test: ``== None`` would invoke the operand's __eq__.
    if maxval is None:
        maxval = gpumax(xdata)
    if minval is None:
        minval = gpumin(xdata)
    binsize = (maxval - minval) / float(bins)

    # Scratch buffers reused for every bin.
    inbin = gpuarray.empty_like(xdata)
    select = gpuarray.empty_like(xdata)

    xmeans = []
    ymeans = []
    errors = []
    for i in range(bins):
        lo = minval + binsize * i
        hi = minval + binsize * (i + 1)
        gpubarlinekerna(xdata, lo, hi, inbin)
        N = gpusum(inbin)
        if N > 1:
            gpubarlinekernb(inbin, ydata, select)
            my = gpusum(select) / float(N)
            gpubarlinekernb(inbin, xdata, select)
            mx = gpusum(select) / float(N)
            gpubarlinekernc(inbin, ydata, my, select)
            s = sqrt(gpusum(select) / (N * (N - 1)))
            xmeans.append(mx)
            ymeans.append(my)
            errors.append(s)
    return (xmeans, ymeans, errors)
开发者ID:michaelerule,项目名称:neurotools,代码行数:27,代码来源:statistics.py
示例3: integrate
def integrate(stepsize=0.01, stores=5, steps=10000, number_of_particles=2 ** 10):
    """Advance a particle system on the GPU, cycling through ``stores`` buffers.

    Particles come from ``create_particles``; ``stores - 1`` extra position
    and velocity buffers are allocated with ``empty_like``.  Each step reads
    buffer ``old``, launches the ``advance`` kernel writing buffer ``new``,
    and dumps the ``old`` state to ``step<t>_r.dat`` / ``step<t>_v.dat``.

    Parameters
    ----------
    stepsize : float
        Multiplied by the step index to label the output files.
    stores : int
        Number of position/velocity buffer pairs in the ring.
    steps : int
        Number of kernel launches in the main loop.
    number_of_particles : int
        Particle count; the grid is sized as ``number_of_particles // 32``.
    """
    gpu_r, gpu_v, gpu_mass = create_particles(number_of_particles)
    number_of_particles = np.int32(number_of_particles)

    gpu_rs, gpu_vs = [gpu_r], [gpu_v]
    for _ in range(stores - 1):
        gpu_rs.append(gpuarray.empty_like(gpu_r))
        gpu_vs.append(gpuarray.empty_like(gpu_v))

    advance = SourceModule(advance_kernel).get_function("advance")
    advance.prepare([np.intp, np.intp, np.intp, np.intp, np.intp, np.int32])

    # Launch dimensions must be >= 1 in every axis; the original zeros are
    # an invalid configuration.
    block_size = (32, 1, 1)
    grid_size = (int(number_of_particles) // 32, 1, 1)

    # First step: buffer 0 -> buffer 1.  The original indexed gpu_r / gpu_v
    # themselves, so the "output" was a slice of the input array.
    # NOTE(review): the (block, grid) argument order is kept from the
    # original; PyCUDA documents prepared_call(grid, block, *args) - confirm
    # against the PyCUDA version in use.
    advance.prepared_call(block_size, grid_size, gpu_rs[0], gpu_vs[0], gpu_mass,
                          gpu_rs[1], gpu_vs[1], number_of_particles)

    old, new = 1, 2
    for i in range(steps):
        # The original read undefined names rs_gpu / vs_gpu here.
        r = gpu_rs[old].get_async()
        v = gpu_vs[old].get_async()
        # NOTE(review): PyCUDA documents prepared_async_call(grid, block,
        # stream, *args); `prepared_call_async` is kept as written - confirm.
        advance.prepared_call_async(
            block_size, grid_size, gpu_rs[old], gpu_vs[old], gpu_mass, gpu_rs[new], gpu_vs[new], number_of_particles
        )
        # NOTE(review): NumPy has no `write` function; the intended writer
        # (np.save / np.savetxt / ndarray.tofile) cannot be determined here.
        # The async host copies must also have completed before writing.
        np.write("step{i:4}_r".format(i=i * stepsize) + ".dat", r)
        np.write("step{i:4}_v".format(i=i * stepsize) + ".dat", v)
        old, new = new, (new + 1) % stores
开发者ID:tricecold,项目名称:pynbody,代码行数:29,代码来源:nbody.py
示例4: sici
def sici(x_gpu):
    """
    Sine/Cosine integral.

    Evaluates the sine integral and the cosine integral elementwise on
    the input matrix.

    Parameters
    ----------
    x_gpu : GPUArray
        Input matrix of shape `(m, n)`; dtype must be float32 or float64.

    Returns
    -------
    (si_gpu, ci_gpu) : tuple of GPUArrays
        Freshly allocated arrays shaped like `x_gpu` holding the sine
        integrals and the cosine integrals of its entries.

    Raises
    ------
    ValueError
        If the dtype of `x_gpu` is neither float32 nor float64.

    Examples
    --------
    >>> import pycuda.gpuarray as gpuarray
    >>> import pycuda.autoinit
    >>> import numpy as np
    >>> import scipy.special
    >>> import special
    >>> x = np.array([[1, 2], [3, 4]], np.float32)
    >>> x_gpu = gpuarray.to_gpu(x)
    >>> (si_gpu, ci_gpu) = sici(x_gpu)
    >>> (si, ci) = scipy.special.sici(x)
    >>> np.allclose(si, si_gpu.get())
    True
    >>> np.allclose(ci, ci_gpu.get())
    True
    """
    dtype = x_gpu.dtype
    if dtype == np.float32:
        kernel_args = 'float *x, float *si, float *ci'
        kernel_op = 'sicif(x[i], &si[i], &ci[i])'
    elif dtype == np.float64:
        kernel_args = 'double *x, double *si, double *ci'
        kernel_op = 'sici(x[i], &si[i], &ci[i])'
    else:
        raise ValueError('unsupported type')

    # One compiled kernel per dtype, memoised on the function object.
    if dtype not in sici.cache:
        sici.cache[dtype] = elementwise.ElementwiseKernel(
            kernel_args, kernel_op,
            options=["-I", install_headers],
            preamble='#include "cuSpecialFuncs.h"')
    kernel = sici.cache[dtype]

    si_gpu = gpuarray.empty_like(x_gpu)
    ci_gpu = gpuarray.empty_like(x_gpu)
    kernel(x_gpu, si_gpu, ci_gpu)
    return (si_gpu, ci_gpu)
开发者ID:Lurkman,项目名称:scikits.cuda,代码行数:57,代码来源:special.py
示例5: make_GPU_gradient
def make_GPU_gradient(mesh, context):
    '''Prepare to compute gradient on the GPU w.r.t. the given mesh.

    Compiles ``gradient2.cu`` (located via the module-level ``where``
    prefix), uploads the mesh constants to the module globals ``mx``,
    ``my``, ``mz`` (from ``mesh.nx/ny/nz``) and ``dxInv``, ``dyInv``,
    ``dzInv`` (reciprocals of ``mesh.dx/dy/dz``); a missing mesh attribute
    counts as 1.

    Return gradient function.  The returned closure writes into three
    output buffers allocated once here, so successive calls return the
    same GPUArray objects with overwritten contents.
    '''
    # Host-side constants, computed before any file or GPU work.
    counts = [
        (symbol, np.array(int(getattr(mesh, attr, 1)), dtype=np.int32))
        for symbol, attr in (("mx", "nx"), ("my", "ny"), ("mz", "nz"))
    ]
    inv_spacings = [
        (symbol, np.array(1. / getattr(mesh, attr, 1), dtype=np.float64))
        for symbol, attr in (("dxInv", "dx"), ("dyInv", "dy"), ("dzInv", "dz"))
    ]

    with open(where + 'gradient2.cu') as fdlib:
        source = fdlib.read()
    module = SourceModule(source)

    # get_global returns a tuple whose first item is the device pointer.
    for symbol, host_value in counts + inv_spacings:
        cuda.memcpy_htod(module.get_global(symbol)[0], host_value)

    deriv_x = module.get_function("gradient_x")
    deriv_y = module.get_function("gradient_y")
    deriv_z = module.get_function("gradient_z")

    block, grid = mesh.get_domain_decomposition(DeviceData().max_threads)

    d_deriv_x = gpuarray.empty(shape=(1, mesh.n_nodes), dtype=np.float64)
    d_deriv_y = gpuarray.empty_like(d_deriv_x)
    d_deriv_z = gpuarray.empty_like(d_deriv_x)

    def _gradient(scalar_values):
        '''Calculate three-dimensional gradient for GPUArray
        scalar_values.

        All three kernels are launched regardless of ``mesh.dimension``;
        only the first ``mesh.dimension`` result buffers are returned.
        '''
        deriv_x(scalar_values, d_deriv_x, block=block, grid=grid)
        deriv_y(scalar_values, d_deriv_y, block=block, grid=grid)
        deriv_z(scalar_values, d_deriv_z, block=block, grid=grid)
        context.synchronize()
        return (d_deriv_x, d_deriv_y, d_deriv_z)[:mesh.dimension]

    return _gradient
开发者ID:aoeftiger,项目名称:PyPIC,代码行数:52,代码来源:gradient.py
示例6: __init__
def __init__(self,n_units,n_incoming,N,init_sd=1.0,precision=np.float32,magic_numbers=False):
    """Allocate the GPU state of one layer.

    Parameters
    ----------
    n_units : number of units (columns of the weight matrix).
    n_incoming : number of inputs (rows of the weight matrix).
    N : number of rows of the ``outputs`` buffer.
    init_sd : standard deviation of the normal draws for weights/biases.
    precision : dtype used for weights, biases, step sizes and outputs.
    magic_numbers : if true, the activation is ``1.7159 * tanh(2/3 * x)``;
        otherwise ``tanh`` of the input clipped to ``[-10, 10]``.
    """
    self.n_units = n_units
    self.n_incoming = n_incoming
    self.N = N
    w = np.random.normal(0,init_sd,(self.n_incoming,self.n_units))
    b = np.random.normal(0,init_sd,(1,n_units))
    self.weights = gpuarray.to_gpu(w.copy().astype(precision))
    self.gW = gpuarray.empty_like(self.weights)
    # Prior and ID must be set after creation
    self.prior = -1
    self.ID = -1
    self.biases = gpuarray.to_gpu(b.copy().astype(precision))
    self.gB = gpuarray.empty_like(self.biases)
    # Set up momentum variables for HMC sampler
    # NOTE(review): these draws are uploaded without a cast to `precision`.
    self.pW = gpuarray.to_gpu(np.random.normal(0,1,self.gW.shape))
    self.pB = gpuarray.to_gpu(np.random.normal(0,1,self.gB.shape))
    self.epsW = gpuarray.zeros(self.weights.shape,precision) + 1.0
    self.epsB = gpuarray.zeros(self.biases.shape,precision) + 1.0
    self.precision = precision
    self.outputs = gpuarray.zeros((self.N,self.n_units),precision)
    self.magic_numbers = magic_numbers
    # Define tan_h function on GPU
    # NOTE(review): both kernels declare `float *x` whatever `precision` is.
    if magic_numbers:
        # `2/3` is integer division in C and evaluates to 0, which made the
        # original kernel write tanh(0) == 0 everywhere; use float literals.
        self.tanh = ElementwiseKernel(
            "float *x",
            "x[i] = 1.7159 * tanh(2.0f/3.0f*x[i]);",
            "tan_h",preamble="#include <math.h>")
    else:
        self.tanh = ElementwiseKernel(
            "float *x",
            "x[i] = tanh(min(max(-10.0,x[i]),10.0));",
            "tan_h",preamble="#include <math.h>")
    # Compile kernels; the context manager closes the source file promptly.
    with open(path+'/kernels.cu', "r") as kernel_file:
        kernels = SourceModule(kernel_file.read())
    self.add_bias_kernel = kernels.get_function("add_bias")
    self.rng = curandom.XORWOWRandomNumberGenerator()
    # Initialize posterior weights
    self.posterior_weights = list()
    self.posterior_biases = list()
开发者ID:beamandrew,项目名称:BNN,代码行数:49,代码来源:Layer.py
示例7: e1z
def e1z(z_gpu):
    """
    Exponential integral with `n = 1` of complex arguments.

    Parameters
    ----------
    z_gpu : GPUArray
        Input matrix of shape `(m, n)`; dtype must be complex64 or
        complex128.

    Returns
    -------
    e_gpu : GPUArray
        Newly allocated array shaped like `z_gpu` containing the
        exponential integrals of its entries.

    Raises
    ------
    ValueError
        If the dtype of `z_gpu` is neither complex64 nor complex128.

    Examples
    --------
    >>> import pycuda.gpuarray as gpuarray
    >>> import pycuda.autoinit
    >>> import numpy as np
    >>> import scipy.special
    >>> import special
    >>> z = np.asarray(np.random.rand(4, 4)+1j*np.random.rand(4, 4), np.complex64)
    >>> z_gpu = gpuarray.to_gpu(z)
    >>> e_gpu = e1z(z_gpu)
    >>> e_sp = scipy.special.exp1(z)
    >>> np.allclose(e_sp, e_gpu.get())
    True
    """
    if z_gpu.dtype == np.complex64:
        double_flag = 0
    elif z_gpu.dtype == np.complex128:
        double_flag = 1
    else:
        raise ValueError('unsupported type')

    # Threads per block are capped at 256: the e1z kernel uses too many
    # registers to be launched with more.
    device = get_current_device()
    thread_cap = 256
    block_dim, grid_dim = select_block_grid_sizes(device, z_gpu.shape, thread_cap)

    # cache_dir=None is the normal setting; set it to False when debugging
    # to make sure the compiled kernel is not cached.
    kernel_source = e1z_mod_template.substitute(use_double=double_flag)
    module = SourceModule(kernel_source, cache_dir=None)
    kernel = module.get_function("e1z")

    e_gpu = gpuarray.empty_like(z_gpu)
    kernel(z_gpu, e_gpu, np.uint32(z_gpu.size), block=block_dim, grid=grid_dim)
    return e_gpu
开发者ID:sequoiar,项目名称:scikits.cuda,代码行数:60,代码来源:special.py
示例8: test_cublas_bug
def test_cublas_bug():
    '''
    Regression test: this SGEMM call used to make every later call fail,
    probably through memory corruption caused by swaprows.

    NOTE: this was confirmed by nvidia to be a bug within CUDA, and should be
    fixed in CUDA 6.5
    '''
    from pycuda.driver import Stream
    from skcuda.cublas import cublasSgemm
    from skcuda.misc import _global_cublas_handle as handle

    n_rows = 131
    tail = slice(128, n_rows)
    X = gpuarray.to_gpu(np.random.randn(n_rows, 2483).astype(np.float32))
    a = gpuarray.empty((X.shape[1], 3), dtype=np.float32)
    c = gpuarray.empty((a.shape[0], X.shape[1]), dtype=np.float32)
    b = gpuarray.empty_like(X)

    # GEMM dimensions; the leading dimensions are lda = ldc = m, ldb = k.
    m = a.shape[0]
    n = b[tail].shape[1]
    k = a.shape[1]
    cublasSgemm(handle, 'n', 'n', m, n, k, 1.0,
                b[tail].gpudata, m, a.gpudata, k, 0.0, c.gpudata, m)

    # Synchronising a fresh stream is the "later call" that used to fail.
    Stream().synchronize()
开发者ID:stachon,项目名称:binet,代码行数:34,代码来源:test_op.py
示例9: feed_forward
def feed_forward(self, input_data, prediction=False):
    """Propagate forward through the layer

    **Parameters:**

    input_data : ``GPUArray``
        Input data to perform dropout on.

    prediction : bool, optional
        Whether to run in prediction mode. If true, no units are dropped;
        the data is scaled by ``1 - dropout_probability`` instead.

    **Returns:**

    A tuple. In training mode ``(dropout_data, dropout_mask)``: the data
    after dropout and the value returned by ``sample_dropout_mask``. In
    prediction mode a 1-tuple holding the scaled input.

    **Raises:**

    ValueError
        If ``input_data.shape[1]`` differs from ``self.n_in``.
    """
    # Explicit exception instead of ``assert`` so the check survives ``-O``.
    if input_data.shape[1] != self.n_in:
        raise ValueError('Number of outputs from previous layer (%d) '
                         'does not match number of inputs to this layer (%d)' %
                         (input_data.shape[1], self.n_in))
    if not prediction:
        dropout_input = gpuarray.empty_like(input_data)
        dropout_mask = sample_dropout_mask(input_data,
                                           self.dropout_probability,
                                           target=dropout_input)
        return dropout_input, dropout_mask
    else:
        return (input_data * (1 - self.dropout_probability),)
开发者ID:DavidDJChen,项目名称:hebel,代码行数:28,代码来源:input_dropout.py
示例10: initializeGpuMemory
def initializeGpuMemory(self):
    """Allocate the uninitialised GPU buffers of the impulse model.

    Creates two ``(K, K)`` arrays (``nnz_Z`` as int32, ``g_suff_stats`` as
    float32) and ``GS`` shaped like ``self.base.dSS["dS"]``.
    """
    K = self.modelParams["proc_id_model","K"]
    pair_shape = (K, K)

    # Sufficient statistics for the parameters of G kernels
    self.gpuPtrs["impulse_model","nnz_Z"] = gpuarray.empty(pair_shape, dtype=np.int32)
    self.gpuPtrs["impulse_model","g_suff_stats"] = gpuarray.empty(pair_shape, dtype=np.float32)

    dS = self.base.dSS["dS"]
    self.gpuPtrs["impulse_model","GS"] = gpuarray.empty_like(dS)
开发者ID:richardkwo,项目名称:pyhawkes,代码行数:7,代码来源:impulse_models.py
示例11: computeIrDensity
def computeIrDensity(self, dS_gpu):
    """
    Compute the impulse response density at the time intervals in dS_gpu.

    Launches the ``computeLogisticNormalGSIndiv`` kernel with 1024 threads
    per block and ``ceil(N / 1024)`` blocks, and returns a new GPU array
    shaped like ``dS_gpu`` that the kernel receives as its output pointer.
    """
    K = self.modelParams["proc_id_model","K"]
    N = self.base.data.N
    threads = 1024
    n_blocks = int(np.ceil(N/1024.0))

    gS_gpu = gpuarray.empty_like(dS_gpu)

    # Update GS using the impulse response parameters
    kernel = self.gpuKernels["computeLogisticNormalGSIndiv"]
    kernel(np.int32(K),
           np.int32(self.base.data.N),
           self.gpuPtrs["proc_id_model","C"].gpudata,
           self.base.dSS["rowIndices"].gpudata,
           self.base.dSS["colPtrs"].gpudata,
           self.gpuPtrs["impulse_model","g_mu"].gpudata,
           self.gpuPtrs["impulse_model","g_tau"].gpudata,
           np.float32(self.params["dt_max"]),
           dS_gpu.gpudata,
           gS_gpu.gpudata,
           block=(threads, 1, 1),
           grid=(n_blocks,1)
           )
    return gS_gpu
开发者ID:richardkwo,项目名称:pyhawkes,代码行数:25,代码来源:impulse_models.py
示例12: feed_forward
def feed_forward(self, input_data, prediction=False):
    """Propagate forward through the layer

    **Parameters:**

    input_data : ``GPUArray``
        Input data to perform dropout on.

    prediction : bool, optional
        Whether to run in prediction mode. If true, no units are dropped;
        the data is scaled by ``1 - dropout_probability`` instead.

    **Returns:**

    A tuple. In training mode ``(dropout_data, dropout_mask)``: the data
    after dropout and the value returned by ``sample_dropout_mask``. In
    prediction mode a 1-tuple holding the scaled input.

    **Raises:**

    ValueError
        If ``input_data.shape[1]`` differs from ``self.n_in``.
    """
    n_features = input_data.shape[1]
    if n_features != self.n_in:
        raise ValueError('Number of outputs from previous layer (%d) '
                         'does not match number of inputs to this layer (%d)' %
                         (n_features, self.n_in))

    # Prediction mode: scale only, nothing is sampled.
    if prediction:
        return (input_data * (1 - self.dropout_probability),)

    dropped = gpuarray.empty_like(input_data)
    mask = sample_dropout_mask(input_data,
                               self.dropout_probability,
                               target=dropped)
    return dropped, mask
开发者ID:hani1986ye,项目名称:hebel,代码行数:31,代码来源:input_dropout.py
示例13: e1z
def e1z(z_gpu, dev):
    """
    Exponential integral with `n = 1` of complex arguments.

    Parameters
    ----------
    z_gpu : GPUArray
        Input matrix of shape `(m, n)`; dtype must be complex64 or
        complex128.
    dev : pycuda.driver.Device
        Device object to be used.

    Returns
    -------
    e_gpu : GPUArray
        Newly allocated array shaped like `z_gpu` containing the
        exponential integrals of its entries.

    Raises
    ------
    ValueError
        If the dtype of `z_gpu` is neither complex64 nor complex128.

    Examples
    --------
    >>> import pycuda.gpuarray as gpuarray
    >>> import pycuda.autoinit
    >>> import numpy as np
    >>> import scipy.special
    >>> import special
    >>> z = np.asarray(np.random.rand(4, 4)+1j*np.random.rand(4, 4), np.complex64)
    >>> z_gpu = gpuarray.to_gpu(z)
    >>> e_gpu = e1z(z_gpu, pycuda.autoinit.device)
    >>> e_sp = scipy.special.exp1(z)
    >>> np.allclose(e_sp, e_gpu.get())
    True
    """
    if z_gpu.dtype == np.complex64:
        double_flag = 0
    elif z_gpu.dtype == np.complex128:
        double_flag = 1
    else:
        raise ValueError("unsupported type")

    # Device limits feed the kernel template; launch sizes come from the
    # input shape.
    max_threads_per_block, max_block_dim, max_grid_dim = get_dev_attrs(dev)
    block_dim, grid_dim = select_block_grid_sizes(dev, z_gpu.shape)
    max_blocks_per_grid = max(max_grid_dim)

    kernel_source = e1z_mod_template.substitute(
        use_double=double_flag,
        max_threads_per_block=max_threads_per_block,
        max_blocks_per_grid=max_blocks_per_grid,
    )
    # cache_dir=None is the normal setting; set it to False when debugging
    # to make sure the compiled kernel is not cached.
    module = SourceModule(kernel_source, cache_dir=None, options=["-I", install_headers])
    kernel = module.get_function("e1z")

    e_gpu = gpuarray.empty_like(z_gpu)
    kernel(z_gpu.gpudata, e_gpu.gpudata, np.uint32(z_gpu.size), block=block_dim, grid=grid_dim)
    return e_gpu
开发者ID:stefanv,项目名称:scikits.cuda,代码行数:60,代码来源:special.py
示例14: worker
def worker():
    """MPI child process: pick a GPU by rank, run one kernel, report, disconnect.

    The process obtains its parent communicator, selects device
    ``rank % <device count>``, creates a context on it (popped at
    interpreter exit), doubles a two-element random vector with an
    elementwise kernel, prints both vectors, and disconnects from the
    parent communicator.
    """
    comm = MPI.Comm.Get_parent()
    size = comm.Get_size()
    rank = comm.Get_rank()
    name = MPI.Get_processor_name()

    import pycuda.driver as drv
    drv.init()

    # Find maximum number of available GPUs:
    max_gpus = drv.Device.count()

    # Use modular arithmetic to avoid assigning a nonexistent GPU:
    n = rank % max_gpus
    dev = drv.Device(n)
    ctx = dev.make_context()
    atexit.register(ctx.pop)

    # Execute a kernel:
    import pycuda.gpuarray as gpuarray
    from pycuda.elementwise import ElementwiseKernel
    kernel = ElementwiseKernel('double *y, double *x, double a',
                               'y[i] = a*x[i]')
    x_gpu = gpuarray.to_gpu(np.random.rand(2))
    y_gpu = gpuarray.empty_like(x_gpu)
    kernel(y_gpu, x_gpu, np.double(2.0))

    # One parenthesised expression: valid as a Python 2 print statement and
    # as a Python 3 print() call, with identical output.
    print('I am process %d of %d on CPU %s using GPU %s of %s [x_gpu=%s, y_gpu=%s]' %
          (rank, size, name, n, max_gpus, str(x_gpu.get()), str(y_gpu.get())))
    comm.Disconnect()
开发者ID:lebedov,项目名称:cudamps,代码行数:31,代码来源:demo.py
示例15: mult_matrix
def mult_matrix(a, b, target=None):
    """Run the ``mult_matrix`` kernel on two equally shaped arrays.

    The result is written to ``target`` (allocated like ``a`` when omitted),
    which is also returned.
    """
    assert a.shape == b.shape
    out = gpuarray.empty_like(a) if target is None else target
    all_kernels["mult_matrix"](a, b, out)
    return out
开发者ID:Khodeir,项目名称:hebel,代码行数:7,代码来源:elementwise.py
示例16: run_function
def run_function(X, Y_expected, func, rtol=1e-6, with_inplace_test=True, **kwargs):
    """Check ``func`` against ``Y_expected`` on the CPU and then on the GPU.

    On each side ``func`` is run with an explicit ``out`` target (which it
    must also return), without a target, and - when ``with_inplace_test``
    is true - with the input as its own target.  Every result is compared
    to ``Y_expected`` with ``assert_allclose`` at tolerance ``rtol``.
    Extra keyword arguments are forwarded to ``func``; they are passed
    through ``op.to_gpu`` before the GPU runs.
    """
    def _expect(result, label):
        assert_allclose(Y_expected, result, err_msg=label, rtol=rtol)

    # ---- CPU ----
    host_out = np.empty_like(Y_expected)
    got = func(X, out=host_out, **kwargs)
    _expect(got, "CPU with target")
    assert got is host_out

    got = func(X, **kwargs)
    _expect(got, "CPU, no target")

    if with_inplace_test:
        X_copy = X.copy()
        got = func(X_copy, out=X_copy, **kwargs)
        _expect(got, "CPU, inplace target")
        assert got is X_copy

    # ---- GPU ----
    kwargs = op.to_gpu(kwargs)
    X_dev = op.to_gpu(X)
    dev_out = gpuarray.empty_like(op.to_gpu(Y_expected))
    got = func(X_dev, out=dev_out, **kwargs)
    _expect(op.to_cpu(got), "GPU with target")
    assert got is dev_out

    got = func(X_dev, **kwargs)
    _expect(op.to_cpu(got), "GPU, no target")

    if with_inplace_test:
        got = func(X_dev, out=X_dev, **kwargs)
        _expect(op.to_cpu(got), "GPU, inplace target")
        assert got is X_dev
开发者ID:stachon,项目名称:binet,代码行数:34,代码来源:test_op.py
示例17: nan_to_zeros
def nan_to_zeros(x, target=None):
    """Run the ``nan_to_zeros`` kernel from ``x`` into ``target``.

    Both arrays must be C-contiguous.  ``target`` is allocated like ``x``
    when omitted, and is returned.
    """
    assert x.flags.c_contiguous
    out = gpuarray.empty_like(x) if target is None else target
    assert out.flags.c_contiguous
    all_kernels['nan_to_zeros'](x, out)
    return out
开发者ID:DamonAnderson,项目名称:hebel,代码行数:7,代码来源:elementwise.py
示例18: substract_matrix
def substract_matrix(a, b, target=None):
    """Run the ``substract_matrix`` kernel on two equally shaped arrays.

    The result is written to ``target`` (allocated like ``a`` when omitted),
    which is also returned.
    """
    assert a.shape == b.shape
    out = gpuarray.empty_like(a) if target is None else target
    all_kernels['substract_matrix'](a, b, out)
    return out
开发者ID:Snazz2001,项目名称:hebel,代码行数:7,代码来源:elementwise.py
示例19: test
def test():
    """Compare the ``cumath`` function ``name`` with its NumPy counterpart.

    For every size in ``sizes`` and every dtype under test, a seeded
    random input drawn from ``[a, b)`` is evaluated on the GPU (once
    returning a new array, once into a preallocated ``out``) and on the
    CPU; the largest absolute difference must not exceed ``threshold``.
    ``name``, ``complex``, ``a``, ``b``, ``sizes`` and ``threshold`` come
    from the enclosing scope.
    """
    gpu_func = getattr(cumath, name)
    cpu_func = getattr(np, numpy_func_names.get(name, name))
    dtypes_under_test = complex_dtypes if complex else dtypes

    for s in sizes:
        for dtype in dtypes_under_test:
            np.random.seed(1)
            A = (np.random.random(s)*(b-a) + a).astype(dtype)
            if complex:
                A += (np.random.random(s)*(b-a) + a)*1j

            args = gpuarray.to_gpu(A)
            gpu_results = gpu_func(args).get()
            cpu_results = cpu_func(A)

            # Freshly allocated result.
            max_err = np.max(np.abs(cpu_results - gpu_results))
            assert (max_err <= threshold).all(), (max_err, name, dtype)

            # Result written into a caller-supplied array.
            preallocated = gpuarray.empty_like(args)
            returned = gpu_func(args, out=preallocated)
            assert preallocated is returned
            host_copy = returned.get()
            max_err = np.max(np.abs(cpu_results - host_copy))
            assert (max_err <= threshold).all(), (max_err, name, dtype)
开发者ID:seibert,项目名称:pycuda,代码行数:30,代码来源:test_cumath.py
示例20: buffer_apply
def buffer_apply(self, input):
    """Filter ``input`` on the GPU and return the result in ``input``'s shape.

    The flattened input is uploaded to ``self.filt_x_gpu``; that buffer and
    the output buffer are (re)allocated whenever they are missing or the
    input size changes, which also forces a fresh ``prepared_call``.
    Later calls reuse the prepared arguments through ``launch_grid``.
    """
    # TODO: buffer apply to a large input may cause a launch timeout, need to buffer in
    # smaller chunks if this is the case
    coeff_b = self.filt_b_gpu
    coeff_a = self.filt_a_gpu
    state = self.filt_state

    needs_alloc = not hasattr(self, "filt_x_gpu") or input.size != self.filt_x_gpu.size
    if needs_alloc:
        self._desiredshape = input.shape
        self._has_run_once = False
        self.filt_x_gpu = gpuarray.to_gpu(input.flatten())
        self.filt_y_gpu = gpuarray.empty_like(self.filt_x_gpu)
    else:
        self.filt_x_gpu.set(input.flatten())

    x_dev = self.filt_x_gpu
    y_dev = self.filt_y_gpu

    if not self._has_run_once:
        self.gpu_filt_func.prepared_call(
            self.grid,
            intp(coeff_b.gpudata),
            intp(coeff_a.gpudata),
            intp(x_dev.gpudata),
            intp(state.gpudata),
            intp(y_dev.gpudata),
            int32(input.shape[0]),
        )
        self._has_run_once = True
    else:
        self.gpu_filt_func.launch_grid(*self.grid)

    host_result = y_dev.get(pagelocked=self.pagelocked_mem)
    return reshape(host_result, self._desiredshape)
开发者ID:dkiela,项目名称:thesis,代码行数:29,代码来源:gpulinearfilterbank.py
注:本文中的 pycuda.gpuarray.empty_like 函数示例由纯净天空整理自 GitHub/MSDocs 等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的 License;未经允许,请勿转载。
请发表评论