This article collects typical usage examples of the pyopencl.array.to_device function in Python. If you are wondering what exactly to_device does, how to call it, or what real-world usage looks like, the curated examples below should help.
Twenty code examples of the to_device function are shown below, sorted by popularity by default.
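As a quick orientation before the examples, here is a minimal, self-contained sketch of the usual to_device round trip: create a context and command queue, copy a NumPy array to the device, do some work on it, and copy the result back with .get(). The variable names and the element-wise expression are illustrative only, and running it requires an installed OpenCL runtime.

import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

# Create an OpenCL context and a command queue on any available device.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# Host data: an ordinary NumPy array.
host_a = np.arange(10, dtype=np.float32)

# to_device copies the host array into a pyopencl.array.Array on the device.
dev_a = cl_array.to_device(queue, host_a)

# Device arrays support NumPy-style arithmetic, executed on the OpenCL device.
dev_b = 2 * dev_a + 1

# .get() transfers the result back into a NumPy array on the host.
print(dev_b.get())

Most of the examples below follow this same pattern, differing mainly in which kernels they run on the device arrays.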
Example 1: computeEnergy

def computeEnergy(self, x, y, z, q):
    xd = cl_array.to_device(self.queue, x)
    yd = cl_array.to_device(self.queue, y)
    zd = cl_array.to_device(self.queue, z)
    qd = cl_array.to_device(self.queue, q)
    coulombEnergy = cl_array.zeros_like(xd)
    prec = x.dtype
    if prec == numpy.float32:
        self.compEnergyF.calc_potential_energy(self.queue,
                (x.size, ), None,
                xd.data, yd.data, zd.data,
                qd.data, coulombEnergy.data, numpy.int32(len(x)),
                numpy.float32(self.k), numpy.float32(self.impactFact),
                g_times_l=False)
    elif prec == numpy.float64:
        self.compEnergyD.calc_potential_energy(self.queue,
                (x.size, ), None,
                xd.data, yd.data, zd.data,
                qd.data, coulombEnergy.data, numpy.int32(len(x)),
                numpy.float64(self.k), numpy.float64(self.impactFact),
                g_times_l=False)
    else:
        print("Unknown float type.")
    return numpy.sum(coulombEnergy.get(self.queue))

Developer: Tech-XCorp, Project: ultracold-ions, Lines: 26, Source: ComputePotentialEnergy.py
Example 2: get_binned_data_angular

def get_binned_data_angular(self, limits=((-1, 1), (-1, 1)), points=500):
    """Azimuth/elevation-map measured ray endpoints to a circle and bin them on the CL device.
    Elevation is mapped linearly to the circle's radius and azimuth to phi, which is nice for
    cross-section plots of directivity. Binning is done with `points` bins within
    limits=((xmin, xmax), (ymin, ymax))."""
    (pos0, pwr0) = self.get_measured_rays()
    pos0_dev = cl_array.to_device(self.queue, pos0.astype(np.float32))
    x_dev = cl_array.zeros(self.queue, pwr0.shape, dtype=np.float32)
    y_dev = cl_array.zeros(self.queue, pwr0.shape, dtype=np.float32)
    pwr0_dev = cl_array.to_device(self.queue, pwr0.astype(np.float32))
    pwr_dev = cl_array.zeros(self.queue, pwr0.shape, dtype=np.float32)
    pivot = cl_array.to_device(self.queue, np.array([0, 0, 0, 0], dtype=np.float32))
    time1 = time()
    R_dev = cl_array.to_device(self.queue,
                               np.array([[1, 0, 0, 0],
                                         [0, 1, 0, 0],
                                         [0, 0, 1, 0],
                                         [0, 0, 0, 0]]).astype(np.float32))
    evt = self.prg.angular_project(self.queue, pwr0.shape, None,
                                   pos0_dev.data, pwr0_dev.data, R_dev.data, pivot.data,
                                   x_dev.data, y_dev.data, pwr_dev.data)
    evt.wait()
    x = x_dev.get()
    y = y_dev.get()
    pwr = np.float64(pwr_dev.get())
    time2 = time()
    dx = np.float64(limits[0][1] - limits[0][0]) / np.float64(points)
    dy = np.float64(limits[1][1] - limits[1][0]) / np.float64(points)
    pwr = pwr / (dx * dy)
    (H, x_coord, y_coord) = np.histogram2d(x=x.flatten(), y=y.flatten(), bins=points,
                                           range=limits, weights=pwr.flatten())
    self.hist_data = (H, x_coord, y_coord)
    return self.hist_data

Developer: goulu, Project: LightPyCL, Lines: 29, Source: iterative_tracer.py
Example 3: test_nan_arithmetic

def test_nan_arithmetic(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    def make_nan_contaminated_vector(size):
        shape = (size,)
        a = numpy.random.randn(*shape).astype(numpy.float32)
        # for i in range(0, shape[0], 3):
        #     a[i] = float('nan')
        from random import randrange
        for i in range(size // 10):
            a[randrange(0, size)] = float('nan')
        return a

    size = 1 << 20
    a = make_nan_contaminated_vector(size)
    # Note: this snippet appears to target an older pyopencl API in which
    # to_device also took the context as its first argument.
    a_gpu = cl_array.to_device(context, queue, a)
    b = make_nan_contaminated_vector(size)
    b_gpu = cl_array.to_device(context, queue, b)

    ab = a * b
    ab_gpu = (a_gpu * b_gpu).get()

    for i in range(size):
        assert numpy.isnan(ab[i]) == numpy.isnan(ab_gpu[i])

Developer: initcrash, Project: pyopencl, Lines: 26, Source: test_array.py
Example 4: test_fancy_indexing

def test_fancy_indexing(ctx_factory):
    if _PYPY:
        pytest.xfail("numpypy: multi value setting is not supported")
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    n = 2 ** 20 + 2 ** 18 + 22
    numpy_dest = np.zeros(n, dtype=np.int32)
    numpy_idx = np.arange(n, dtype=np.int32)
    np.random.shuffle(numpy_idx)
    numpy_src = 20000 + np.arange(n, dtype=np.int32)

    cl_dest = cl_array.to_device(queue, numpy_dest)
    cl_idx = cl_array.to_device(queue, numpy_idx)
    cl_src = cl_array.to_device(queue, numpy_src)

    numpy_dest[numpy_idx] = numpy_src
    cl_dest[cl_idx] = cl_src
    assert np.array_equal(numpy_dest, cl_dest.get())

    numpy_dest = numpy_src[numpy_idx]
    cl_dest = cl_src[cl_idx]
    assert np.array_equal(numpy_dest, cl_dest.get())

Developer: inducer, Project: pyopencl, Lines: 25, Source: test_array.py
Example 5: test_pthomas

def test_pthomas():
    nz = 3
    ny = 4
    nx = 5
    a = np.random.rand(nx)
    b = np.random.rand(nx)
    c = np.random.rand(nx)
    d = np.random.rand(nz, ny, nx)
    d_copy = d.copy()

    solver = pthomas.PThomas(context, queue, (nz, ny, nx))
    a_d = cl_array.to_device(queue, a)
    b_d = cl_array.to_device(queue, b)
    c_d = cl_array.to_device(queue, c)
    c2_d = cl_array.to_device(queue, c)
    d_d = cl_array.to_device(queue, d)
    evt = solver.solve(a_d, b_d, c_d, c2_d, d_d)
    d = d_d.get()

    for i in range(nz):
        for j in range(ny):
            x_true = scipy_solve_banded(a, b, c, d_copy[i, j, :])
            assert_allclose(x_true, d[i, j, :])
    print 'pass'

Developer: shwina, Project: compact-finite-differences, Lines: 25, Source: test_kernels.py
Example 6: allocate_arrays

def allocate_arrays(self):
    """
    Allocate various types of arrays for the tests
    """
    # numpy images
    self.grad = np.zeros(self.image.shape, dtype=np.complex64)
    self.grad2 = np.zeros((2,) + self.image.shape, dtype=np.float32)
    self.grad_ref = gradient(self.image)
    self.div_ref = divergence(self.grad_ref)
    self.image2 = np.zeros_like(self.image)
    # Device images
    self.gradient_parray = parray.zeros(self.la.queue, self.image.shape, np.complex64)
    # we should be using cl.Buffer(self.la.ctx, cl.mem_flags.READ_WRITE, size=self.image.nbytes*2),
    # but platforms not supporting OpenCL 1.2 have a problem with enqueue_fill_buffer,
    # so we use the parray "fill" utility
    self.gradient_buffer = self.gradient_parray.data
    # Do the same for image
    self.image_parray = parray.to_device(self.la.queue, self.image)
    self.image_buffer = self.image_parray.data
    # Refs
    tmp = np.zeros(self.image.shape, dtype=np.complex64)
    tmp.real = np.copy(self.grad_ref[0])
    tmp.imag = np.copy(self.grad_ref[1])
    self.grad_ref_parray = parray.to_device(self.la.queue, tmp)
    self.grad_ref_buffer = self.grad_ref_parray.data

Developer: dnaudet, Project: silx, Lines: 25, Source: test_linalg.py
Example 7: _make_inputs

def _make_inputs(self, queue, pixel_size):
    mf = cl.mem_flags
    v_1 = cl_array.to_device(queue, self._make_vertices(0, pixel_size[1]))
    v_2 = cl_array.to_device(queue, self._make_vertices(1, pixel_size[0]))
    v_3 = cl_array.to_device(queue, self._make_vertices(2, pixel_size[1]))

    return v_1, v_2, v_3

Developer: ufo-kit, Project: syris, Lines: 7, Source: mesh.py
Example 8: computeEnergy

def computeEnergy(self, x, y, z, q):
    xd = cl_array.to_device(self.queue, x)
    yd = cl_array.to_device(self.queue, y)
    zd = cl_array.to_device(self.queue, z)
    qd = cl_array.to_device(self.queue, q)
    # Allocate the result on the device (zeros_like of the device array qd).
    coulombEnergy = cl_array.zeros_like(qd)
    prec = x.dtype
    if prec == numpy.float32:
        self.compEnergyF.calc_potential_energy(
            self.queue, (x.size, ),
            None,
            xd.data,
            yd.data,
            zd.data,
            qd.data,
            coulombEnergy.data,
            g_times_l=False)
    elif prec == numpy.float64:
        self.compEnergyD.calc_potential_energy(
            self.queue, (x.size, ),
            None,
            xd.data,
            yd.data,
            zd.data,
            qd.data,
            coulombEnergy.data,
            g_times_l=False)
    else:
        print("Unknown float type.")
    return numpy.sum(coulombEnergy.get(self.queue))

Developer: nistpenning, Project: ultracold-ions, Lines: 32, Source: ComputePotentialEnergy.py
Example 9: compute_preconditioners

def compute_preconditioners(self):
    """
    Create a diagonal preconditioner for the projection and backprojection
    operator.
    Each term of the diagonal is the sum of the projector/backprojector
    along rows [1], i.e. the projection/backprojection of an array of ones.

    [1] Jens Gregor and Thomas Benson,
        Computational Analysis and Improvement of SIRT,
        IEEE Transactions on Medical Imaging, vol. 27, no. 7, 2008
    """
    # r_{i,i} = 1/(sum_j a_{i,j})
    slice_ones = np.ones(self.backprojector.slice_shape, dtype=np.float32)
    R = 1. / self.projector.projection(slice_ones)  # could be done entirely on the GPU, but we want extra checks
    R[np.logical_not(np.isfinite(R))] = 1.  # in case the rotation axis is off-centre
    self.d_R = parray.to_device(self.queue, R)

    # c_{j,j} = 1/(sum_i a_{i,j})
    sino_ones = np.ones(self.sino_shape, dtype=np.float32)
    C = 1. / self.backprojector.backprojection(sino_ones)
    C[np.logical_not(np.isfinite(C))] = 1.  # in case the rotation axis is off-centre
    self.d_C = parray.to_device(self.queue, C)

    self.add_to_cl_mem({
        "d_R": self.d_R,
        "d_C": self.d_C
    })

Developer: dnaudet, Project: silx, Lines: 27, Source: reconstruction.py
Example 10: get_array

def get_array(data, queue=None):
    """Get a pyopencl.array.Array from *data*, which can be a numpy array, a
    pyopencl.array.Array or a pyopencl.Image. *queue* is an OpenCL command queue.
    """
    if not queue:
        queue = cfg.OPENCL.queue

    if isinstance(data, cl_array.Array):
        result = data
    elif isinstance(data, np.ndarray):
        if data.dtype.kind == 'c':
            if data.dtype.itemsize != cfg.PRECISION.cl_cplx:
                data = data.astype(cfg.PRECISION.np_cplx)
            result = cl_array.to_device(queue, data.astype(cfg.PRECISION.np_cplx))
        else:
            if data.dtype.kind != 'f' or data.dtype.itemsize != cfg.PRECISION.cl_float:
                data = data.astype(cfg.PRECISION.np_float)
            result = cl_array.to_device(queue, data.astype(cfg.PRECISION.np_float))
    elif isinstance(data, cl.Image):
        result = cl_array.empty(queue, data.shape[::-1], np.float32)
        cl.enqueue_copy(queue, result.data, data, offset=0, origin=(0, 0),
                        region=result.shape[::-1])
        if result.dtype.itemsize != cfg.PRECISION.cl_float:
            result = result.astype(cfg.PRECISION.np_float)
    else:
        raise TypeError('Unsupported data type {}'.format(type(data)))

    return result

Developer: ufo-kit, Project: syris, Lines: 28, Source: util.py
Example 11: __init__

def __init__(self, ctx, queue, dtype=np.float32):
    self.ctx = ctx
    self.queue = queue

    sobel_c = np.array([1., 0., -1.]).astype(dtype)
    sobel_r = np.array([1., 2., 1.]).astype(dtype)
    self.sobel_c = cl_array.to_device(self.queue, sobel_c)
    self.sobel_r = cl_array.to_device(self.queue, sobel_r)
    self.scratch = None

    self.sepconv_rc = LocalMemorySeparableCorrelation(self.ctx, self.queue, sobel_r, sobel_c)
    self.sepconv_cr = LocalMemorySeparableCorrelation(self.ctx, self.queue, sobel_c, sobel_r)

    TYPE = ""
    if dtype == np.float32:
        TYPE = "float"
    elif dtype == np.uint8:
        TYPE = "unsigned char"
    elif dtype == np.uint16:
        TYPE = "unsigned short"

    self.mag = ElementwiseKernel(ctx,
                                 "float *result, %s *imgx, %s *imgy" % (TYPE, TYPE),
                                 "result[i] = sqrt((float)imgx[i]*imgx[i] + (float)imgy[i]*imgy[i])",
                                 "mag")

Developer: coxlab, Project: camera-capture-thing, Lines: 25, Source: simple_cl_conv.py
Example 12: get_binned_data_stereographic

def get_binned_data_stereographic(self, limits=((-1, 1), (-1, 1)), points=500):  # project data stereographically onto the xy plane and bin it
    """Stereographically project measured ray endpoints and bin them on the CL device.
    This is a lot faster when you have loads of data. Binning is done with `points`
    bins within limits=((xmin, xmax), (ymin, ymax))."""
    (pos0, pwr0) = self.get_measured_rays()
    pos0_dev = cl_array.to_device(self.queue, pos0.astype(np.float32))
    x_dev = cl_array.zeros(self.queue, pwr0.shape, dtype=np.float32)
    y_dev = cl_array.zeros(self.queue, pwr0.shape, dtype=np.float32)
    pwr0_dev = cl_array.to_device(self.queue, pwr0.astype(np.float32))
    pwr_dev = cl_array.zeros(self.queue, pwr0.shape, dtype=np.float32)
    pivot = cl_array.to_device(self.queue, np.array([0, 0, 0, 0], dtype=np.float32))
    time1 = time()
    R_dev = cl_array.to_device(self.queue,
                               np.array([[1, 0, 0, 0],
                                         [0, 1, 0, 0],
                                         [0, 0, 1, 0],
                                         [0, 0, 0, 0]]).astype(np.float32))
    evt = self.prg.stereograph_project(self.queue, pwr0.shape, None,
                                       pos0_dev.data, pwr0_dev.data, R_dev.data, pivot.data,
                                       x_dev.data, y_dev.data, pwr_dev.data)
    evt.wait()
    x = x_dev.get()
    y = y_dev.get()
    pwr = np.float64(pwr_dev.get())
    time2 = time()
    dx = np.float64(limits[0][1] - limits[0][0]) / np.float64(points)
    dy = np.float64(limits[1][1] - limits[1][0]) / np.float64(points)
    pwr = pwr / (dx * dy)
    (H, x_coord, y_coord) = np.histogram2d(x=x.flatten(), y=y.flatten(), bins=points,
                                           range=limits, weights=pwr.flatten())
    self.hist_data = (H, x_coord, y_coord)
    return self.hist_data

Developer: goulu, Project: LightPyCL, Lines: 29, Source: iterative_tracer.py
Example 13: test_count_1

def test_count_1(self):
    nrepeats = 3
    shape = [5, 5, 5]
    np_interspace = randint(2, size=shape).astype(np.int32)
    np_access_interspace = randint(nrepeats, size=shape).astype(np.int32)
    np_count = np.ones([nrepeats] + shape, dtype=np.float32)
    weight = 0.5

    expected = np.ones_like(np_count)
    tmp = expected[0]
    tmp[np_interspace == 1] += weight
    for i in range(1, nrepeats):
        tmp = expected[i]
        tmp[np_access_interspace == i] += weight

    cl_interspace = cl_array.to_device(self.queue, np_interspace)
    cl_access_interspace = cl_array.to_device(self.queue, np_access_interspace)
    cl_count = cl_array.to_device(self.queue, np_count)

    self.kernels.count(self.queue, cl_interspace, cl_access_interspace, weight, cl_count)
    self.assertTrue(np.allclose(expected, cl_count.get()))

Developer: JoaoRodrigues, Project: disvis, Lines: 25, Source: test_kernels.py
Example 14: CalcF

def CalcF(ctx, queue, m2, r2):
    # Define dimensions
    xdim = ydim = m2.shape[0]
    # m2 = np.float32(m2)
    # r2 = np.float32(r2)

    # Get the compiled kernel
    kernel = get_kernel(ctx, xdim)

    # Move data to the GPU
    gpu_m2 = cl_array.to_device(queue, m2)
    gpu_r2 = cl_array.to_device(queue, r2)
    gpu_result = cl_array.zeros(queue, (ydim, xdim), np.float32)

    # Define grid shape (the same as the matrix dimensions)
    grid_shape = (ydim, xdim)
    # Get group shape based on the matrix dimensions and the actual hardware
    group_shape = (16, 16)

    event = kernel.CalcF(queue, grid_shape, group_shape, gpu_result.data, gpu_m2.data, gpu_r2.data)
    event.wait()
    result = gpu_result.get()
    queue.finish()
    return result

Developer: martinsparre, Project: ClusterArchitecturesAndComputations, Lines: 30, Source: GPU_functions.py
Example 15: __init__

def __init__(self, target, queue, laplace=False):
    super(GPUCorrelator, self).__init__(target, laplace=laplace)
    self._queue = queue
    self._ctx = self._queue.context
    self._gpu = self._queue.device

    self._allocate_arrays()
    self._build_ffts()
    self._generate_kernels()

    target = self._target
    if self._laplace:
        target = self._laplace_filter(self._target)
    # move some arrays to the GPU
    self._gtarget = cl_array.to_device(self._queue, target.astype(np.float32))
    self._lcc_mask = cl_array.to_device(self._queue, self._lcc_mask.astype(np.int32))
    # do some one-time precalculations
    self._rfftn(self._gtarget, self._ft_target)
    self._k.multiply(self._gtarget, self._gtarget, self._target2)
    self._rfftn(self._target2, self._ft_target2)

    self._gcenter = np.asarray(list(self._center) + [0], dtype=np.float32)
    self._gshape = np.asarray(
        list(self._target.shape) + [np.product(self._target.shape)],
        dtype=np.int32)

Developer: latrocinia, Project: powerfit, Lines: 26, Source: powerfitter.py
Example 16: build

def build(self, coords, values, base):
    """Use OpenCL to build the arrays."""
    lenbase = base.shape[0]
    lencoords = coords.shape[0]

    coords_array = cla.to_device(self.queue, coords)
    values_array = cla.to_device(self.queue, values)
    base_array = cla.to_device(self.queue, base)
    template_array = cla.zeros(self.queue, (lenbase), dtype=np.int32)

    event = self.program.nearest(
        self.queue,
        base.shape,
        None,
        coords_array.data,
        values_array.data,
        base_array.data,
        template_array.data,
        np.int32(lencoords),
        self.nnear,
        self.usemajority,
    )
    try:
        event.wait()
    except cl.RuntimeError, inst:
        errstr = inst.__str__()
        if errstr == "clWaitForEvents failed: out of resources":
            print "OpenCL timed out, probably due to the display manager."
            print "Disable your display manager and try again!"
            print "If that does not work, rerun with OpenCL disabled."
        else:
            raise cl.RuntimeError, inst
        sys.exit(1)

Developer: KermMartian, Project: TopoMC, Lines: 31, Source: clidt.py
Example 17: _gpu_init

def _gpu_init(self):
    """Initialize all the data for the GPU-accelerated search"""
    self.gpu_data = {}
    g = self.gpu_data
    d = self.data
    q = self.queue

    # Move data to the GPU. Everything should be float32, as this is the
    # native word length for GPUs.
    g['rcore'] = cl_array.to_device(q, float32array(d['rcore'].array))
    g['rsurf'] = cl_array.to_device(q, float32array(d['rsurf'].array))
    # Make the scanning chain object an Image, as this is faster to rotate
    g['im_lsurf'] = cl.image_from_array(q.context, float32array(d['lsurf'].array))

    g['sampler'] = cl.Sampler(q.context, False, cl.addressing_mode.CLAMP,
                              cl.filter_mode.LINEAR)

    if self.distance_restraints:
        g['restraints'] = cl_array.to_device(q, float32array(d['restraints']))

    # Allocate arrays on the GPU
    g['lsurf'] = cl_array.zeros_like(g['rcore'])
    g['clashvol'] = cl_array.zeros_like(g['rcore'])
    g['intervol'] = cl_array.zeros_like(g['rcore'])
    g['interspace'] = cl_array.zeros(q, d['shape'], dtype=np.int32)
    g['restspace'] = cl_array.zeros_like(g['interspace'])
    g['access_interspace'] = cl_array.zeros_like(g['interspace'])
    g['best_access_interspace'] = cl_array.zeros_like(g['interspace'])

    # Arrays for counting. Reductions are typically tedious on the GPU, and we
    # need to define the workgroup size to allocate the correct amount of data.
    WORKGROUPSIZE = 32
    nsubhists = int(np.ceil(g['rcore'].size/WORKGROUPSIZE))
    g['subhists'] = cl_array.zeros(q, (nsubhists, d['nrestraints'] + 1), dtype=np.float32)
    g['viol_counter'] = cl_array.zeros(q, (nsubhists, d['nrestraints'], d['nrestraints']), dtype=np.float32)

    # Complex arrays
    g['ft_shape'] = list(d['shape'])
    g['ft_shape'][0] = d['shape'][0]//2 + 1
    g['ft_rcore'] = cl_array.zeros(q, g['ft_shape'], dtype=np.complex64)
    g['ft_rsurf'] = cl_array.zeros_like(g['ft_rcore'])
    g['ft_lsurf'] = cl_array.zeros_like(g['ft_rcore'])
    g['ft_clashvol'] = cl_array.zeros_like(g['ft_rcore'])
    g['ft_intervol'] = cl_array.zeros_like(g['ft_rcore'])

    # Other miscellaneous data
    g['nrot'] = d['nrot']
    g['max_clash'] = d['max_clash']
    g['min_interaction'] = d['min_interaction']

    # Kernels
    g['k'] = Kernels(q.context)
    g['k'].rfftn = pyclfft.RFFTn(q.context, d['shape'])
    g['k'].irfftn = pyclfft.iRFFTn(q.context, d['shape'])

    # Initial calculations
    g['k'].rfftn(q, g['rcore'], g['ft_rcore'])
    g['k'].rfftn(q, g['rsurf'], g['ft_rsurf'])

Developer: JoaoRodrigues, Project: disvis, Lines: 59, Source: disvis.py
Example 18: gs_mod_gpu

def gs_mod_gpu(idata, itera=10, osize=256):
    cut = osize//2
    pl = cl.get_platforms()[0]
    devices = pl.get_devices(device_type=cl.device_type.GPU)
    ctx = cl.Context(devices=[devices[0]])
    queue = cl.CommandQueue(ctx)

    plan = Plan(idata.shape, queue=queue, dtype=complex128)  # does not work with "complex128"
    src = str(Template(KERNEL).render(
        double_support=all(
            has_double_support(dev) for dev in devices),
        amd_double_support=all(
            has_amd_double_support(dev) for dev in devices)
    ))
    prg = cl.Program(ctx, src).build()

    idata_gpu = cl_array.to_device(queue, ifftshift(idata).astype("complex128"))
    fdata_gpu = cl_array.empty_like(idata_gpu)
    rdata_gpu = cl_array.empty_like(idata_gpu)
    plan.execute(idata_gpu.data, fdata_gpu.data)

    mask = exp(2.j*pi*random(idata.shape))
    mask[512-cut:512+cut, 512-cut:512+cut] = 0

    idata_gpu = cl_array.to_device(queue, ifftshift(idata+mask).astype("complex128"))
    fdata_gpu = cl_array.empty_like(idata_gpu)
    rdata_gpu = cl_array.empty_like(idata_gpu)
    error_gpu = cl_array.to_device(ctx, queue, zeros(idata_gpu.shape).astype("double"))
    plan.execute(idata_gpu.data, fdata_gpu.data)

    e = 1000
    ea = 1000
    for i in range(itera):
        prg.norm(queue, fdata_gpu.shape, None, fdata_gpu.data)
        plan.execute(fdata_gpu.data, rdata_gpu.data, inverse=True)
        #~ prg.norm1(queue, rdata_gpu.shape, None, rdata_gpu.data, idata_gpu.data, error_gpu.data, int32(cut))
        norm1 = prg.norm1
        norm1.set_scalar_arg_dtypes([None, None, None, int32])
        norm1(queue, rdata_gpu.shape, None, rdata_gpu.data, idata_gpu.data, error_gpu.data, int32(cut))
        e = sqrt(cl_array.sum(error_gpu).get())/(2*cut)
        #~ if e > ea:
        #~     break
        #~ ea = e
        plan.execute(rdata_gpu.data, fdata_gpu.data)

    fdata = fdata_gpu.get()
    fdata = ifftshift(fdata)
    fdata = exp(1.j*angle(fdata))
    return fdata

Developer: ramezquitao, Project: pyoptools, Lines: 58, Source: gs.py
Example 19: test_touch

def test_touch(self):
    MAX_CLASH = 100 + 0.9
    MIN_INTER = 300 + 0.9
    NROT = np.random.randint(self.rotations.shape[0] + 1)
    rotmat = self.rotations[0]

    cpu_lsurf = np.zeros_like(self.im_lsurf.array)
    disvis.libdisvis.rotate_image3d(self.im_lsurf.array, self.vlength,
                                    np.linalg.inv(rotmat), self.im_center, cpu_lsurf)
    cpu_clashvol = numpy.fft.irfftn(numpy.fft.rfftn(cpu_lsurf).conj() * numpy.fft.rfftn(self.rcore.array))

    gpu_rcore = cl_array.to_device(self.queue, np.asarray(self.rcore.array, dtype=np.float32))
    gpu_im_lsurf = cl.image_from_array(self.queue.context, np.asarray(self.im_lsurf.array, dtype=np.float32))
    gpu_lsurf = cl_array.zeros(self.queue, self.shape, dtype=np.float32)
    self.kernels.rotate_image3d(self.queue, self.sampler, gpu_im_lsurf, rotmat, gpu_lsurf, self.im_center)

    gpu_ft_lsurf = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
    gpu_ft_rcore = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
    gpu_ft_clashvol = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
    gpu_clashvol = cl_array.zeros(self.queue, self.shape, dtype=np.float32)

    self.kernels.rfftn(self.queue, gpu_rcore, gpu_ft_rcore)
    self.kernels.rfftn(self.queue, gpu_lsurf, gpu_ft_lsurf)
    self.kernels.c_conj_multiply(self.queue, gpu_ft_lsurf, gpu_ft_rcore, gpu_ft_clashvol)
    self.kernels.irfftn(self.queue, gpu_ft_clashvol, gpu_clashvol)

    cpu_intervol = numpy.fft.irfftn(numpy.fft.rfftn(cpu_lsurf).conj() * numpy.fft.rfftn(self.rsurf.array))

    gpu_rsurf = cl_array.to_device(self.queue, np.asarray(self.rsurf.array, dtype=np.float32))
    gpu_ft_rsurf = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
    gpu_ft_intervol = cl_array.zeros(self.queue, self.ft_shape, dtype=np.complex64)
    gpu_intervol = cl_array.zeros(self.queue, self.shape, dtype=np.float32)

    cpu_interspace = np.zeros(self.shape, dtype=np.int32)
    gpu_interspace = cl_array.zeros(self.queue, self.shape, dtype=np.int32)

    self.kernels.rfftn(self.queue, gpu_rsurf, gpu_ft_rsurf)
    self.kernels.rfftn(self.queue, gpu_lsurf, gpu_ft_lsurf)
    self.kernels.c_conj_multiply(self.queue, gpu_ft_lsurf, gpu_ft_rsurf, gpu_ft_intervol)
    self.kernels.irfftn(self.queue, gpu_ft_intervol, gpu_intervol)

    self.kernels.touch(self.queue, gpu_clashvol, MAX_CLASH, gpu_intervol, MIN_INTER, gpu_interspace)
    np.logical_and(cpu_clashvol < MAX_CLASH, cpu_intervol > MIN_INTER, cpu_interspace)

    disvis.volume.Volume(cpu_interspace, self.im_lsurf.voxelspacing, self.im_lsurf.origin).tofile('cpu_interspace.mrc')
    disvis.volume.Volume(gpu_interspace.get(), self.im_lsurf.voxelspacing, self.im_lsurf.origin).tofile('gpu_interspace.mrc')
    disvis.volume.Volume(cpu_interspace - gpu_interspace.get(), self.im_lsurf.voxelspacing, self.im_lsurf.origin).tofile('diff.mrc')

    print()
    print(cpu_interspace.sum(), gpu_interspace.get().sum())
    print(np.abs(cpu_interspace - gpu_interspace.get()).sum())

    self.assertTrue(np.allclose(gpu_interspace.get(), cpu_interspace))

Developer: JoaoRodrigues, Project: disvis, Lines: 57, Source: test_cpu_vs_gpu.py
Example 20: main

def main():
    # Allocate the first GPU
    ctx = cl.create_some_context(0)  # use device 0, the GPU
    queue = cl.CommandQueue(ctx)

    # Define dimensions
    ydim = 1024
    xdim = 1024

    # Create random matrix
    matrix = np.random.random((ydim, xdim))
    matrix = np.float32(matrix)

    # Create random matrix2
    matrix2 = np.random.random((ydim, xdim))
    matrix2 = np.float32(matrix2)

    # Get the compiled kernel
    kernel = get_kernel(ctx, xdim)

    # Start timing
    t1 = time.time()

    # Move data to the GPU
    gpu_matrix = cl_array.to_device(queue, matrix)
    gpu_matrix2 = cl_array.to_device(queue, matrix2)
    gpu_result = cl_array.zeros(queue, (ydim, xdim), np.float32)

    # Define grid shape (the same as the matrix dimensions)
    grid_shape = (ydim, xdim)
    # Get group shape based on the matrix dimensions and the actual hardware
    group_shape = (16, 16)  # (32, 16)

    # Execute the kernel
    event = kernel.add(queue,
                       grid_shape, group_shape,
                       gpu_result.data,
                       gpu_matrix.data,
                       gpu_matrix2.data)
    # Wait for the kernel to finish
    event.wait()

    # Move the result from GPU to CPU
    result = gpu_result.get()

    # Measure end time
    t2 = time.time()

    # Print result and execution time
    print result
    print "Elapsed: %f seconds" % (t2 - t1)

    # Free the GPU resource
    queue.finish()

Developer: martinsparre, Project: ClusterArchitecturesAndComputations, Lines: 56, Source: MatrixMultiplication.py
Note: the pyopencl.array.to_device examples above were compiled from open-source projects hosted on platforms such as GitHub. Copyright in the code remains with the original authors, and any reuse should follow the corresponding project's license.