• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python array.to_device函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了 Python 中 pyopencl.array.to_device 函数的典型用法代码示例。如果您正苦于以下问题:Python to_device 函数的具体用法是什么?Python to_device 怎么用?有哪些 Python to_device 的使用例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



下文一共展示了 to_device 函数的 20 个代码示例,这些示例默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的 Python 代码示例。

示例1: computeEnergy

    def computeEnergy(self, x, y, z, q):
        """Return the host-side sum of the ``calc_potential_energy`` kernel output.

        ``x``, ``y``, ``z`` and ``q`` are copied to the device, the program
        matching ``x.dtype`` (``compEnergyF`` for float32, ``compEnergyD`` for
        float64) is launched with ``x.size`` work items, and the output buffer
        is read back and summed.

        For any other dtype a message is printed, no kernel is launched and
        the sum of the zero-initialised output buffer is returned.
        """
        device_x = cl_array.to_device(self.queue, x)
        device_y = cl_array.to_device(self.queue, y)
        device_z = cl_array.to_device(self.queue, z)
        device_q = cl_array.to_device(self.queue, q)
        energies = cl_array.zeros_like(device_x)

        # Pick the program and the matching scalar constructor for this precision
        precision = x.dtype
        if precision == numpy.float32:
            program, as_scalar = self.compEnergyF, numpy.float32
        elif precision == numpy.float64:
            program, as_scalar = self.compEnergyD, numpy.float64
        else:
            program = None
            print("Unknown float type.")

        if program is not None:
            program.calc_potential_energy(
                self.queue, (x.size, ), None,
                device_x.data, device_y.data, device_z.data,
                device_q.data, energies.data, numpy.int32(len(x)),
                as_scalar(self.k), as_scalar(self.impactFact),
                g_times_l=False)

        return numpy.sum(energies.get(self.queue))
开发者ID:Tech-XCorp,项目名称:ultracold-ions,代码行数:26,代码来源:ComputePotentialEnergy.py


示例2: get_binned_data_angular

	def get_binned_data_angular(self,limits=((-1,1),(-1,1)),points=500):
		"""Map measured ray endpoints to a circle on the CL device and bin them.

		Azimuth/elevation mapping: elevation is mapped linearly to the circle's
		radius and azimuth to phi, which is nice for cross-section plots of
		directivity.

		limits -- ((xmin, xmax), (ymin, ymax)) range covered by the histogram.
		points -- number of bins along each axis.

		Returns the tuple (H, x_coord, y_coord) produced by np.histogram2d; the
		same tuple is stored in self.hist_data.
		"""
		(pos0, pwr0) = self.get_measured_rays()

		# Upload the inputs and allocate one output element per measured ray
		pos0_dev = cl_array.to_device(self.queue, pos0.astype(np.float32))
		x_dev = cl_array.zeros(self.queue, pwr0.shape, dtype=np.float32)
		y_dev = cl_array.zeros(self.queue, pwr0.shape, dtype=np.float32)
		pwr0_dev = cl_array.to_device(self.queue, pwr0.astype(np.float32))
		pwr_dev = cl_array.zeros(self.queue, pwr0.shape, dtype=np.float32)
		pivot = cl_array.to_device(self.queue, np.array([0,0,0,0], dtype=np.float32))
		R_dev = cl_array.to_device(self.queue, np.array([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,0]]).astype(np.float32))

		evt = self.prg.angular_project(self.queue, pwr0.shape, None, pos0_dev.data, pwr0_dev.data, R_dev.data, pivot.data, x_dev.data, y_dev.data, pwr_dev.data)
		evt.wait()

		x = x_dev.get()
		y = y_dev.get()
		pwr = np.float64(pwr_dev.get())

		# Divide the weights by the area of one histogram bin
		dx = np.float64(limits[0][1]-limits[0][0])/np.float64(points)
		dy = np.float64(limits[1][1]-limits[1][0])/np.float64(points)
		pwr = pwr / (dx * dy)

		(H, x_coord, y_coord) = np.histogram2d(x=x.flatten(), y=y.flatten(), bins=points, range=limits, weights=pwr.flatten())
		self.hist_data = (H, x_coord, y_coord)
		return self.hist_data
开发者ID:goulu,项目名称:LightPyCL,代码行数:29,代码来源:iterative_tracer.py


示例3: test_nan_arithmetic

def test_nan_arithmetic(ctx_getter):
    """Check that NaNs land in the same places for host and device products.

    Two float32 vectors with randomly placed NaNs are multiplied element-wise
    with NumPy and with device arrays; the NaN masks of both results must
    match.
    """
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    def make_nan_contaminated_vector(size):
        """Return ``size`` float32 normal samples with ``size // 10`` NaN writes.

        Positions are drawn with replacement, so fewer distinct elements may
        end up being NaN.
        """
        from random import randrange

        a = numpy.random.randn(size).astype(numpy.float32)
        for _ in range(size // 10):
            a[randrange(0, size)] = float('nan')
        return a

    size = 1 << 20

    # NOTE(review): to_device is called as (context, queue, array) here;
    # confirm this matches the pyopencl version the test targets.
    a = make_nan_contaminated_vector(size)
    a_gpu = cl_array.to_device(context, queue, a)
    b = make_nan_contaminated_vector(size)
    b_gpu = cl_array.to_device(context, queue, b)

    ab = a * b
    ab_gpu = (a_gpu * b_gpu).get()

    # One vectorised comparison instead of a million per-element asserts
    assert numpy.array_equal(numpy.isnan(ab), numpy.isnan(ab_gpu))
开发者ID:initcrash,项目名称:pyopencl,代码行数:26,代码来源:test_array.py


示例4: test_fancy_indexing

def test_fancy_indexing(ctx_factory):
    """Compare index-array scatter and gather on device arrays against NumPy."""
    if _PYPY:
        pytest.xfail("numpypy: multi value setting is not supported")

    queue = cl.CommandQueue(ctx_factory())

    count = 2 ** 20 + 2**18 + 22
    host_dest = np.zeros(count, dtype=np.int32)
    host_idx = np.arange(count, dtype=np.int32)
    np.random.shuffle(host_idx)
    host_src = np.arange(count, dtype=np.int32) + 20000

    dev_dest = cl_array.to_device(queue, host_dest)
    dev_idx = cl_array.to_device(queue, host_idx)
    dev_src = cl_array.to_device(queue, host_src)

    # Scatter: dest[idx] = src
    host_dest[host_idx] = host_src
    dev_dest[dev_idx] = dev_src
    assert np.array_equal(host_dest, dev_dest.get())

    # Gather: dest = src[idx]
    host_gathered = host_src[host_idx]
    dev_gathered = dev_src[dev_idx]
    assert np.array_equal(host_gathered, dev_gathered.get())
开发者ID:inducer,项目名称:pyopencl,代码行数:25,代码来源:test_array.py


示例5: test_pthomas

def test_pthomas():
    """Check ``pthomas.PThomas.solve`` against ``scipy_solve_banded``.

    The coefficient arrays ``a``, ``b``, ``c`` have length ``nx`` and the
    right-hand side ``d`` has shape ``(nz, ny, nx)``. After the solve, every
    line ``d[i, j, :]`` read back from the device is compared with the
    reference solution computed from the untouched copy of ``d``.
    """
    nz = 3
    ny = 4
    nx = 5

    a = np.random.rand(nx)
    b = np.random.rand(nx)
    c = np.random.rand(nx)
    d = np.random.rand(nz, ny, nx)
    d_copy = d.copy()

    # ``context`` and ``queue`` are taken from the enclosing module
    solver = pthomas.PThomas(context, queue, (nz, ny, nx))
    a_d = cl_array.to_device(queue, a)
    b_d = cl_array.to_device(queue, b)
    c_d = cl_array.to_device(queue, c)
    c2_d = cl_array.to_device(queue, c)
    d_d = cl_array.to_device(queue, d)
    solver.solve(a_d, b_d, c_d, c2_d, d_d)
    d = d_d.get()

    for i in range(nz):
        for j in range(ny):
            x_true = scipy_solve_banded(a, b, c, d_copy[i, j, :])
            assert_allclose(x_true, d[i, j, :])
    # Parenthesised form works as a statement on Python 2 and a call on Python 3
    print('pass')
开发者ID:shwina,项目名称:compact-finite-differences,代码行数:25,代码来源:test_kernels.py


示例6: allocate_arrays

 def allocate_arrays(self):
     """
     Create the host (NumPy) and device (parray) arrays used by the tests
     """
     # Host-side arrays
     shape = self.image.shape
     self.grad = np.zeros(shape, dtype=np.complex64)
     self.grad2 = np.zeros((2,) + shape, dtype=np.float32)
     self.grad_ref = gradient(self.image)
     self.div_ref = divergence(self.grad_ref)
     self.image2 = np.zeros_like(self.image)

     # Device-side arrays
     queue = self.la.queue
     self.gradient_parray = parray.zeros(queue, shape, np.complex64)
     # A plain cl.Buffer(self.la.ctx, cl.mem_flags.READ_WRITE, size=self.image.nbytes*2)
     # would be the direct choice, but platforms not supporting OpenCL 1.2 have a
     # problem with enqueue_fill_buffer, so the parray "fill" utility is used
     # and the underlying buffer is taken from the parray
     self.gradient_buffer = self.gradient_parray.data
     # Same approach for the image
     self.image_parray = parray.to_device(queue, self.image)
     self.image_buffer = self.image_parray.data

     # Reference gradient: component 0 in the real part, component 1 in the imaginary part
     packed_ref = np.zeros(shape, dtype=np.complex64)
     packed_ref.real = np.copy(self.grad_ref[0])
     packed_ref.imag = np.copy(self.grad_ref[1])
     self.grad_ref_parray = parray.to_device(queue, packed_ref)
     self.grad_ref_buffer = self.grad_ref_parray.data
开发者ID:dnaudet,项目名称:silx,代码行数:25,代码来源:test_linalg.py


示例7: _make_inputs

    def _make_inputs(self, queue, pixel_size):
        """Upload three vertex arrays built by ``_make_vertices`` to the device.

        ``_make_vertices`` is called with index 0, 1 and 2, paired with
        ``pixel_size[1]``, ``pixel_size[0]`` and ``pixel_size[1]`` respectively.

        Returns the three resulting device arrays as a tuple ``(v_1, v_2, v_3)``.
        """
        v_1 = cl_array.to_device(queue, self._make_vertices(0, pixel_size[1]))
        v_2 = cl_array.to_device(queue, self._make_vertices(1, pixel_size[0]))
        v_3 = cl_array.to_device(queue, self._make_vertices(2, pixel_size[1]))

        return v_1, v_2, v_3
开发者ID:ufo-kit,项目名称:syris,代码行数:7,代码来源:mesh.py


示例8: computeEnergy

    def computeEnergy(self, x, y, z, q):
        """Return the host-side sum of the ``calc_potential_energy`` kernel output.

        ``x``, ``y``, ``z`` and ``q`` are copied to the device, the program
        matching ``x.dtype`` (``compEnergyF`` for float32, ``compEnergyD`` for
        float64) is launched with ``x.size`` work items, and the output buffer
        is read back and summed.

        For any other dtype a message is printed, no kernel is launched and
        the sum of the zero-initialised output buffer is returned.
        """
        xd = cl_array.to_device(self.queue, x)
        yd = cl_array.to_device(self.queue, y)
        zd = cl_array.to_device(self.queue, z)
        qd = cl_array.to_device(self.queue, q)
        # The output buffer must be a device array: pyopencl provides
        # ``zeros_like`` (there is no ``zero_like``) and it needs a device
        # array as its template. ``xd`` is used so the buffer has the dtype
        # the kernel branch below is selected by.
        coulombEnergy = cl_array.zeros_like(xd)
        prec = x.dtype
        # NOTE(review): only the five buffers are passed to the kernel here;
        # confirm that this matches the kernel's argument list.
        if prec == numpy.float32:
            self.compEnergyF.calc_potential_energy(
                self.queue, (x.size, ),
                None,
                xd.data,
                yd.data,
                zd.data,
                qd.data,
                coulombEnergy.data,
                g_times_l=False)
        elif prec == numpy.float64:
            self.compEnergyD.calc_potential_energy(
                self.queue, (x.size, ),
                None,
                xd.data,
                yd.data,
                zd.data,
                qd.data,
                coulombEnergy.data,
                g_times_l=False)
        else:
            print("Unknown float type.")

        return np.sum(coulombEnergy.get(self.queue))
开发者ID:nistpenning,项目名称:ultracold-ions,代码行数:32,代码来源:ComputePotentialEnergy.py


示例9: compute_preconditioners

    def compute_preconditioners(self):
        """
        Build the diagonal preconditioners of the projection and
        backprojection operators and store them on the device.

        Each diagonal term is the sum of the projector/backprojector along
        rows [1], i.e. the projection/backprojection of an array of ones.
        The reciprocals are stored as ``self.d_R`` and ``self.d_C`` and
        registered through ``add_to_cl_mem``.

        [1] Jens Gregor and Thomas Benson,
            Computational Analysis and Improvement of SIRT,
            IEEE transactions on medical imaging, vol. 27, no. 7,  2008
        """

        # r_{i,i} = 1/(sum_j a_{i,j})
        # Computed on the host (rather than entirely on the GPU) to allow extra checks
        ones_slice = np.ones(self.backprojector.slice_shape, dtype=np.float32)
        row_weights = 1./self.projector.projection(ones_slice)
        # Non-finite entries occur when the rotation axis is off-centre
        row_weights[~np.isfinite(row_weights)] = 1.
        self.d_R = parray.to_device(self.queue, row_weights)

        # c_{j,j} = 1/(sum_i a_{i,j})
        ones_sino = np.ones(self.sino_shape, dtype=np.float32)
        col_weights = 1./self.backprojector.backprojection(ones_sino)
        # Non-finite entries occur when the rotation axis is off-centre
        col_weights[~np.isfinite(col_weights)] = 1.
        self.d_C = parray.to_device(self.queue, col_weights)

        self.add_to_cl_mem(dict(d_R=self.d_R, d_C=self.d_C))
开发者ID:dnaudet,项目名称:silx,代码行数:27,代码来源:reconstruction.py


示例10: get_array

def get_array(data, queue=None):
    """Get pyopencl.array.Array from *data* which can be a numpy array, a pyopencl.array.Array or a
    pyopencl.Image. *queue* is an OpenCL command queue; when it is not given,
    ``cfg.OPENCL.queue`` is used.

    A pyopencl.array.Array is returned unchanged. A numpy array is converted
    to ``cfg.PRECISION.np_cplx`` (complex input) or ``cfg.PRECISION.np_float``
    (anything else) and uploaded. A pyopencl.Image is copied into a new
    float32 device array, which is converted to ``cfg.PRECISION.np_float``
    if its item size differs from ``cfg.PRECISION.cl_float``.

    Raises TypeError for any other type of *data*.
    """
    if not queue:
        queue = cfg.OPENCL.queue

    if isinstance(data, cl_array.Array):
        return data

    if isinstance(data, np.ndarray):
        # astype() returns a new array of the requested dtype whether or not
        # the input already has it, so one unconditional conversion replaces
        # the former "convert if different, then convert again" sequence.
        if data.dtype.kind == 'c':
            target_dtype = cfg.PRECISION.np_cplx
        else:
            target_dtype = cfg.PRECISION.np_float
        return cl_array.to_device(queue, data.astype(target_dtype))

    if isinstance(data, cl.Image):
        # The array shape is the image shape reversed; the copy region uses
        # the image's own ordering again
        result = cl_array.empty(queue, data.shape[::-1], np.float32)
        cl.enqueue_copy(queue, result.data, data, offset=0, origin=(0, 0),
                        region=result.shape[::-1])
        if result.dtype.itemsize != cfg.PRECISION.cl_float:
            result = result.astype(cfg.PRECISION.np_float)
        return result

    raise TypeError('Unsupported data type {}'.format(type(data)))
开发者ID:ufo-kit,项目名称:syris,代码行数:28,代码来源:util.py


示例11: __init__

    def __init__(self, ctx, queue, dtype=np.float32):
        """Set up the Sobel filter taps, separable correlators and magnitude kernel.

        ``ctx`` and ``queue`` are stored on the instance. ``dtype`` is the
        element type of the filter taps and selects the C type used for the
        ``imgx``/``imgy`` arguments of the magnitude kernel; for a dtype other
        than float32, uint8 or uint16 the C type name is left empty.
        """
        self.ctx = ctx
        self.queue = queue

        # Host copies of the two 3-tap filters, plus device copies
        col_taps = np.array([1., 0., -1.]).astype(dtype)
        row_taps = np.array([1., 2., 1.]).astype(dtype)
        self.sobel_c = cl_array.to_device(self.queue, col_taps)
        self.sobel_r = cl_array.to_device(self.queue, row_taps)

        self.scratch = None

        # One correlator per ordering of the two filters
        self.sepconv_rc = LocalMemorySeparableCorrelation(self.ctx, self.queue, row_taps, col_taps)
        self.sepconv_cr = LocalMemorySeparableCorrelation(self.ctx, self.queue, col_taps, row_taps)

        # Map the NumPy element type to the C type name used in the kernel signature
        c_type = ""
        for np_type, c_name in ((np.float32, "float"),
                                (np.uint8, "unsigned char"),
                                (np.uint16, "unsigned short")):
            if dtype == np_type:
                c_type = c_name
                break

        kernel_args = "float *result, %s *imgx, %s *imgy" % (c_type, c_type)
        kernel_body = "result[i] = sqrt((float)imgx[i]*imgx[i] + (float)imgy[i]*imgy[i])"
        self.mag = ElementwiseKernel(ctx, kernel_args, kernel_body, "mag")
开发者ID:coxlab,项目名称:camera-capture-thing,代码行数:25,代码来源:simple_cl_conv.py


示例12: get_binned_data_stereographic

	def get_binned_data_stereographic(self,limits=((-1,1),(-1,1)),points=500):
		"""Stereographically project measured ray endpoints onto the xy plane on the CL device and bin them.

		This is a lot faster when there is a large amount of data.

		limits -- ((xmin, xmax), (ymin, ymax)) range covered by the histogram.
		points -- number of bins along each axis.

		Returns the tuple (H, x_coord, y_coord) produced by np.histogram2d; the
		same tuple is stored in self.hist_data.
		"""
		(pos0, pwr0) = self.get_measured_rays()

		# Upload the inputs and allocate one output element per measured ray
		pos0_dev = cl_array.to_device(self.queue, pos0.astype(np.float32))
		x_dev = cl_array.zeros(self.queue, pwr0.shape, dtype=np.float32)
		y_dev = cl_array.zeros(self.queue, pwr0.shape, dtype=np.float32)
		pwr0_dev = cl_array.to_device(self.queue, pwr0.astype(np.float32))
		pwr_dev = cl_array.zeros(self.queue, pwr0.shape, dtype=np.float32)
		pivot = cl_array.to_device(self.queue, np.array([0,0,0,0], dtype=np.float32))
		R_dev = cl_array.to_device(self.queue, np.array([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,0]]).astype(np.float32))

		evt = self.prg.stereograph_project(self.queue, pwr0.shape, None, pos0_dev.data, pwr0_dev.data, R_dev.data, pivot.data, x_dev.data, y_dev.data, pwr_dev.data)
		evt.wait()

		x = x_dev.get()
		y = y_dev.get()
		pwr = np.float64(pwr_dev.get())

		# Divide the weights by the area of one histogram bin
		dx = np.float64(limits[0][1]-limits[0][0])/np.float64(points)
		dy = np.float64(limits[1][1]-limits[1][0])/np.float64(points)
		pwr = pwr / (dx * dy)

		(H, x_coord, y_coord) = np.histogram2d(x=x.flatten(), y=y.flatten(), bins=points, range=limits, weights=pwr.flatten())
		self.hist_data = (H, x_coord, y_coord)
		return self.hist_data
开发者ID:goulu,项目名称:LightPyCL,代码行数:29,代码来源:iterative_tracer.py


示例13: test_count_1

    def test_count_1(self):
        """Check the ``count`` kernel against a NumPy reference.

        The reference starts from ones and adds ``weight`` to slice 0 where
        the interspace equals 1, and to slice ``i`` (for ``i >= 1``) where the
        access interspace equals ``i``.
        """
        repeats = 3
        grid = [5, 5, 5]

        host_interspace = randint(2, size=grid).astype(np.int32)
        host_access = randint(repeats, size=grid).astype(np.int32)
        host_count = np.ones([repeats] + grid, dtype=np.float32)
        weight = 0.5

        # Host reference; indexing the first axis yields a view, so the
        # masked in-place additions write through to ``expected``
        expected = np.ones_like(host_count)
        expected[0][host_interspace == 1] += weight
        for level in range(1, repeats):
            expected[level][host_access == level] += weight

        dev_interspace = cl_array.to_device(self.queue, host_interspace)
        dev_access = cl_array.to_device(self.queue, host_access)
        dev_count = cl_array.to_device(self.queue, host_count)

        self.kernels.count(self.queue, dev_interspace, dev_access, weight, dev_count)

        self.assertTrue(np.allclose(expected, dev_count.get()))
开发者ID:JoaoRodrigues,项目名称:disvis,代码行数:25,代码来源:test_kernels.py


示例14: CalcF

def CalcF(ctx, queue, m2, r2):
    """Run the ``CalcF`` kernel on ``m2`` and ``r2`` and return the host result.

    The problem is treated as square with side ``m2.shape[0]``. The inputs
    are uploaded as given (no dtype conversion is applied here); the output
    is a float32 array of shape ``(side, side)``.
    """
    side = m2.shape[0]

    # Compiled kernel for this problem size
    kernel = get_kernel(ctx, side)

    # Device copies of the inputs plus a zeroed output buffer
    m2_dev = cl_array.to_device(queue, m2)
    r2_dev = cl_array.to_device(queue, r2)
    out_dev = cl_array.zeros(queue, (side, side), np.float32)

    # Global size equals the matrix dimensions; work-group size is fixed
    global_size = (side, side)
    local_size = (16, 16)

    kernel.CalcF(queue, global_size, local_size,
                 out_dev.data, m2_dev.data, r2_dev.data).wait()

    host_result = out_dev.get()
    queue.finish()

    return host_result
开发者ID:martinsparre,项目名称:ClusterArchitecturesAndComputations,代码行数:30,代码来源:GPU_functions.py


示例15: __init__

        def __init__(self, target, queue, laplace=False):
            """Initialise the GPU correlator and precompute target-dependent data.

            ``target`` and ``laplace`` are forwarded to the base-class
            initialiser. ``queue`` is the OpenCL command queue; its context
            and device are stored alongside it.

            After the arrays, FFTs and kernels are set up, the (optionally
            Laplace-filtered) target and the LCC mask are moved to the GPU,
            and the transforms of the target and of its element-wise square
            are computed once.
            """
            super(GPUCorrelator, self).__init__(target, laplace=laplace)
            self._queue = queue
            self._ctx = self._queue.context
            self._gpu = self._queue.device

            self._allocate_arrays()
            self._build_ffts()
            self._generate_kernels()

            target = self._target
            if self._laplace:
                target = self._laplace_filter(self._target)
            # move some arrays to the GPU
            self._gtarget = cl_array.to_device(self._queue, target.astype(np.float32))
            self._lcc_mask = cl_array.to_device(self._queue, self._lcc_mask.astype(np.int32))
            # Do some one-time precalculations
            self._rfftn(self._gtarget, self._ft_target)
            self._k.multiply(self._gtarget, self._gtarget, self._target2)
            self._rfftn(self._target2, self._ft_target2)

            # Center and shape are padded to four elements; the fourth shape
            # entry is the total number of elements of the target
            self._gcenter = np.asarray(list(self._center) + [0], dtype=np.float32)
            # np.prod replaces np.product, which was removed in NumPy 2.0
            self._gshape = np.asarray(
                    list(self._target.shape) + [np.prod(self._target.shape)],
                    dtype=np.int32)
开发者ID:latrocinia,项目名称:powerfit,代码行数:26,代码来源:powerfitter.py


示例16: build

 def build(self, coords, values, base):
     """Use OpenCL to build the arrays.

     ``coords``, ``values`` and ``base`` are uploaded to the device and the
     ``nearest`` kernel is launched with ``base.shape`` work items; it is
     given an int32 output buffer with ``base.shape[0]`` elements.

     If waiting for the kernel fails with the OpenCL "out of resources"
     error, a hint is printed and the process exits with status 1; any
     other ``cl.RuntimeError`` is re-raised.

     NOTE(review): the output buffer is not returned within this block;
     confirm how callers obtain the result.
     """
     lenbase = base.shape[0]
     lencoords = coords.shape[0]
     coords_array = cla.to_device(self.queue, coords)
     values_array = cla.to_device(self.queue, values)
     base_array = cla.to_device(self.queue, base)
     template_array = cla.zeros(self.queue, (lenbase), dtype=np.int32)
     event = self.program.nearest(
         self.queue,
         base.shape,
         None,
         coords_array.data,
         values_array.data,
         base_array.data,
         template_array.data,
         np.int32(lencoords),
         self.nnear,
         self.usemajority,
     )
     try:
         event.wait()
     except cl.RuntimeError as inst:
         # "except ... as" and print(...) are valid on both Python 2.6+ and 3
         if str(inst) != "clWaitForEvents failed: out of resources":
             # Not the known timeout: propagate with the original traceback
             raise
         print("OpenCL timed out, probably due to the display manager.")
         print("Disable your display manager and try again!")
         print("If that does not work, rerun with OpenCL disabled.")
         sys.exit(1)
开发者ID:KermMartian,项目名称:TopoMC,代码行数:31,代码来源:clidt.py


示例17: _gpu_init

    def _gpu_init(self):
        """Method to initialize all the data for GPU-accelerated search.

        Fills ``self.gpu_data`` with the device copies of the input volumes,
        the work and counting arrays, the complex arrays for the Fourier
        transforms, a few scalar settings and the kernel object, then
        computes the transforms of ``rcore`` and ``rsurf`` once.
        """

        self.gpu_data = {}
        g = self.gpu_data
        d = self.data
        q = self.queue

        # move data to the GPU. All should be float32, as this is the native
        # length for GPUs
        g['rcore'] = cl_array.to_device(q, float32array(d['rcore'].array))
        g['rsurf'] = cl_array.to_device(q, float32array(d['rsurf'].array))
        # Make the scanning chain object an Image, as this is faster to rotate
        g['im_lsurf'] = cl.image_from_array(q.context, float32array(d['lsurf'].array))
        g['sampler'] = cl.Sampler(q.context, False, cl.addressing_mode.CLAMP,
                                  cl.filter_mode.LINEAR)

        if self.distance_restraints:
            g['restraints'] = cl_array.to_device(q, float32array(d['restraints']))

        # Allocate arrays on the GPU
        g['lsurf'] = cl_array.zeros_like(g['rcore'])
        g['clashvol'] = cl_array.zeros_like(g['rcore'])
        g['intervol'] = cl_array.zeros_like(g['rcore'])
        g['interspace'] = cl_array.zeros(q, d['shape'], dtype=np.int32)
        g['restspace'] = cl_array.zeros_like(g['interspace'])
        g['access_interspace'] = cl_array.zeros_like(g['interspace'])
        g['best_access_interspace'] = cl_array.zeros_like(g['interspace'])

        # arrays for counting
        # Reductions are typically tedious on GPU, and we need to define the
        # workgroupsize to allocate the correct amount of data
        WORKGROUPSIZE = 32
        # Integer ceiling division: gives the same value as
        # int(np.ceil(size / WORKGROUPSIZE)) under true division, and stays
        # correct when "/" is integer division (Python 2 without
        # "from __future__ import division"), where the old form rounded down
        nsubhists = (g['rcore'].size + WORKGROUPSIZE - 1) // WORKGROUPSIZE
        g['subhists'] = cl_array.zeros(q, (nsubhists, d['nrestraints'] + 1), dtype=np.float32)
        g['viol_counter'] = cl_array.zeros(q, (nsubhists, d['nrestraints'], d['nrestraints']), dtype=np.float32)

        # complex arrays; the first axis is shortened to shape[0]//2 + 1
        g['ft_shape'] = list(d['shape'])
        g['ft_shape'][0] = d['shape'][0]//2 + 1
        g['ft_rcore'] = cl_array.zeros(q, g['ft_shape'], dtype=np.complex64)
        g['ft_rsurf'] = cl_array.zeros_like(g['ft_rcore'])
        g['ft_lsurf'] = cl_array.zeros_like(g['ft_rcore'])
        g['ft_clashvol'] = cl_array.zeros_like(g['ft_rcore'])
        g['ft_intervol'] = cl_array.zeros_like(g['ft_rcore'])

        # other miscellaneous data
        g['nrot'] = d['nrot']
        g['max_clash'] = d['max_clash']
        g['min_interaction'] = d['min_interaction']

        # kernels
        g['k'] = Kernels(q.context)
        g['k'].rfftn = pyclfft.RFFTn(q.context, d['shape'])
        g['k'].irfftn = pyclfft.iRFFTn(q.context, d['shape'])

        # initial calculations
        g['k'].rfftn(q, g['rcore'], g['ft_rcore'])
        g['k'].rfftn(q, g['rsurf'], g['ft_rsurf'])
开发者ID:JoaoRodrigues,项目名称:disvis,代码行数:59,代码来源:disvis.py


示例18: gs_mod_gpu

def gs_mod_gpu(idata,itera=10,osize=256):
    """Run ``itera`` iterations of the GPU phase-retrieval loop on ``idata``.

    A random unit-modulus mask, zeroed inside a square of side ``osize``,
    is added to ``idata``. The loop then alternates between the ``norm``
    kernel in the transformed domain and the ``norm1`` kernel after the
    inverse transform.

    Returns ``exp(1j * angle(F))``, where ``F`` is the final transformed
    array read back from the device and passed through ``ifftshift``.

    NOTE(review): the zeroed square is centred on index 512 on both axes,
    which presumably assumes a 1024x1024 ``idata`` -- confirm before using
    other sizes.
    """
    cut = osize//2

    # First GPU device of the first platform
    pl = cl.get_platforms()[0]
    devices = pl.get_devices(device_type=cl.device_type.GPU)
    ctx = cl.Context(devices=[devices[0]])
    queue = cl.CommandQueue(ctx)

    plan = Plan(idata.shape, queue=queue, dtype=complex128)  # original note: does not work with "complex128"

    src = str(Template(KERNEL).render(
        double_support=all(
            has_double_support(dev) for dev in devices),
        amd_double_support=all(
            has_amd_double_support(dev) for dev in devices)
        ))
    prg = cl.Program(ctx, src).build()

    mask = exp(2.j*pi*random(idata.shape))
    mask[512-cut:512+cut, 512-cut:512+cut] = 0

    # The earlier upload and transform of the unmasked input were removed:
    # every buffer they produced was replaced before being read.
    idata_gpu = cl_array.to_device(queue, ifftshift(idata+mask).astype("complex128"))
    fdata_gpu = cl_array.empty_like(idata_gpu)
    rdata_gpu = cl_array.empty_like(idata_gpu)
    # Same (queue, array) call form as the upload above; the former
    # (ctx, queue, array) form is not accepted by current pyopencl
    error_gpu = cl_array.to_device(queue, zeros(idata_gpu.shape).astype("double"))
    plan.execute(idata_gpu.data, fdata_gpu.data)

    # Loop-invariant kernel setup, done once
    norm1 = prg.norm1
    norm1.set_scalar_arg_dtypes([None, None, None, int32])

    e = 1000
    for _ in range(itera):
        prg.norm(queue, fdata_gpu.shape, None, fdata_gpu.data)
        plan.execute(fdata_gpu.data, rdata_gpu.data, inverse=True)
        norm1(queue, rdata_gpu.shape, None, rdata_gpu.data, idata_gpu.data, error_gpu.data, int32(cut))

        # Error value of this iteration; it is neither returned nor used
        # for control flow (NOTE(review): drop it or use it for early exit)
        e = sqrt(cl_array.sum(error_gpu).get())/(2*cut)

        plan.execute(rdata_gpu.data, fdata_gpu.data)

    fdata = fdata_gpu.get()
    fdata = ifftshift(fdata)
    fdata = exp(1.j*angle(fdata))
    return fdata
开发者ID:ramezquitao,项目名称:pyoptools,代码行数:58,代码来源:gs.py


示例19: test_touch

    def test_touch(self):
        """Compare the GPU ``touch`` kernel output with a NumPy reference.

        The scanning surface is rotated on the CPU and on the GPU, the clash
        and interaction volumes are obtained as FFT-based correlations on
        both sides, and the resulting interaction spaces must agree. The CPU
        result, the GPU result and their difference are also written to
        ``.mrc`` files and their sums are printed.
        """
        MAX_CLASH = 100 + 0.9
        MIN_INTER = 300 + 0.9
        queue = self.queue
        k = self.kernels

        def real_zeros():
            # float32 device array with the real-space shape
            return cl_array.zeros(queue, self.shape, dtype=np.float32)

        def cplx_zeros():
            # complex64 device array with the transform shape
            return cl_array.zeros(queue, self.ft_shape, dtype=np.complex64)

        # Drawn but not used afterwards; kept so the random stream advances as before
        _unused_nrot = np.random.randint(self.rotations.shape[0] + 1)
        rotmat = self.rotations[0]

        # --- CPU reference ---
        cpu_lsurf = np.zeros_like(self.im_lsurf.array)
        disvis.libdisvis.rotate_image3d(self.im_lsurf.array, self.vlength, np.linalg.inv(rotmat), self.im_center, cpu_lsurf)
        cpu_clashvol = numpy.fft.irfftn(numpy.fft.rfftn(cpu_lsurf).conj() * numpy.fft.rfftn(self.rcore.array))
        cpu_intervol = numpy.fft.irfftn(numpy.fft.rfftn(cpu_lsurf).conj() * numpy.fft.rfftn(self.rsurf.array))

        # --- GPU: rotation ---
        gpu_rcore = cl_array.to_device(queue, np.asarray(self.rcore.array, dtype=np.float32))
        gpu_im_lsurf = cl.image_from_array(queue.context, np.asarray(self.im_lsurf.array, dtype=np.float32))
        gpu_lsurf = real_zeros()
        k.rotate_image3d(queue, self.sampler, gpu_im_lsurf, rotmat, gpu_lsurf, self.im_center)

        # --- GPU: clash volume ---
        gpu_ft_lsurf = cplx_zeros()
        gpu_ft_rcore = cplx_zeros()
        gpu_ft_clashvol = cplx_zeros()
        gpu_clashvol = real_zeros()

        k.rfftn(queue, gpu_rcore, gpu_ft_rcore)
        k.rfftn(queue, gpu_lsurf, gpu_ft_lsurf)
        k.c_conj_multiply(queue, gpu_ft_lsurf, gpu_ft_rcore, gpu_ft_clashvol)
        k.irfftn(queue, gpu_ft_clashvol, gpu_clashvol)

        # --- GPU: interaction volume ---
        gpu_rsurf = cl_array.to_device(queue, np.asarray(self.rsurf.array, dtype=np.float32))
        gpu_ft_rsurf = cplx_zeros()
        gpu_ft_intervol = cplx_zeros()
        gpu_intervol = real_zeros()

        cpu_interspace = np.zeros(self.shape, dtype=np.int32)
        gpu_interspace = cl_array.zeros(queue, self.shape, dtype=np.int32)

        k.rfftn(queue, gpu_rsurf, gpu_ft_rsurf)
        k.rfftn(queue, gpu_lsurf, gpu_ft_lsurf)
        k.c_conj_multiply(queue, gpu_ft_lsurf, gpu_ft_rsurf, gpu_ft_intervol)
        k.irfftn(queue, gpu_ft_intervol, gpu_intervol)

        # --- touch on both sides ---
        k.touch(queue, gpu_clashvol, MAX_CLASH, gpu_intervol, MIN_INTER, gpu_interspace)
        np.logical_and(cpu_clashvol < MAX_CLASH, cpu_intervol > MIN_INTER, cpu_interspace)

        # --- dump volumes and report ---
        spacing = self.im_lsurf.voxelspacing
        origin = self.im_lsurf.origin
        disvis.volume.Volume(cpu_interspace, spacing, origin).tofile('cpu_interspace.mrc')
        gpu_result = gpu_interspace.get()
        difference = cpu_interspace - gpu_result
        disvis.volume.Volume(gpu_result, spacing, origin).tofile('gpu_interspace.mrc')
        disvis.volume.Volume(difference, spacing, origin).tofile('diff.mrc')
        print()
        print(cpu_interspace.sum(), gpu_result.sum())
        print(np.abs(difference).sum())

        self.assertTrue(np.allclose(gpu_result, cpu_interspace))
开发者ID:JoaoRodrigues,项目名称:disvis,代码行数:57,代码来源:test_cpu_vs_gpu.py


示例20: main

def main():
    """Run the ``add`` kernel on two random 1024x1024 float32 matrices.

    Prints the result matrix and the elapsed wall-clock time covering the
    upload, the kernel execution and the read-back.
    """
    # Create a context and a command queue.
    # NOTE(review): the first positional argument of create_some_context is
    # the ``interactive`` flag, so 0 disables the prompt rather than
    # selecting device 0 -- confirm which device is actually picked.
    ctx = cl.create_some_context(0)
    queue = cl.CommandQueue(ctx)

    # Define dimensions
    ydim = 1024
    xdim = 1024

    # Create random matrix
    matrix = np.random.random((ydim, xdim))
    matrix = np.float32(matrix)

    # Create random matrix2
    matrix2 = np.random.random((ydim, xdim))
    matrix2 = np.float32(matrix2)

    # Get the compiled kernel
    kernel = get_kernel(ctx, xdim)

    # Start timing
    t1 = time.time()

    # Move data to the GPU
    gpu_matrix = cl_array.to_device(queue, matrix)
    gpu_matrix2 = cl_array.to_device(queue, matrix2)
    gpu_result = cl_array.zeros(queue, (ydim, xdim), np.float32)

    # Define grid shape (the same as the matrix dimensions)
    grid_shape = (ydim, xdim)

    # Fixed work-group shape
    group_shape = (16, 16)

    # Execute the kernel
    event = kernel.add(queue,
                       grid_shape, group_shape,
                       gpu_result.data,
                       gpu_matrix.data,
                       gpu_matrix2.data)

    # Wait for the kernel to finish
    event.wait()

    # Move the result from GPU to CPU
    result = gpu_result.get()

    # Measure end time
    t2 = time.time()

    # Print result and execution time (call form is valid on Python 2 and 3)
    print(result)
    print("Elapsed: %f seconds " % (t2 - t1))

    # Block until every command queued so far has completed
    queue.finish()
开发者ID:martinsparre,项目名称:ClusterArchitecturesAndComputations,代码行数:56,代码来源:MatrixMultiplication.py



注:本文中的pyopencl.array.to_device函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python array.zeros函数代码示例发布时间:2022-05-27
下一篇:
Python array.empty_like函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap