Python gpuarray.empty_like Function Code Examples


This article collects typical usage examples of the Python function pycuda.gpuarray.empty_like. If you are wondering how empty_like is used in practice, or looking for concrete examples of calling it, the hand-picked code samples below may help.



The following presents 20 code examples of the empty_like function, sorted by popularity by default. You can upvote the examples you find useful; these ratings help recommend better Python code samples.
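Before the project excerpts, a minimal self-contained sketch (not taken from any of the projects below) may help illustrate the basic pattern most of them follow: gpuarray.empty_like allocates an uninitialized GPUArray with the same shape and dtype as its argument, which then serves as the output buffer of a kernel. The kernel name double_it and its body are illustrative assumptions only.

# Minimal sketch of the empty_like pattern (illustrative, not from the projects below).
import numpy as np
import pycuda.autoinit  # creates a CUDA context as a side effect
import pycuda.gpuarray as gpuarray
from pycuda.elementwise import ElementwiseKernel

# Hypothetical elementwise kernel that doubles every entry.
double_it = ElementwiseKernel(
    "float *out, const float *in",
    "out[i] = 2.0f * in[i]",
    "double_it")

x_gpu = gpuarray.to_gpu(np.random.rand(4, 4).astype(np.float32))
y_gpu = gpuarray.empty_like(x_gpu)  # same shape/dtype as x_gpu, contents uninitialized
double_it(y_gpu, x_gpu)

assert np.allclose(y_gpu.get(), 2 * x_gpu.get())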

Example 1: calc_x_G

def calc_x_G(Kp1, C, Cm1, rp1, lm2, Am1, A, Ap1, lm1_s, lm1_si, r_s, r_si, Vsh, handle=None):
    D = A[0].shape[1]
    Dm1 = A[0].shape[0]
    q = len(A)
    
    x = garr.zeros((Dm1, q * D - Dm1), dtype=A[0].dtype)
    x_part = garr.empty_like(x)
    x_subpart = garr.empty_like(A[0])
    
    if not (C is None and Kp1 is None):
        assert (not C is None) and (not Kp1 is None)
        x_part.fill(0)
        for s in range(q):
            x_subpart = eps_r(rp1, C[s], Ap1, x_subpart, handle) #~1st line
            
            x_subpart += cla.dot(A[s], Kp1, handle=handle) #~3rd line
    
            x_part += cla.dot(cla.dot(x_subpart, r_si, handle=handle), Vsh[s], handle=handle)

        x += cla.dot(lm1_s, x_part, handle=handle)

    if not lm2 is None:
        x_part.fill(0)
        for s in range(q):     #~2nd line
            x_subpart = eps_l(lm2, Am1, Cm1[s], x_subpart, handle)
            x_part += cla.dot(x_subpart, cla.dot(r_s, Vsh[s], handle=handle), handle=handle)
        x += cla.dot(lm1_si, x_part, handle=handle)
        
    return x
Developer: amilsted, Project: evoMPS, Lines: 29, Source: cuda_alternatives.py


Example 2: gpubarlinedata

def gpubarlinedata(xdata, ydata, bins, minval=None, maxval=None):
    if maxval == None:
        maxval = gpumax(xdata)
    if minval == None:
        minval = gpumin(xdata)
    binsize = (maxval - minval) / float(bins)
    inbin = gpuarray.empty_like(xdata)
    select = gpuarray.empty_like(xdata)
    xmeans = []
    ymeans = []
    errors = []
    for i in xrange(bins):
        lo = minval + binsize * i
        hi = minval + binsize * (i + 1)
        gpubarlinekerna(xdata, lo, hi, inbin)
        N = gpusum(inbin)
        if N > 1:
            gpubarlinekernb(inbin, ydata, select)
            my = gpusum(select) / float(N)
            gpubarlinekernb(inbin, xdata, select)
            mx = gpusum(select) / float(N)
            gpubarlinekernc(inbin, ydata, my, select)
            s = sqrt(gpusum(select) / (N * (N - 1)))
            xmeans.append(mx)
            ymeans.append(my)
            errors.append(s)
    return (xmeans, ymeans, errors)
Developer: michaelerule, Project: neurotools, Lines: 27, Source: statistics.py


Example 3: integrate

def integrate(stepsize=0.01, stores=5, steps=10000, number_of_particles=2 ** 10):
    gpu_r, gpu_v, gpu_mass = create_particles(number_of_particles)
    number_of_particles = np.int32(number_of_particles)
    gpu_rs, gpu_vs = [gpu_r], [gpu_v]

    # Allocate the remaining slots of the position/velocity ring buffers.
    for i in xrange(stores - 1):
        gpu_rs.append(gpuarray.empty_like(gpu_r))
        gpu_vs.append(gpuarray.empty_like(gpu_v))

    advance = SourceModule(advance_kernel).get_function("advance")
    advance.prepare([np.intp, np.intp, np.intp, np.intp, np.intp, np.int32])

    # Every CUDA block/grid dimension must be at least 1.
    block_size = (32, 1, 1)
    grid_size = (int(number_of_particles / 32), 1)

    # prepared_call expects the grid first, then the block, then the kernel arguments.
    advance.prepared_call(grid_size, block_size,
                          gpu_rs[0].gpudata, gpu_vs[0].gpudata, gpu_mass.gpudata,
                          gpu_rs[1].gpudata, gpu_vs[1].gpudata, number_of_particles)

    old, new = 1, 2
    for i in xrange(steps):
        r = gpu_rs[old].get_async()
        v = gpu_vs[old].get_async()
        advance.prepared_async_call(
            grid_size, block_size, None,
            gpu_rs[old].gpudata, gpu_vs[old].gpudata, gpu_mass.gpudata,
            gpu_rs[new].gpudata, gpu_vs[new].gpudata, number_of_particles
        )

        # Write the retrieved snapshot to disk while the next step runs.
        np.savetxt("step{0:09.2f}_r.dat".format(i * stepsize), r)
        np.savetxt("step{0:09.2f}_v.dat".format(i * stepsize), v)

        old, new = new, (new + 1) % stores
Developer: tricecold, Project: pynbody, Lines: 29, Source: nbody.py


Example 4: sici

def sici(x_gpu):
    """
    Sine/Cosine integral.

    Computes the sine and cosine integral of every element in the
    input matrix.

    Parameters
    ----------
    x_gpu : GPUArray
        Input matrix of shape `(m, n)`.
        
    Returns
    -------
    (si_gpu, ci_gpu) : tuple of GPUArrays
        Tuple of GPUarrays containing the sine integrals and cosine
        integrals of the entries of `x_gpu`.
        
    Examples
    --------
    >>> import pycuda.gpuarray as gpuarray
    >>> import pycuda.autoinit
    >>> import numpy as np
    >>> import scipy.special
    >>> import special
    >>> x = np.array([[1, 2], [3, 4]], np.float32)
    >>> x_gpu = gpuarray.to_gpu(x)
    >>> (si_gpu, ci_gpu) = sici(x_gpu)
    >>> (si, ci) = scipy.special.sici(x)
    >>> np.allclose(si, si_gpu.get())
    True
    >>> np.allclose(ci, ci_gpu.get())
    True
    """

    if x_gpu.dtype == np.float32:
        args = 'float *x, float *si, float *ci'
        op = 'sicif(x[i], &si[i], &ci[i])'
    elif x_gpu.dtype == np.float64:
        args = 'double *x, double *si, double *ci'
        op = 'sici(x[i], &si[i], &ci[i])'
    else:
        raise ValueError('unsupported type')
    
    try:
        func = sici.cache[x_gpu.dtype]
    except KeyError:
        func = elementwise.ElementwiseKernel(args, op,
                                 options=["-I", install_headers],
                                 preamble='#include "cuSpecialFuncs.h"')
        sici.cache[x_gpu.dtype] = func

    si_gpu = gpuarray.empty_like(x_gpu)
    ci_gpu = gpuarray.empty_like(x_gpu)
    func(x_gpu, si_gpu, ci_gpu)
        
    return (si_gpu, ci_gpu)
Developer: Lurkman, Project: scikits.cuda, Lines: 57, Source: special.py


Example 5: make_GPU_gradient

def make_GPU_gradient(mesh, context):
    '''Prepare to compute gradient on the GPU w.r.t. the given mesh.
    Return gradient function.
    '''
    mx = int(getattr(mesh, 'nx', 1))
    my = int(getattr(mesh, 'ny', 1))
    mz = int(getattr(mesh, 'nz', 1))

    dxInv = np.array(1./getattr(mesh, 'dx', 1), dtype=np.float64)
    dyInv = np.array(1./getattr(mesh, 'dy', 1), dtype=np.float64)
    dzInv = np.array(1./getattr(mesh, 'dz', 1), dtype=np.float64)

    sizeof_double = 8
    with open(where + 'gradient2.cu') as fdlib:
        source = fdlib.read()
    module = SourceModule(source)

    mx_ptr = module.get_global("mx")[0]
    my_ptr = module.get_global("my")[0]
    mz_ptr = module.get_global("mz")[0]
    cuda.memcpy_htod(mx_ptr, np.array(mx, dtype=np.int32))
    cuda.memcpy_htod(my_ptr, np.array(my, dtype=np.int32))
    cuda.memcpy_htod(mz_ptr, np.array(mz, dtype=np.int32))

    dxInv_ptr = module.get_global("dxInv")[0]
    dyInv_ptr = module.get_global("dyInv")[0]
    dzInv_ptr = module.get_global("dzInv")[0]
    cuda.memcpy_htod(dxInv_ptr, dxInv)
    cuda.memcpy_htod(dyInv_ptr, dyInv)
    cuda.memcpy_htod(dzInv_ptr, dzInv)

    deriv_x = module.get_function("gradient_x")
    deriv_y = module.get_function("gradient_y")
    deriv_z = module.get_function("gradient_z")

    block, grid = mesh.get_domain_decomposition(DeviceData().max_threads)

    d_deriv_x = gpuarray.empty(shape=(1, mesh.n_nodes), dtype=np.float64)
    d_deriv_y = gpuarray.empty_like(d_deriv_x)
    d_deriv_z = gpuarray.empty_like(d_deriv_x)

    def _gradient(scalar_values):
        '''Calculate three-dimensional gradient for GPUArray
        scalar_values.
        '''
        deriv_x(scalar_values, d_deriv_x, block=block, grid=grid)
        deriv_y(scalar_values, d_deriv_y, block=block, grid=grid)
        deriv_z(scalar_values, d_deriv_z, block=block, grid=grid)
        context.synchronize()

        return (d_deriv_x, d_deriv_y, d_deriv_z)[:mesh.dimension]
    return _gradient
Developer: aoeftiger, Project: PyPIC, Lines: 52, Source: gradient.py


Example 6: __init__

 def __init__(self,n_units,n_incoming,N,init_sd=1.0,precision=np.float32,magic_numbers=False):
     
     self.n_units = n_units
     self.n_incoming = n_incoming
     self.N = N
     w = np.random.normal(0,init_sd,(self.n_incoming,self.n_units))
     b = np.random.normal(0,init_sd,(1,n_units))
     
     self.weights = gpuarray.to_gpu(w.copy().astype(precision))
     self.gW = gpuarray.empty_like(self.weights)
     
     # Prior and ID must be set after creation
     self.prior = -1
     self.ID = -1
             
     self.biases = gpuarray.to_gpu(b.copy().astype(precision))
     self.gB = gpuarray.empty_like(self.biases)
         
     #Set up momentum variables for HMC sampler
     self.pW = gpuarray.to_gpu(np.random.normal(0,1,self.gW.shape))
     self.pB = gpuarray.to_gpu(np.random.normal(0,1,self.gB.shape))
     
     self.epsW = gpuarray.zeros(self.weights.shape,precision) + 1.0
     self.epsB = gpuarray.zeros(self.biases.shape,precision) + 1.0        
     
     self.precision = precision
     self.outputs = gpuarray.zeros((self.N,self.n_units),precision)   
     
     self.magic_numbers = magic_numbers
     #Define tan_h function on GPU   
     if magic_numbers:
         self.tanh = ElementwiseKernel(
             "float *x",
             "x[i] = 1.7159 * tanh(2/3*x[i]);",
             "tan_h",preamble="#include <math.h>")
     else:
         self.tanh = ElementwiseKernel(
         "float *x",
         "x[i] = tanh(min(max(-10.0,x[i]),10.0));",
         "tan_h",preamble="#include <math.h>")
     #Compile kernels 
     kernels = SourceModule(open(path+'/kernels.cu', "r").read())        
     self.add_bias_kernel = kernels.get_function("add_bias")
     
     self.rng = curandom.XORWOWRandomNumberGenerator()
     
     ##Initialize posterior weights
     self.posterior_weights = list()
     self.posterior_biases = list()
Developer: beamandrew, Project: BNN, Lines: 49, Source: Layer.py


Example 7: e1z

def e1z(z_gpu):
    """
    Exponential integral with `n = 1` of complex arguments.

    Parameters
    ----------
    x_gpu : GPUArray
        Input matrix of shape `(m, n)`.
        
    Returns
    -------
    e_gpu : GPUArray
        GPUarrays containing the exponential integrals of
        the entries of `z_gpu`.

    Examples
    --------
    >>> import pycuda.gpuarray as gpuarray
    >>> import pycuda.autoinit
    >>> import numpy as np
    >>> import scipy.special
    >>> import special
    >>> z = np.asarray(np.random.rand(4, 4)+1j*np.random.rand(4, 4), np.complex64)
    >>> z_gpu = gpuarray.to_gpu(z)
    >>> e_gpu = e1z(z_gpu, pycuda.autoinit.device)
    >>> e_sp = scipy.special.exp1(z)
    >>> np.allclose(e_sp, e_gpu.get())
    True

    """

    if z_gpu.dtype == np.complex64:
        use_double = 0
    elif z_gpu.dtype == np.complex128:
        use_double = 1
    else:
        raise ValueError('unsupported type')

    
    # Get block/grid sizes; the number of threads per block is limited
    # to 256 because the e1z kernel defined above uses too many
    # registers to be invoked with more threads per block:
    dev = get_current_device()
    max_threads_per_block = 256
    block_dim, grid_dim = select_block_grid_sizes(dev, z_gpu.shape, max_threads_per_block)

    # Set this to False when debugging to make sure the compiled kernel is
    # not cached:
    cache_dir=None
    e1z_mod = \
             SourceModule(e1z_mod_template.substitute(use_double=use_double),
                          cache_dir=cache_dir)
    e1z_func = e1z_mod.get_function("e1z")

    e_gpu = gpuarray.empty_like(z_gpu)
    e1z_func(z_gpu, e_gpu,
              np.uint32(z_gpu.size),
              block=block_dim,
              grid=grid_dim)
    return e_gpu
Developer: sequoiar, Project: scikits.cuda, Lines: 60, Source: special.py


Example 8: test_cublas_bug

def test_cublas_bug():
    '''
    The SGEMM call would cause all calls after it to fail for some unknown
    reason. Likely this is caused by swaprows causing memory corruption.

    NOTE: this was confirmed by nvidia to be a bug within CUDA, and should be
          fixed in CUDA 6.5
    '''
    from pycuda.driver import Stream
    from skcuda.cublas import cublasSgemm
    from skcuda.misc import _global_cublas_handle as handle

    n = 131

    s = slice(128, n)
    X = gpuarray.to_gpu(np.random.randn(n, 2483).astype(np.float32))
    a = gpuarray.empty((X.shape[1], 3), dtype=np.float32)
    c = gpuarray.empty((a.shape[0], X.shape[1]), dtype=np.float32)
    b = gpuarray.empty_like(X)

    m, n = a.shape[0], b[s].shape[1]
    k = a.shape[1]
    lda = m
    ldb = k
    ldc = m
    #cublasSgemm(handle, 0, 0, m, n, k, 0.0, b.gpudata, lda, a.gpudata, ldb, 0.0, c.gpudata, ldc)
    cublasSgemm(handle, 'n', 'n', m, n, k, 1.0, b[s].gpudata, lda, a.gpudata, ldb, 0.0, c.gpudata, ldc)
    #print handle, 'n', 'n', m, n, k, 1.0, b[s].gpudata, lda, a.gpudata, ldb, 0.0, c.gpudata, ldc

    #gpuarray.dot(d, Xoutd[s])
    #op.sgemm(a, b[s], c)

    stream = Stream()
    stream.synchronize()
Developer: stachon, Project: binet, Lines: 34, Source: test_op.py


Example 9: feed_forward

    def feed_forward(self, input_data, prediction=False):
        """Propagate forward through the layer

        **Parameters:**

        input_data : ``GPUArray``
            Input data to perform dropout on.

        prediction : bool, optional
            Whether to use prediction model. If true, the data is
            scaled by ``1 - dropout_probability`` instead of dropout being applied.

        **Returns:**
        
        dropout_data : ``GPUArray``
            The data after performing dropout.
        """

        assert input_data.shape[1] == self.n_in

        if not prediction:
            dropout_input = gpuarray.empty_like(input_data)
            dropout_mask = sample_dropout_mask(input_data,
                                               self.dropout_probability, target=dropout_input
                                           )
            return dropout_input, dropout_mask
        else:
            return (input_data * (1 - self.dropout_probability),)
Developer: DavidDJChen, Project: hebel, Lines: 28, Source: input_dropout.py


Example 10: initializeGpuMemory

 def initializeGpuMemory(self):
     K = self.modelParams["proc_id_model","K"]
     
     # Sufficient statistics for the parameters of G kernels
     self.gpuPtrs["impulse_model","nnz_Z"] = gpuarray.empty((K,K), dtype=np.int32)
     self.gpuPtrs["impulse_model","g_suff_stats"] = gpuarray.empty((K,K), dtype=np.float32) 
     self.gpuPtrs["impulse_model","GS"] = gpuarray.empty_like(self.base.dSS["dS"])
Developer: richardkwo, Project: pyhawkes, Lines: 7, Source: impulse_models.py


Example 11: computeIrDensity

 def computeIrDensity(self, dS_gpu):
     """
     Compute the impulse response density at the time intervals in dS_gpu 
     """
     K = self.modelParams["proc_id_model","K"]
     N = self.base.data.N
     gS_gpu = gpuarray.empty_like(dS_gpu)
     
     # Update GS using the impulse response parameters
     grid_w = int(np.ceil(N/1024.0))
     self.gpuKernels["computeLogisticNormalGSIndiv"](np.int32(K),
                                                     np.int32(self.base.data.N),
                                                     self.gpuPtrs["proc_id_model","C"].gpudata,
                                                     self.base.dSS["rowIndices"].gpudata,
                                                     self.base.dSS["colPtrs"].gpudata,
                                                     self.gpuPtrs["impulse_model","g_mu"].gpudata,
                                                     self.gpuPtrs["impulse_model","g_tau"].gpudata,
                                                     np.float32(self.params["dt_max"]),
                                                     dS_gpu.gpudata,
                                                     gS_gpu.gpudata,
                                                     block=(1024, 1, 1), 
                                                     grid=(grid_w,1)
                                                     )
     
     return gS_gpu
Developer: richardkwo, Project: pyhawkes, Lines: 25, Source: impulse_models.py


Example 12: feed_forward

    def feed_forward(self, input_data, prediction=False):
        """Propagate forward through the layer

        **Parameters:**

        input_data : ``GPUArray``
            Input data to perform dropout on.

        prediction : bool, optional
            Whether to use prediction model. If true, the data is
            scaled by ``1 - dropout_probability`` instead of dropout being applied.

        **Returns:**
        
        dropout_data : ``GPUArray``
            The data after performing dropout.
        """

        if input_data.shape[1] != self.n_in:
            raise ValueError('Number of outputs from previous layer (%d) '
                             'does not match number of inputs to this layer (%d)' %
                             (input_data.shape[1], self.n_in))

        if not prediction:
            dropout_input = gpuarray.empty_like(input_data)
            dropout_mask = sample_dropout_mask(input_data,
                                               self.dropout_probability, target=dropout_input
                                           )
            return dropout_input, dropout_mask
        else:
            return (input_data * (1 - self.dropout_probability),)
Developer: hani1986ye, Project: hebel, Lines: 31, Source: input_dropout.py


Example 13: e1z

def e1z(z_gpu, dev):
    """
    Exponential integral with `n = 1` of complex arguments.

    Parameters
    ----------
    x_gpu : GPUArray
        Input matrix of shape `(m, n)`.
    dev : pycuda.driver.Device
        Device object to be used.
        
    Returns
    -------
    e_gpu : GPUArray
        GPUarrays containing the exponential integrals of
        the entries of `z_gpu`.

    Examples
    --------
    >>> import pycuda.gpuarray as gpuarray
    >>> import pycuda.autoinit
    >>> import numpy as np
    >>> import scipy.special
    >>> import special
    >>> z = np.asarray(np.random.rand(4, 4)+1j*np.random.rand(4, 4), np.complex64)
    >>> z_gpu = gpuarray.to_gpu(z)
    >>> e_gpu = e1z(z_gpu, pycuda.autoinit.device)
    >>> e_sp = scipy.special.exp1(z)
    >>> np.allclose(e_sp, e_gpu.get())
    True

    """

    if z_gpu.dtype == np.complex64:
        use_double = 0
    elif z_gpu.dtype == np.complex128:
        use_double = 1
    else:
        raise ValueError("unsupported type")

    # Get block/grid sizes:
    max_threads_per_block, max_block_dim, max_grid_dim = get_dev_attrs(dev)
    block_dim, grid_dim = select_block_grid_sizes(dev, z_gpu.shape)
    max_blocks_per_grid = max(max_grid_dim)

    # Set this to False when debugging to make sure the compiled kernel is
    # not cached:
    cache_dir = None
    e1z_mod = SourceModule(
        e1z_mod_template.substitute(
            use_double=use_double, max_threads_per_block=max_threads_per_block, max_blocks_per_grid=max_blocks_per_grid
        ),
        cache_dir=cache_dir,
        options=["-I", install_headers],
    )
    e1z_func = e1z_mod.get_function("e1z")

    e_gpu = gpuarray.empty_like(z_gpu)
    e1z_func(z_gpu.gpudata, e_gpu.gpudata, np.uint32(z_gpu.size), block=block_dim, grid=grid_dim)
    return e_gpu
Developer: stefanv, Project: scikits.cuda, Lines: 60, Source: special.py


Example 14: worker

def worker():
    comm = MPI.Comm.Get_parent()
    size = comm.Get_size()
    rank = comm.Get_rank()
    name = MPI.Get_processor_name()

    import pycuda.driver as drv
    drv.init()

    # Find maximum number of available GPUs:
    max_gpus = drv.Device.count()

    # Use modular arithmetic to avoid assigning a nonexistent GPU:
    n = rank % max_gpus
    dev = drv.Device(n)
    ctx = dev.make_context()
    atexit.register(ctx.pop)

    # Execute a kernel:
    import pycuda.gpuarray as gpuarray
    from pycuda.elementwise import ElementwiseKernel
    
    kernel = ElementwiseKernel('double *y, double *x, double a',
                               'y[i] = a*x[i]')
    x_gpu = gpuarray.to_gpu(np.random.rand(2))
    y_gpu = gpuarray.empty_like(x_gpu)
    kernel(y_gpu, x_gpu, np.double(2.0))

    print 'I am process %d of %d on CPU %s using GPU %s of %s [x_gpu=%s, y_gpu=%s]' % \
        (rank, size, name, n, max_gpus, str(x_gpu.get()), str(y_gpu.get()))
    comm.Disconnect()
Developer: lebedov, Project: cudamps, Lines: 31, Source: demo.py


Example 15: mult_matrix

def mult_matrix(a, b, target=None):
    assert a.shape == b.shape
    if target is None:
        target = gpuarray.empty_like(a)

    all_kernels["mult_matrix"](a, b, target)
    return target
Developer: Khodeir, Project: hebel, Lines: 7, Source: elementwise.py


Example 16: run_function

def run_function(X, Y_expected, func, rtol=1e-6, with_inplace_test=True, **kwargs):
    # CPU, with target argument
    Y = np.empty_like(Y_expected)
    Yhr = func(X, out=Y, **kwargs)
    assert_allclose(Y_expected, Yhr, err_msg="CPU with target", rtol=rtol)
    assert Yhr is Y

    # CPU, no target argument
    Yhr = func(X, **kwargs)
    assert_allclose(Y_expected, Yhr, err_msg="CPU, no target", rtol=rtol)

    if with_inplace_test:
        X2 = X.copy()
        Yhr = func(X2, out=X2, **kwargs)
        assert_allclose(Y_expected, Yhr, err_msg="CPU, inplace target", rtol=rtol)
        assert Yhr is X2

    kwargs = op.to_gpu(kwargs)

    # GPU, with target
    Xd = op.to_gpu(X)
    Yd = gpuarray.empty_like(op.to_gpu(Y_expected))
    Ydr = func(Xd, out=Yd, **kwargs)
    assert_allclose(Y_expected, op.to_cpu(Ydr), err_msg="GPU with target", rtol=rtol)
    assert Ydr is Yd

    # GPU, no target
    Ydr = func(Xd, **kwargs)
    assert_allclose(Y_expected, op.to_cpu(Ydr), err_msg="GPU, no target", rtol=rtol)

    if with_inplace_test:
        Ydr = func(Xd, out=Xd, **kwargs)
        assert_allclose(Y_expected, op.to_cpu(Ydr), err_msg="GPU, inplace target", rtol=rtol)
        assert Ydr is Xd
Developer: stachon, Project: binet, Lines: 34, Source: test_op.py


Example 17: nan_to_zeros

def nan_to_zeros(x, target=None):
    assert x.flags.c_contiguous
    if target is None:
        target = gpuarray.empty_like(x)
    assert target.flags.c_contiguous
    all_kernels['nan_to_zeros'](x, target)
    return target
Developer: DamonAnderson, Project: hebel, Lines: 7, Source: elementwise.py


Example 18: substract_matrix

def substract_matrix(a, b, target=None):
    assert a.shape == b.shape
    if target is None:
        target = gpuarray.empty_like(a)

    all_kernels['substract_matrix'](a, b, target)
    return target
Developer: Snazz2001, Project: hebel, Lines: 7, Source: elementwise.py


Example 19: test

    def test():
        gpu_func = getattr(cumath, name)
        cpu_func = getattr(np, numpy_func_names.get(name, name))
        if complex:
            _dtypes = complex_dtypes
        else:
            _dtypes = dtypes

        for s in sizes:
            for dtype in _dtypes:
                np.random.seed(1)
                A = (np.random.random(s)*(b-a) + a).astype(dtype)
                if complex:
                    A += (np.random.random(s)*(b-a) + a)*1j

                args = gpuarray.to_gpu(A)
                gpu_results = gpu_func(args).get()
                cpu_results = cpu_func(A)

                max_err = np.max(np.abs(cpu_results - gpu_results))
                assert (max_err <= threshold).all(), \
                        (max_err, name, dtype)

                gpu_results2 = gpuarray.empty_like(args)
                gr2 = gpu_func(args, out=gpu_results2)
                assert gpu_results2 is gr2
                gr2 = gr2.get()
                max_err = np.max(np.abs(cpu_results - gr2))
                assert (max_err <= threshold).all(), \
                        (max_err, name, dtype)
Developer: seibert, Project: pycuda, Lines: 30, Source: test_cumath.py


Example 20: buffer_apply

 def buffer_apply(self, input):
     # TODO: buffer apply to a large input may cause a launch timeout, need to buffer in
     # smaller chunks if this is the case
     b = self.filt_b_gpu
     a = self.filt_a_gpu
     zi = self.filt_state
     if not hasattr(self, "filt_x_gpu") or input.size != self.filt_x_gpu.size:
         self._desiredshape = input.shape
         self._has_run_once = False
         self.filt_x_gpu = gpuarray.to_gpu(input.flatten())
         self.filt_y_gpu = gpuarray.empty_like(self.filt_x_gpu)
     else:
         self.filt_x_gpu.set(input.flatten())
     filt_x_gpu = self.filt_x_gpu
     filt_y_gpu = self.filt_y_gpu
     if self._has_run_once:
         self.gpu_filt_func.launch_grid(*self.grid)
     else:
         self.gpu_filt_func.prepared_call(
             self.grid,
             intp(b.gpudata),
             intp(a.gpudata),
             intp(filt_x_gpu.gpudata),
             intp(zi.gpudata),
             intp(filt_y_gpu.gpudata),
             int32(input.shape[0]),
         )
         self._has_run_once = True
     return reshape(filt_y_gpu.get(pagelocked=self.pagelocked_mem), self._desiredshape)
Developer: dkiela, Project: thesis, Lines: 29, Source: gpulinearfilterbank.py



Note: The pycuda.gpuarray.empty_like examples in this article were compiled from source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors. For distribution and use, please refer to the license of the corresponding project. Do not reproduce without permission.

