• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python gpuarray.zeros函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中pycuda.gpuarray.zeros函数的典型用法代码示例。如果您正苦于以下问题:Python zeros函数的具体用法?Python zeros怎么用?Python zeros使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了zeros函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: _initialize_gpu_ds

    def _initialize_gpu_ds(self):
        """
        Setup GPU arrays.
        """

        self.synapse_state = garray.zeros(int(self.total_synapses) + \
                                    len(self.input_neuron_list), np.float64)
        if self.my_num_gpot_neurons>0:
            self.V = garray.zeros(int(self.my_num_gpot_neurons), np.float64)
        else:
            self.V = None

        if self.my_num_spike_neurons>0:
            self.spike_state = garray.zeros(int(self.my_num_spike_neurons), np.int32)

        if len(self.public_gpot_list)>0:
            self.public_gpot_list_g = garray.to_gpu(self.public_gpot_list)
            self.projection_gpot = garray.zeros(len(self.public_gpot_list), np.double)
            self._extract_gpot = self._extract_projection_gpot_func()

        if len(self.public_spike_list)>0:
            self.public_spike_list_g = garray.to_gpu( \
                (self.public_spike_list-self.spike_shift).astype(np.int32))
            self.projection_spike = garray.zeros(len(self.public_spike_list), np.int32)
            self._extract_spike = self._extract_projection_spike_func()
开发者ID:LuisMoralesAlonso,项目名称:neurokernel,代码行数:25,代码来源:LPU.py


示例2: get_next_batch

  def get_next_batch(self, batch_size):
    if self._reader is None:
      self._start_read()

    if self._gpu_batch is None:
      self._fill_reserved_data()

    height, width = self._gpu_batch.data.shape
    gpu_data = self._gpu_batch.data
    gpu_labels = self._gpu_batch.labels

    if self.index + batch_size >=  width:
      width = width - self.index
      labels = gpu_labels[self.index:self.index + batch_size]

      #data = gpu_data[:, self.index:self.index + batch_size]
      data = gpuarray.zeros((height, width), dtype = np.float32)
      gpu_partial_copy_to(gpu_data, data, 0, height, self.index, self.index + width)

      self.index = 0
      self._fill_reserved_data()
    else:
      labels = gpu_labels[self.index:self.index + batch_size]
      #data = gpu_data[:, self.index:self.index + batch_size]
      data = gpuarray.zeros((height, batch_size), dtype = np.float32)
      gpu_partial_copy_to(gpu_data, data, 0, height, self.index, self.index + batch_size)
      #labels = gpu_labels[self.index:self.index + batch_size]
      self.index += batch_size
    return BatchData(data, labels, self._gpu_batch.epoch)
开发者ID:tesatory,项目名称:fastnet,代码行数:29,代码来源:data.py


示例3: _initialize_gpu_ds

    def _initialize_gpu_ds(self):
        """
        Setup GPU arrays.
        """

        self.synapse_state = garray.zeros(max(int(self.total_synapses) + len(self.input_neuron_list), 1), np.float64)

        if self.total_num_gpot_neurons > 0:
            self.V = garray.zeros(int(self.total_num_gpot_neurons), np.float64)
        else:
            self.V = None

        if self.total_num_spike_neurons > 0:
            self.spike_state = garray.zeros(int(self.total_num_spike_neurons), np.int32)

        self.block_extract = (256, 1, 1)
        if len(self.out_ports_ids_gpot) > 0:
            self.out_ports_ids_gpot_g = garray.to_gpu(self.out_ports_ids_gpot)
            self.sel_out_gpot_ids_g = garray.to_gpu(self.sel_out_gpot_ids)

            self._extract_gpot = self._extract_projection_gpot_func()

        if len(self.out_ports_ids_spk) > 0:
            self.out_ports_ids_spk_g = garray.to_gpu((self.out_ports_ids_spk - self.spike_shift).astype(np.int32))
            self.sel_out_spk_ids_g = garray.to_gpu(self.sel_out_spk_ids)

            self._extract_spike = self._extract_projection_spike_func()

        if self.ports_in_gpot_mem_ind is not None:
            inds = self.sel_in_gpot_ids
            self.inds_gpot = garray.to_gpu(inds)

        if self.ports_in_spk_mem_ind is not None:
            inds = self.sel_in_spk_ids
            self.inds_spike = garray.to_gpu(inds)
开发者ID:yiyin,项目名称:neurokernel,代码行数:35,代码来源:LPU.py


示例4: riemanntheta_high_dim

def riemanntheta_high_dim(X, Yinv, T, z, g, rad, max_points = 10000000):
    parRiemann = RiemannThetaCuda(1,512)
    #initialize parRiemann
    parRiemann.compile(g)
    parRiemann.cache_omega_real(X)
    parRiemann.cache_omega_imag(Yinv,T)
    #compile the box_points program
    point_finder = func1()
    R = get_rad(T, rad)
    print R
    num_int_points = (2*R + 1)**g
    num_partitions = num_int_points//max_points
    num_final_partition = num_int_points - num_partitions*max_points
    osc_part = 0 + 0*1.j
    if (num_partitions > 0):
        S = gpuarray.zeros(np.int(max_points * g), dtype=np.double)
    print "Required number of iterations"
    print num_partitions
    print 
    for p in range(num_partitions):
        print p
        print
        S = box_points(point_finder, max_points*p, max_points*(p+1),g,R, S)
        parRiemann.cache_intpoints(S, gpu_already=True)
        osc_part += parRiemann.compute_v_without_derivs(np.array([z]))
    S = gpuarray.zeros(np.int((num_int_points - num_partitions*max_points)*g), dtype = np.double)
    print num_partitions*max_points,num_int_points
    S = box_points(point_finder, num_partitions*max_points, num_int_points, g, R,S)
    parRiemann.cache_intpoints(S,gpu_already = True)
    osc_part += parRiemann.compute_v_without_derivs(np.array([z]))
    print osc_part
    return osc_part
开发者ID:abelfunctions,项目名称:abelfunctions,代码行数:32,代码来源:box_points.py


示例5: __init__

    def __init__( self, s_dict, synapse_state, dt, debug=False):
        self.debug = debug
        self.dt = dt
        self.num = len( s_dict['id'] )

        self.pre  = garray.to_gpu( np.asarray( s_dict['pre'], dtype=np.int32 ))
        self.ar   = garray.to_gpu( np.asarray( s_dict['ar'], dtype=np.float64 ))
        self.ad   = garray.to_gpu( np.asarray( s_dict['ad'], dtype=np.float64 ))
        self.gmax = garray.to_gpu( np.asarray( s_dict['gmax'], dtype=np.float64 ))
        self.a0   = garray.zeros( (self.num,), dtype=np.float64 )
        self.a1   = garray.zeros( (self.num,), dtype=np.float64 )
        self.a2   = garray.zeros( (self.num,), dtype=np.float64 )
        self.cond = synapse_state

        _num_dendrite_cond = np.asarray(
            [s_dict['num_dendrites_cond'][i] for i in s_dict['id']],\
            dtype=np.int32).flatten()
        _num_dendrite = np.asarray(
            [s_dict['num_dendrites_I'][i] for i in s_dict['id']],\
            dtype=np.int32).flatten()

        self._cum_num_dendrite = garray.to_gpu(_0_cumsum(_num_dendrite))
        self._cum_num_dendrite_cond = garray.to_gpu(_0_cumsum(_num_dendrite_cond))
        self._num_dendrite = garray.to_gpu(_num_dendrite)
        self._num_dendrite_cond = garray.to_gpu(_num_dendrite_cond)
        self._pre = garray.to_gpu(np.asarray(s_dict['I_pre'], dtype=np.int32))
        self._cond_pre = garray.to_gpu(np.asarray(s_dict['cond_pre'], dtype=np.int32))
        self._V_rev = garray.to_gpu(np.asarray(s_dict['reverse'],dtype=np.double))
        self.I = garray.zeros(self.num, np.double)
        #self._update_I_cond = self._get_update_I_cond_func()
        self._update_I_non_cond = self._get_update_I_non_cond_func()
        self.update = self._get_gpu_kernel()
开发者ID:yiyin,项目名称:neurokernel,代码行数:32,代码来源:AlphaSynapsePre.py


示例6: compute_v_without_derivs

 def compute_v_without_derivs(self, Xs, Yinvs, Ts):
     #Turn the parts of omega into gpuarrays
     Xs = np.require(Xs, dtype = np.double, requirements=['A', 'W', 'O', 'C'])
     Yinvs = np.require(Yinvs, dtype = np.double, requirements=['A', 'W', 'O', 'C'])
     Ts = np.require(Ts, dtype = np.double, requirements=['A', 'W', 'O', 'C'])
     Xs_d = gpuarray.to_gpu(Xs)
     Yinvs_d = gpuarray.to_gpu(Yinvs)
     Ts_d = gpuarray.to_gpu(Ts)
     #Determine N = the number of integer points to sum over
     #          K = the number of different omegas to compute the function at
     N = self.Sd.size/self.g
     K = Xs.size/(self.g**2)
     #Create room on the gpu for the real and imaginary finite sum calculations
     fsum_reald = gpuarray.zeros(N*K, dtype=np.double)
     fsum_imagd = gpuarray.zeros(N*K, dtype=np.double)
     #Turn all scalars into numpy data types
     Nd = np.int32(N)
     Kd = np.int32(K)
     gd = np.int32(self.g)
     blocksize = (self.tilewidth, self.tileheight, 1)
     gridsize = (N//self.tilewidth + 1, K//self.tileheight + 1, 1)
     self.finite_sum_without_derivs(fsum_reald, fsum_imagd, Xs_d, Yinvs_d, Ts_d,
                                    self.Sd, gd, Nd, Kd,
                                    block = blocksize,
                                    grid = gridsize)
     cuda.Context.synchronize()
     fsums_real = self.sum_reduction(fsum_reald, N, K, Kd, Nd)
     fsums_imag = self.sum_reduction(fsum_imagd, N, K, Kd, Nd)
     return fsums_real + 1.0j*fsums_imag
开发者ID:abelfunctions,项目名称:abelfunctions,代码行数:29,代码来源:riemanntheta_omegas.py


示例7: prepare_for_train

  def prepare_for_train(data, label):
    assert len(data.shape) == 4
    if data.shape[3] != self.batchSize:
      self.batchSize = data.shape[3]
      for l in self.layers:
        l.change_batch_size(self.batchSize)
      self.inputShapes = None
      self.imgShapes = None
      self.outputs = []
      self.grads = []
      self.local_outputs = []
      self.local_grads = []


      self.imgShapes = [(self.numColor, self.imgSize / 2, self.imgSize / 2, self.batchSize)]
      self.inputShapes = [(self.numColr * (self.imgSize ** 2) / 4, self.batchSize)]

      fc = False
      for layer in self.layers:
        outputShape = layer.get_output_shape()

        row = outputShape[0] * outputShape[1] * outputShape[2]
        col = outputShape[3]

        if layer.type == 'softmax':
          row *= comm.Get_size()
          outputShape = (outputShape[0] * comm.Get_size(), 1, 1, outputShape[3])

        self.inputShapes.append((row, col))
        self.imgShapes.append(outputShape)

        area = make_area(outputShape)
        self.outputs.append(virtual_array(rank, area = area))
        self.local_outputs.append(gpuarray.zeros((row, col), dtype =np.float32))

        inputShape = self.inputShapes[-2]
        #if layer.type == 'fc':
        #  inputShape = (inputShape[0] * comm.Get_size(), inputShape[1])
        #  self.local_grads.append(gpuarray.zeors(inputShape, dtype = np.float32))
        #  area = make_plain_area(inputShape)
        #else:
        #  self.local_grads.append(gpuarray.zeros(inputShape, dtype= np.float32))
        #  area = make_area(self.imgShapes[-2])
        #self.grads.append(virtual_array(rank, area = area))

      area = make_area((self.numColor, self.imgSize / 2, self.imgSize / 2, self.batchSize))
      self.data = virtual_array(rank, local = gpuarray.to_gpu(data.__getitem__(area.to_slice())),
          area = area)

      if not isinstance(label, GPUArray):
        self.label = gpuarray.to_gpu(label).astype(np.float32)
      else:
        self.label = label

      self.label = self.label.reshape((label.size, 1))
      self.numCase += data.shape[1]
      outputShape = self.inputShapes[-1]

      if self.output is None or self.output.shape != outputShape:
        self.output = gpuarray.zeros(outputShape, dtype = np.float32)
开发者ID:iskandr,项目名称:striate,代码行数:60,代码来源:fastnet.py


示例8: logreg_cost

 def logreg_cost(self, label, output):
   if self.cost.shape[0] !=  self.batchSize:
     self.cost = gpuarray.zeros((self.batchSize, 1), dtype=np.float32)
   maxid = gpuarray.zeros((self.batchSize, 1), dtype=np.float32)
   find_col_max_id(maxid, output)
   self.batchCorrect = same_reduce(label , maxid)
   logreg_cost_col_reduce(output, label, self.cost)
开发者ID:phecy,项目名称:striate,代码行数:7,代码来源:layer.py


示例9: update_ptrs

    def update_ptrs(self):
        self.tps_param_ptrs = get_gpu_ptrs(self.tps_params)
        self.trans_d_ptrs = get_gpu_ptrs(self.trans_d)
        self.lin_dd_ptrs = get_gpu_ptrs(self.lin_dd)
        self.w_nd_ptrs = get_gpu_ptrs(self.w_nd)

        for b in self.bend_coefs:
            self.proj_mat_ptrs[b] = get_gpu_ptrs(self.proj_mats[b])
            self.offset_mat_ptrs[b] = get_gpu_ptrs(self.offset_mats[b])

        self.pt_ptrs = get_gpu_ptrs(self.pts)
        self.kernel_ptrs = get_gpu_ptrs(self.kernels)
        self.pt_w_ptrs = get_gpu_ptrs(self.pts_w)
        self.pt_t_ptrs = get_gpu_ptrs(self.pts_t)
        self.corr_cm_ptrs = get_gpu_ptrs(self.corr_cm)
        self.corr_rm_ptrs = get_gpu_ptrs(self.corr_rm)
        self.r_coef_ptrs = get_gpu_ptrs(self.r_coefs)
        self.c_coef_rn_ptrs = get_gpu_ptrs(self.c_coefs_rn)
        self.c_coef_cn_ptrs = get_gpu_ptrs(self.c_coefs_cn)
        # temporary space for warping cost computations
        self.warp_err = gpuarray.zeros((self.N, MAX_CLD_SIZE), np.float32)
        self.bend_res_mat = gpuarray.zeros((DATA_DIM * self.N, DATA_DIM), np.float32)
        self.bend_res = [self.bend_res_mat[i * DATA_DIM : (i + 1) * DATA_DIM] for i in range(self.N)]
        self.bend_res_ptrs = get_gpu_ptrs(self.bend_res)

        self.dims_gpu = gpuarray.to_gpu(np.array(self.dims, dtype=np.int32))
        self.ptrs_valid = True
开发者ID:rll,项目名称:lfd,代码行数:27,代码来源:batchtps.py


示例10: compute_v_without_derivs

 def compute_v_without_derivs(self, Z):
     #Turn the numpy set Z into gpuarrays
     x = Z.real
     y = Z.imag
     x = np.require(x, dtype = np.double, requirements=['A','W','O','C'])
     y = np.require(y, dtype = np.double, requirements=['A','W','O','C'])
     xd = gpuarray.to_gpu(x)
     yd = gpuarray.to_gpu(y)
     self.yd = yd
     #Detemine N = the number of integer points to sum over and
     #         K = the number of values to compute the function at
     N = self.Sd.size/self.g
     K = Z.size/self.g
     #Create room on the gpu for the real and imaginary finite sum calculations
     fsum_reald = gpuarray.zeros(N*K, dtype=np.double)
     fsum_imagd = gpuarray.zeros(N*K, dtype=np.double)
     #Make all scalars into numpy data types
     Nd = np.int32(N)
     Kd = np.int32(K)
     gd = np.int32(self.g)
     blocksize = (self.tilewidth, self.tileheight, 1)
     gridsize = (N//self.tilewidth + 1, K//self.tileheight + 1, 1)
     self.finite_sum_without_derivs(fsum_reald, fsum_imagd, xd, yd, 
                  self.Sd, gd, Nd, Kd,
                  block = blocksize,
                  grid = gridsize)
     cuda.Context.synchronize()
     fsums_real = self.sum_reduction(fsum_reald, N, K, Kd, Nd)
     fsums_imag = self.sum_reduction(fsum_imagd, N, K, Kd, Nd)
     return fsums_real + 1.0j*fsums_imag
开发者ID:abelfunctions,项目名称:abelfunctions,代码行数:30,代码来源:riemanntheta_cuda.py


示例11: setup_pdf_eval

    def setup_pdf_eval(self, event_hit, event_time, event_charge, min_twidth,
                       trange, min_qwidth, qrange, min_bin_content=10,
                       time_only=True):
        """Setup GPU arrays to compute PDF values for the given event.
        The pdf_eval calculation allows the PDF to be evaluated at a
        single point for each channel as the Monte Carlo is run.  The
        effective bin size will be as small as (`min_twidth`,
        `min_qwidth`) around the point of interest, but will be large
        enough to ensure that `min_bin_content` Monte Carlo events
        fall into the bin.

            event_hit: ndarray
              Hit or not-hit status for each channel in the detector.
            event_time: ndarray
              Hit time for each channel in the detector.  If channel 
              not hit, the time will be ignored.
            event_charge: ndarray
              Integrated charge for each channel in the detector.
              If channel not hit, the charge will be ignored.

            min_twidth: float
              Minimum bin size in the time dimension
            trange: (float, float)
              Range of time dimension in PDF
            min_qwidth: float
              Minimum bin size in charge dimension
            qrange: (float, float)
              Range of charge dimension in PDF
            min_bin_content: int
              The bin will be expanded to include at least this many events
            time_only: bool
              If True, only the time observable will be used in the PDF.
        """
        self.event_nhit = count_nonzero(event_hit)
        
        # Define a mapping from an array of len(event_hit) to an array of length event_nhit
        self.map_hit_offset_to_channel_id = np.where(event_hit)[0].astype(np.uint32)
        self.map_hit_offset_to_channel_id_gpu = ga.to_gpu(self.map_hit_offset_to_channel_id)
        self.map_channel_id_to_hit_offset = np.maximum(0, event_hit.cumsum() - 1).astype(np.uint32)
        self.map_channel_id_to_hit_offset_gpu = ga.to_gpu(self.map_channel_id_to_hit_offset)

        self.event_hit_gpu = ga.to_gpu(event_hit.astype(np.uint32))
        self.event_time_gpu = ga.to_gpu(event_time.astype(np.float32))
        self.event_charge_gpu = ga.to_gpu(event_charge.astype(np.float32))

        self.eval_hitcount_gpu = ga.zeros(len(event_hit), dtype=np.uint32)
        self.eval_bincount_gpu = ga.zeros(len(event_hit), dtype=np.uint32)
        self.nearest_mc_gpu = ga.empty(shape=self.event_nhit * min_bin_content, 
                                             dtype=np.float32)
        self.nearest_mc_gpu.fill(1e9)
        
        self.min_twidth = min_twidth
        self.trange = trange
        self.min_qwidth = min_qwidth
        self.qrange = qrange
        self.min_bin_content = min_bin_content

        assert time_only # Only support time right now
        self.time_only = time_only
开发者ID:BenLand100,项目名称:chroma,代码行数:59,代码来源:pdf.py


示例12: fprop

 def fprop(self, input, output):
   max = gpuarray.zeros((1, self.batchSize), dtype = np.float32)
   col_max_reduce(max, input)
   add_vec_to_cols(input, max, output, alpha = -1)
   gpu_copy_to(cumath.exp(output), output)
   sum = gpuarray.zeros(max.shape, dtype = np.float32)
   add_col_sum_to_vec(sum, output, alpha = 0)
   div_vec_to_cols(output, sum)
开发者ID:smessing,项目名称:striate,代码行数:8,代码来源:layer.py


示例13: createHashTable

    def createHashTable(kd, vd, capacity):
        table_capacity_gpu, _ = mod.get_global('table_capacity')
        cuda.memcpy_htod(table_capacity_gpu, np.uint([capacity]))

        # CUDA_SAFE_CALL(cudaMemcpyToSymbol(table_capacity,
        #           &capacity,
        #           sizeof(unsigned int)));

        table_vals_gpu, table_vals_size = mod.get_global('table_values') # pointer-2-pointer
        values_gpu = gpuarray.zeros((capacity*vd,1), dtype=np.float32)
        # values_gpu = gpuarray.zeros((capacity*vd,1), dtype=np.float32)
        # cuda.memset_d32(values_gpu.gpudata, 0, values_gpu.size)
        cuda.memcpy_dtod(table_vals_gpu, values_gpu.gpudata, table_vals_size)

        # float *values;
        # allocateCudaMemory((void**)&values, capacity*vd*sizeof(float));
        # CUDA_SAFE_CALL(cudaMemset((void *)values, 0, capacity*vd*sizeof(float)));
        # CUDA_SAFE_CALL(cudaMemcpyToSymbol(table_values,
        #                   &values,
        #                   sizeof(float *)));

        table_entries, table_entries_size = mod.get_global('table_entries')
        entries_gpu = gpuarray.empty((capacity*2,1), dtype=np.int)
        entries_gpu.fill(-1)
        # cuda.memset_d32(entries_gpu.gpudata, 1, entries_gpu.size)
        cuda.memcpy_dtod(table_entries, entries_gpu.gpudata, table_entries_size)

        # int *entries;
        # allocateCudaMemory((void **)&entries, capacity*2*sizeof(int));
        # CUDA_SAFE_CALL(cudaMemset((void *)entries, -1, capacity*2*sizeof(int)));
        # CUDA_SAFE_CALL(cudaMemcpyToSymbol(table_entries,
        #                   &entries,
        #                   sizeof(unsigned int *)));

        ########################################
        # Assuming LINEAR_D_MEMORY not defined #
        ########################################

        #  #ifdef LINEAR_D_MEMORY
        # char *ranks;
        # allocateCudaMemory((void**)&ranks, capacity*sizeof(char));
        # CUDA_SAFE_CALL(cudaMemcpyToSymbol(table_rank,
        #                   &ranks,
        #                   sizeof(char *)));
        #
        # signed short *zeros;
        # allocateCudaMemory((void**)&zeros, capacity*sizeof(signed short));
        # CUDA_SAFE_CALL(cudaMemcpyToSymbol(table_zeros,
        #                   &zeros,
        #                   sizeof(char *)));
        #
        # #else

        table_keys_gpu, table_keys_size = mod.get_global('table_keys')
        keys_gpu = gpuarray.zeros((capacity*kd,1), dtype=np.short)
        # keys_gpu = gpuarray.empty((capacity*kd,1), dtype=np.short)
        # cuda.memset_d32(keys_gpu.gpudata, 0, keys_gpu.size)
        cuda.memcpy_dtod(table_keys_gpu, keys_gpu.gpudata, table_keys_size)
开发者ID:AdrianLsk,项目名称:permutohedral_pycuda,代码行数:58,代码来源:filter_pycuda.py


示例14: logreg_cost_multiview

 def logreg_cost_multiview(self, label, output, num_view):
   unit = self.batch_size / num_view
   if self.cost.shape[0] != unit:
     self.cost = gpuarray.zeros((unit, 1), dtype = np.float32)
   maxid = gpuarray.zeros((self.batch_size, 1), dtype = np.float32)
   find_col_max_id(maxid, output)
   self.batchCorrect = same_reduce_multiview(label, maxid, num_view)
   tmp = gpuarray.zeros((output.shape[0], unit), dtype = np.float32)
   gpu_partial_copy_to(output, tmp, 0, output.shape[0], 0, unit)
   logreg_cost_col_reduce(tmp, label, self.cost)
开发者ID:alemagnani,项目名称:fastnet,代码行数:10,代码来源:layer.py


示例15: fprop

 def fprop(self, input, output, train=TRAIN):
   max = gpuarray.zeros((1, self.batchSize), dtype=np.float32)
   col_max_reduce(max, input)
   add_vec_to_cols(input, max, output, alpha= -1)
   eltwise_exp(output)
   sum = gpuarray.zeros(max.shape, dtype=np.float32)
   add_col_sum_to_vec(sum, output, alpha=0)
   div_vec_to_cols(output, sum)
   if PFout:
     print_matrix(output, self.name)
开发者ID:phecy,项目名称:striate,代码行数:10,代码来源:layer.py


示例16: __init__

    def __init__(self, A1, A2, left, use_batch=False):
        """Creates a new LinearOperator interface to the superoperator E.
        
        This is a wrapper to be used with SciPy's sparse linear algebra routines.
        
        Parameters
        ----------
        A1 : ndarray
            Ket parameter tensor. 
        A2 : ndarray
            Bra parameter tensor.
        left : bool
            Whether to multiply with a vector to the left (or to the right).
        """
        self.A1G = [list(map(garr.to_gpu, A1k)) for A1k in A1]
        self.A2G = [list(map(garr.to_gpu, A2k)) for A2k in A2]
        self.tmp = list(map(garr.empty_like, self.A1G[0]))
        self.tmp2 = list(map(garr.empty_like, self.A1G[0]))
        
        self.use_batch = use_batch
        self.left = left
        
        self.D = A1[0].shape[1]        
        self.shape = (self.D**2, self.D**2)        
        self.dtype = sp.dtype(A1[0][0].dtype)
        
        self.calls = 0        
        
        self.out = garr.empty((self.D, self.D), dtype=self.dtype)        
        self.xG = garr.empty((self.D, self.D), dtype=self.dtype)

        if use_batch:
            self.A1G_p = list(map(get_batch_ptrs, self.A1G))
            self.A2G_p = list(map(get_batch_ptrs, self.A2G))
            self.tmp_p = get_batch_ptrs(self.tmp)
            self.tmp2_p = get_batch_ptrs(self.tmp2)
            self.xG_p = get_batch_ptrs([self.xG] * len(A1[0]))
            self.out_p = get_batch_ptrs([self.out] * len(A1[0]))
        else:
            self.A1G_p = None
            self.A2G_p = None
            self.tmp_p = None
            self.tmp2_p = None
            self.xG_p = None
            self.out_p = None

            self.ones = [garr.zeros((1), dtype=sp.complex128) for s in range(len(A1[0]))]
            self.ones = [one.fill(1) for one in self.ones]
            self.zeros = [garr.zeros((1), dtype=sp.complex128) for s in range(len(A1[0]))]
            
            self.streams = []
            for s in range(A1[0].shape[0]):
                self.streams.append(cd.Stream())
        
        self.hdl = cb.cublasCreate()
开发者ID:amilsted,项目名称:evoMPS,代码行数:55,代码来源:cuda_alternatives.py


示例17: get_next_batch

    def get_next_batch(self, batch_size):
      if self._reader is None:
        self._start_read()

      if self._gpu_batch is None:
        self._fill_reserved_data()

      if not self.multiview:
        height, width = self._gpu_batch.data.shape
        gpu_data = self._gpu_batch.data
        gpu_labels = self._gpu_batch.labels
        epoch = self._gpu_batch.epoch

        if self.index + batch_size >=  width:
          width = width - self.index
          labels = gpu_labels[self.index:self.index + batch_size]

          data = gpuarray.zeros((height, width), dtype = np.float32)
          gpu_partial_copy_to(gpu_data, data, 0, height, self.index, self.index + width)
          self.index = 0
          self._fill_reserved_data()
        else:
          labels = gpu_labels[self.index:self.index + batch_size]
          data = gpuarray.zeros((height, batch_size), dtype = np.float32)
          gpu_partial_copy_to(gpu_data, data, 0, height, self.index, self.index + batch_size)
          self.index += batch_size
      else:
        # multiview provider
        # number of views should be 10
        # when using multiview, do not pre-move data and labels to gpu
        height, width = self._cpu_batch.data.shape
        cpu_data = self._cpu_batch.data
        cpu_labels = self._cpu_batch.labels
        epoch = self._cpu_batch.epoch

        width /= self.num_view

        if self.index + batch_size >=  width:
          batch_size = width - self.index

        labels = cpu_labels[self.index:self.index + batch_size]
        data = np.zeros((height, batch_size * self.num_view), dtype = np.float32)
        for i in range(self.num_view):
          data[:, i* batch_size: (i+ 1) * batch_size] = cpu_data[:, self.index + width * i : self.index + width * i + batch_size]

        data = copy_to_gpu(np.require(data, requirements = 'C'))
        labels = copy_to_gpu(np.require(labels, requirements = 'C'))


        self.index = (self.index + batch_size) / width
      
      #util.log_info('Batch: %s %s %s', data.shape, gpu_labels.shape, labels.shape)
      return BatchData(data, labels, epoch)
开发者ID:rjpower,项目名称:fastnet,代码行数:53,代码来源:data.py


示例18: cuda_hogbom

def cuda_hogbom(gpu_dirty,gpu_dpsf,gpu_cpsf,thresh=0.2,damp=1,gain=0.1,prefix='test'):
  """
  Use CUDA to implement the Hogbom CLEAN algorithm

  A nice description of the algorithm is given by the NRAO, here:
  http://www.cv.nrao.edu/~abridle/deconvol/node8.html

  Parameters:
  * dirty: The dirty image (2D numpy array)
  * dpsf: The dirty beam psf  (2D numpy array)
  * thresh: User-defined threshold to stop iteration, as a fraction of the max pixel intensity (float)
  * damp: The damping factor to scale the dirty beam by
  * prefix: prefix for output image file names
  """
  height,width=np.shape(gpu_dirty)
  ## Grid parameters - #improvable#
  tsize=8
  blocksize = (int(tsize),int(tsize),1)     	     # The number of threads per block (x,y,z)
  gridsize  = (int(width/tsize),int(height/tsize))   # The number of thread blocks     (x,y)
  ## Setup cleam image and point source model
  gpu_pmodel = gpu.zeros([height,width],dtype=np.float32)
  gpu_clean = gpu.zeros([height,width],dtype=np.float32)
  ## Setup GPU constants
  gpu_max_id = gpu.to_gpu(np.int32(0))
  imax=gpu_getmax(gpu_dirty)
  thresh_val=np.float32(thresh*imax)
  ## Steps 1-3 - Iterate until threshold has been reached
  t_start=time.time()
  i=0
  while abs(imax)>(thresh_val):
    if (np.mod(i,100)==0):
      print "Hogbom iteration",i
    ## Step 1 - Find max
    find_max_kernel(gpu_dirty,gpu_max_id,imax,np.int32(width),np.int32(height),gpu_pmodel,\
			block=blocksize, grid=gridsize)
    ## Step 2 - Subtract the beam (assume that it is normalized to have max 1)
    ##          This kernel simultaneously reconstructs the CLEANed image.
    if PLOTME: print "Subtracting dirty beam "+str(i)+", maxval=%0.8f"%imax+' at x='+str(gpu_max_id.get()%width)+\
			', y='+str(gpu_max_id.get()/width)
    sub_beam_kernel(gpu_dirty,gpu_dpsf,gpu_max_id,gpu_clean,gpu_cpsf,np.float32(gain*imax),np.int32(width),\
			np.int32(height), block=blocksize, grid=gridsize)
    i+=1
    ## Step 3 - Find maximum value using gpuarray
    imax=gpu_getmax(gpu_dirty)
  t_end=time.time()
  t_full=t_end-t_start
  print "Hogbom execution time %0.5f"%t_full+' s'
  print "\t%0.5f"%(t_full/i)+' s per iteration'
  ## Step 4 - Add the residuals back in
  add_noise_kernel(gpu_dirty,gpu_clean,np.float32(width+height))
  return gpu_dirty,gpu_pmodel,gpu_clean
开发者ID:shaoguangleo,项目名称:autoFits,代码行数:51,代码来源:gICLEAN.py


示例19: __init__

    def __init__(self,**params):

        '''
        Hack-ish way to avoid initialisation until the weights are transfered:
        '''
        should_apply = self.apply_output_fns_init
        params['apply_output_fns_init'] = False

        super(GPUSparseCFProjection,self).__init__(**params)
        # Transfering the weights:
        self.pycuda_stream = cuda.Stream()
        self.weights_gpu = cusparse.CSR.to_CSR(self.weights.toSparseArray().transpose())
        # Getting the row and columns indices for the *transposed* matrix. Used for Hebbian learning and normalisation:
        nzcols, nzrows = self.weights.nonzero()
        tups = sorted(zip(nzrows, nzcols))
        nzrows = [x[0] for x in tups]
        nzcols = [x[1] for x in tups]

        '''
        Allocating a page-locked piece of memory for the activity so that GPU could transfer data to the
        main memory without the involvment of the CPU:
        '''
        self.activity = cuda.pagelocked_empty(self.activity.shape, np.float32)
        self.activity_gpu_buffer = gpuarray.zeros(shape=(self.weights_gpu.shape[0],), dtype=np.float32)

        self.input_buffer_pagelocked = cuda.pagelocked_empty(shape=(self.weights_gpu.shape[1],), dtype=np.float32, mem_flags=cuda.host_alloc_flags.WRITECOMBINED)
        self.input_buffer = gpuarray.zeros(shape=(self.weights_gpu.shape[1], ), dtype=np.float32)

        self.norm_total_gpu = gpuarray.zeros(shape=(self.weights_gpu.shape[0],), dtype=np.float32)

        # Getting them on the GPU:
        self.nzcount = self.weights.getnnz()
        self.nzrows_gpu = gpuarray.to_gpu(np.array(nzrows, np.int32))
        self.nzcols_gpu = gpuarray.to_gpu(np.array(nzcols, np.int32))
        # Helper array for normalization:
        self.norm_ones_gpu = gpuarray.to_gpu(np.array([1.0] * self.weights_gpu.shape[1], np.float32))
        # Kernel that applies the normalisation:
        self.normalize_kernel = ElementwiseKernel(
                        "int *nzrows, float *norm_total, float *weights",
                        "weights[i] *= norm_total[nzrows[i]]",
                        "divisive_normalize")
        # Kernel that calculates the learning:
        self.hebbian_kernel = ElementwiseKernel(
                        "float single_conn_lr, int *row, int *col, float *src_activity, float *dest_activity, float *result",
                        "result[i] += single_conn_lr * src_activity[col[i]] * dest_activity[row[i]]",
                        "hebbian_learning")

        params['apply_output_fns_init'] = should_apply
        self.apply_output_fns_init = should_apply
        if self.apply_output_fns_init:
            self.apply_learn_output_fns()
开发者ID:Tasignotas,项目名称:topographica_mirror,代码行数:51,代码来源:projection.py


示例20: append_layer

  def append_layer(self, layer):
    self.layers.append(layer)
    if layer.type == 'conv':
      self.numConv += 1

    outputShape = layer.get_output_shape()
    row = outputShape[0] * outputShape[1] * outputShape[2]
    col = outputShape[3]
    self.inputShapes.append((row, col))
    self.imgShapes.append(outputShape)

    self.outputs.append(gpuarray.zeros((row, col), dtype=np.float32))
    self.grads.append(gpuarray.zeros(self.inputShapes[-2], dtype=np.float32))
    print >> sys.stderr,  '%s[%s]:%s' % (layer.name, layer.type, outputShape)
开发者ID:iskandr,项目名称:striate,代码行数:14,代码来源:fastnet.py



注:本文中的pycuda.gpuarray.zeros函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python gpuarray.zeros_like函数代码示例发布时间:2022-05-25
下一篇:
Python gpuarray.to_gpu函数代码示例发布时间:2022-05-25
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap