• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python driver.memcpy_htod函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中pycuda.driver.memcpy_htod函数的典型用法代码示例。如果您正苦于以下问题：Python memcpy_htod函数的具体用法？Python memcpy_htod怎么用？Python memcpy_htod使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了memcpy_htod函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: __init__

    def __init__(self, view_tile, size, sigma, debug=False):
        """Prepare the host/device grid and compile the Gaussian kernel.

        Args:
            view_tile: object providing ``get_Data()`` (an iterable of data
                sets exposing ``getDataSet()``/``setDataSet()``) and
                ``get_View()``.
            size: two-element grid shape; both entries must be at least 2.
            sigma: stored unchanged on ``self.sigma``.
            debug: stored unchanged on ``self.debug``.

        Raises:
            ValueError: if either entry of ``size`` is smaller than 2.
            Exception: if a data set is still not C-contiguous after
                ``np.require``.
        """
        self.debug = debug
        if size[0] < 2 or size[1] < 2:
            raise ValueError("Split needs to be at least 2x2")

        # Every data set must be C-contiguous; convert in place when it is not.
        self.data_sets = view_tile.get_Data()
        for data_set in self.data_sets:
            points = data_set.getDataSet()
            if points.flags['C_CONTIGUOUS']:
                continue
            print("NOT CONTIGUOUS, trying to reformat the points")
            points = np.require(points, dtype=points.dtype, requirements=['C'])
            if not points.flags['C_CONTIGUOUS']:
                raise Exception("Points are not contiguous")
            data_set.setDataSet(points)

        self.view_tile = view_tile
        self.sigma = sigma
        self.pts_gpu = None

        # Zero-filled float32 grid on the host, copied into a device buffer
        # of matching size.
        self.grid = np.zeros(size, dtype=np.float32)
        self.grid_gpu = cuda.mem_alloc_like(self.grid)
        cuda.memcpy_htod(self.grid_gpu, self.grid)

        module = SourceModule(self.__cuda_code)
        self.gpu_gaussian = module.get_function("gpu_gaussian")

        self.view = self.view_tile.get_View()

        launch_dims = self.__setup_cuda_sizes(size)
        self.grid_size, self.block_size = launch_dims

        # dx is derived from size[1] and dy from size[0].
        self.dx = 1 / float(size[1] - 1)
        self.dy = 1 / float(size[0] - 1)
开发者ID:SCIInstitute,项目名称:MLM,代码行数:33,代码来源:gaussian_gpu_grid.py


示例2: test_constant_memory

    def test_constant_memory(self):
        """Check that data written to ``__constant__`` memory reaches a kernel.

        Uploads 32 random float32 values into the module's ``const_array``
        symbol, launches a single block of 32 threads that copies the symbol
        into a global buffer, and asserts that the buffer read back matches
        the uploaded values element for element.
        """
        # contributed by Andrew Wagner

        module = SourceModule("""
        __constant__ float const_array[32];

        __global__ void copy_constant_into_global(float* global_result_array)
        {
            global_result_array[threadIdx.x] = const_array[threadIdx.x];
        }
        """)

        copy_constant_into_global = module.get_function("copy_constant_into_global")
        # get_global returns a pair; only the device pointer is needed here.
        const_array, _ = module.get_global('const_array')

        host_array = np.random.randint(0,255,(32,)).astype(np.float32)

        global_result_array = drv.mem_alloc_like(host_array)
        drv.memcpy_htod(const_array, host_array)

        copy_constant_into_global(
                global_result_array,
                grid=(1, 1), block=(32, 1, 1))

        host_result_array = np.zeros_like(host_array)
        drv.memcpy_dtoh(host_result_array, global_result_array)

        # ``.all`` must be called: asserting the bound method itself is always
        # truthy and would let a failing comparison pass silently.
        assert (host_result_array == host_array).all()
开发者ID:davidweichiang,项目名称:pycuda,代码行数:28,代码来源:test_driver.py


示例3: edgetaper_gpu

def edgetaper_gpu(y_gpu, sf, win='barthann'):
  """Launch the ``edgetaper`` kernel on ``y_gpu`` using two window tables.

  Args:
    y_gpu: device array; its ``shape`` and ``dtype`` are read here and the
      array itself is passed to the kernel.
    sf: two-element array-like of window sizes; ``sf[0]`` sizes the ``wy``
      window and ``sf[1]`` the ``wx`` window. Even entries are increased by
      one so both sizes are odd.
    win: window name forwarded to ``scipy.signal.get_window``.
  """
  shape = np.array(y_gpu.shape).astype(np.uint32)
  dtype = y_gpu.dtype
  block_size = (16,16,1)
  grid_size = (int(np.ceil(float(shape[1])/block_size[0])),
               int(np.ceil(float(shape[0])/block_size[1])))

  # Ensure that sf is odd
  sf = sf+(1-np.mod(sf,2))
  wx = scipy.signal.get_window(win, sf[1])
  wy = scipy.signal.get_window(win, sf[0])
  maxw = wx.max() * wy.max()

  # The half sizes are used as slice bounds below, so they must be true
  # integers: np.floor() returns floats, which NumPy rejects as indices.
  hsf = np.floor_divide(sf, 2).astype(np.int64)
  wx = (wx[0:hsf[1]] / maxw).astype(dtype)
  wy = (wy[0:hsf[0]] / maxw).astype(dtype)

  preproc = _generate_preproc(dtype, shape)
  preproc += '#define wx_size %d\n' % wx.size
  preproc += '#define wy_size %d\n' % wy.size
  mod = SourceModule(preproc + edgetaper_code, keep=True)
  # Named differently from this function so the local does not shadow it.
  edgetaper_kernel = mod.get_function("edgetaper")
  # get_global returns a pair; only the device pointer is needed.
  wx_gpu = mod.get_global('wx')[0]
  wy_gpu = mod.get_global('wy')[0]

  cu.memcpy_htod(wx_gpu, wx)
  cu.memcpy_htod(wy_gpu, wy)

  edgetaper_kernel(y_gpu, np.int32(hsf[1]), np.int32(hsf[0]),
                   block=block_size, grid=grid_size)
开发者ID:matthiaslee,项目名称:VMBD,代码行数:31,代码来源:gputools.py


示例4: prepare_device_arrays

    def prepare_device_arrays(self):
        """Allocate device buffers for the layer tables and the fine bin centres.

        Reads the layer information from ``self.grid_prop`` into host arrays,
        then allocates one device buffer per array and copies the host data
        into it. The device buffers are stored on ``self.d_numLayers``,
        ``self.d_densityInLayer``, ``self.d_distanceInLayer``,
        ``self.d_ecen_fine`` and ``self.d_czcen_fine``.
        """
        self.maxLayers = self.grid_prop.GetMaxLayers()
        n_cz = len(self.czcen_fine)
        flat_len = n_cz * self.maxLayers

        # Host buffers handed to the grid_prop getters (presumably filled in
        # place, since the return values are not used).
        layer_counts = np.zeros(n_cz, dtype=np.int32)
        layer_density = np.zeros(flat_len, dtype=self.FTYPE)
        layer_distance = np.zeros(flat_len, dtype=self.FTYPE)

        self.grid_prop.GetNumberOfLayers(layer_counts)
        self.grid_prop.GetDensityInLayer(layer_density)
        self.grid_prop.GetDistanceInLayer(layer_distance)

        # Allocate all three layer buffers first, then upload them.
        self.d_numLayers = cuda.mem_alloc(layer_counts.nbytes)
        self.d_densityInLayer = cuda.mem_alloc(layer_density.nbytes)
        self.d_distanceInLayer = cuda.mem_alloc(layer_distance.nbytes)
        layer_uploads = (
            (self.d_numLayers, layer_counts),
            (self.d_densityInLayer, layer_density),
            (self.d_distanceInLayer, layer_distance),
        )
        for device_buf, host_ary in layer_uploads:
            cuda.memcpy_htod(device_buf, host_ary)

        # Same pattern for the fine bin-centre arrays.
        self.d_ecen_fine = cuda.mem_alloc(self.ecen_fine.nbytes)
        self.d_czcen_fine = cuda.mem_alloc(self.czcen_fine.nbytes)
        center_uploads = (
            (self.d_ecen_fine, self.ecen_fine),
            (self.d_czcen_fine, self.czcen_fine),
        )
        for device_buf, host_ary in center_uploads:
            cuda.memcpy_htod(device_buf, host_ary)
开发者ID:gkrueckl,项目名称:pisa,代码行数:26,代码来源:Prob3GPUOscillationService.py


示例5: _set

    def _set(self, ary):
        """Pack ``ary`` into a zero-padded host buffer and copy it to ``self.data``.

        The host buffer has shape ``(self.nrow, self.leaddim)``; the packed
        values fill its first ``self.ncol`` columns and the remaining columns
        stay zero.
        """
        padded = np.zeros((self.nrow, self.leaddim), dtype=self.dtype)
        packed = self._pack(ary)
        padded[:, :self.ncol] = packed

        # Upload the padded buffer
        cuda.memcpy_htod(self.data, padded)
开发者ID:pv101,项目名称:PyFR,代码行数:7,代码来源:types.py


示例6: from_np

    def from_np(np_data):
        """Upload ``np_data`` to a new device buffer and wrap it in a ``MyTensor``.

        The returned tensor carries the array's ``shape`` and ``size`` and
        keeps the original host array on its ``cpudata`` attribute.
        """
        device_buffer = cuda.mem_alloc(np_data.nbytes)
        cuda.memcpy_htod(device_buffer, np_data)

        wrapped = MyTensor(device_buffer, shape=np_data.shape, size=np_data.size)
        # Keep the host array on the tensor alongside the device copy.
        wrapped.cpudata = np_data
        return wrapped
开发者ID:hughperkins,项目名称:neon,代码行数:7,代码来源:test_correctness.py


示例7: cuda_crossOver

def cuda_crossOver(sola, solb):
    """Run the ``crossOver`` CUDA kernel on two solutions and return the results.

    Both arrays are copied to the device, the kernel is launched with one
    block of ``len(sola)`` threads, and both device buffers are copied back
    into new host arrays.

    Args:
        sola: first host array (must expose ``nbytes`` and ``len()``).
        solb: second host array.

    Returns:
        Tuple ``(a_new, b_new)`` of host arrays read back from the device.
    """
    sol_len = len(sola)

    a_gpu = cuda.mem_alloc(sola.nbytes)
    b_gpu = cuda.mem_alloc(solb.nbytes)

    cuda.memcpy_htod(a_gpu, sola)
    cuda.memcpy_htod(b_gpu, solb)

    func = mod.get_function("crossOver")
    func(a_gpu, b_gpu, block=(sol_len, 1, 1))

    a_new = numpy.empty_like(sola)
    b_new = numpy.empty_like(solb)

    cuda.memcpy_dtoh(a_new, a_gpu)
    cuda.memcpy_dtoh(b_new, b_gpu)

    if debug:
        # Print the actual inputs; the previous code referenced the undefined
        # names ``a`` and ``b`` and raised NameError whenever debug was on.
        print("a: %s" % (sola,))
        print("b: %s" % (solb,))
        print("new a: %s" % (a_new,))
        print("new b: %s" % (b_new,))

    return a_new, b_new
开发者ID:adamuas,项目名称:coevondm,代码行数:27,代码来源:cudaInterface.py


示例8: _to_device

 def _to_device(self, module):
     """Copy this constant's host data into its global symbol in ``module``.

     Looks up the symbol named ``self.name`` and raises ``RuntimeError`` when
     its size differs from ``self.data.nbytes``. The copy only happens while
     ``self.state`` is ``DeviceDataMixin.HOST``; afterwards the state becomes
     ``DeviceDataMixin.BOTH``.
     """
     dev_ptr, dev_nbytes = module.get_global(self.name)
     if dev_nbytes != self.data.nbytes:
         raise RuntimeError("Const %s needs %d bytes, but only space for %d" % (self, self.data.nbytes, dev_nbytes))

     if self.state is not DeviceDataMixin.HOST:
         return

     driver.memcpy_htod(dev_ptr, self._data)
     self.state = DeviceDataMixin.BOTH
开发者ID:RomainBrault,项目名称:PyOP2,代码行数:7,代码来源:cuda.py


示例9: __init__

    def __init__(self, n_dict, V, dt, debug=False):
        """Upload the neuron parameters to the GPU and build the update kernel.

        Args:
            n_dict: mapping of parameter name to a per-neuron sequence; the
                keys read here are listed in ``gpu_params`` below, plus
                ``'id'`` and ``'initV'``.
            V: device pointer (convertible with ``int()``) that receives the
                ``'initV'`` values.
            dt: time step; it is divided into ``self.steps`` sub-steps of
                length ``self.ddt``.
            debug: stored unchanged on ``self.debug``.
        """
        self.num_neurons = len(n_dict['id'])
        self.dt = np.double(dt)
        self.steps = max(int(round(dt / 1e-5)), 1)
        self.debug = debug

        self.ddt = dt / self.steps

        self.V = V

        # (attribute name, n_dict key) pairs, uploaded as float64 in this order.
        gpu_params = (
            ('n', 'initn'),
            ('V_1', 'V1'),
            ('V_2', 'V2'),
            ('V_3', 'V3'),
            ('V_4', 'V4'),
            ('V_l', 'V_l'),
            ('V_ca', 'V_ca'),
            ('V_k', 'V_k'),
            ('G_l', 'G_l'),
            ('G_ca', 'G_ca'),
            ('G_k', 'G_k'),
            ('Tphi', 'phi'),
            ('offset', 'offset'),
        )
        for attr_name, key in gpu_params:
            host_values = np.asarray(n_dict[key], dtype=np.float64)
            setattr(self, attr_name, garray.to_gpu(host_values))

        # Write the initial values into the caller-provided buffer.
        v_address = int(self.V)
        initial_v = np.asarray(n_dict['initV'], dtype=np.double)
        cuda.memcpy_htod(v_address, initial_v)
        self.update = self.get_euler_kernel()
开发者ID:yiyin,项目名称:neurokernel,代码行数:30,代码来源:MorrisLecar_a.py


示例10: evaluate

  def evaluate(self, params, returnOutputs=False):
    """Run the evaluation kernel for a whole population on the training set.

    The parameter structures are uploaded to ``self.params``, the kernel is
    launched, and the result is read back into ``self.outputsMat`` with shape
    ``(self.popSize, self.trainSet.size)`` and dtype float32.

    @param params: network params, exactly ``self.popSize`` of them
    @type params: list of Parameters
    @param returnOutputs: return network output values (debug)
    @type returnOutputs: bool, default False

    @return output matrix if returnOutputs=True, else None
    @raise ValueError: if ``len(params)`` differs from ``self.popSize``
    """
    numParams = len(params)
    if self.popSize != numParams:
      raise ValueError("Need %d Parameter structures (provided %d)" % (
        self.popSize, numParams))

    # Pack the structures into one contiguous array for the upload.
    hostParams = (Parameters * numParams)(*params)
    driver.memcpy_htod(self.params, hostParams)

    # TODO: remove
    outputBytes = self.popSize * self.trainSet.size * 4
    driver.memset_d8(self.outputs, 0, outputBytes)

    self.evaluateKernel.prepared_call(self.evaluateGridDim,
                                      self.trainSetDev,
                                      self.trainSet.size,
                                      self.params,
                                      self.popSize,
                                      self.outputs)

    driver.Context.synchronize()

    outputShape = (self.popSize, self.trainSet.size)
    self.outputsMat = driver.from_device(self.outputs,
                                         shape=outputShape,
                                         dtype=np.float32)

    if not returnOutputs:
      return None
    return self.outputsMat
开发者ID:cpatulea,项目名称:evolution,代码行数:35,代码来源:ann.py


示例11: __init__

    def __init__(self, n_dict, V, dt, debug=False, cuda_verbose=False):
        """Upload the neuron parameters to the GPU and build the update kernel.

        Args:
            n_dict: mapping of parameter name to a per-neuron sequence.
            V: device pointer (convertible with ``int()``) that receives the
                ``"initV"`` values.
            dt: time step; it is divided into ``self.steps`` sub-steps of
                length ``self.ddt``.
            debug: stored unchanged on ``self.debug``.
            cuda_verbose: when true, ``self.compile_options`` is set to
                ``["--ptxas-options=-v"]``; otherwise to an empty list.
        """
        self.compile_options = ["--ptxas-options=-v"] if cuda_verbose else []

        self.num_neurons = len(n_dict["id"])
        self.dt = np.double(dt)
        self.steps = max(int(round(dt / 1e-5)), 1)
        self.debug = debug

        self.ddt = dt / self.steps

        self.V = V

        def upload(key):
            # Convert one n_dict entry to float64 and copy it to the GPU.
            return garray.to_gpu(np.asarray(n_dict[key], dtype=np.float64))

        self.n = upload("initn")

        self.V_1 = upload("V1")
        self.V_2 = upload("V2")
        self.V_3 = upload("V3")
        self.V_4 = upload("V4")
        self.V_l = upload("V_l")
        self.V_ca = upload("V_ca")
        self.V_k = upload("V_k")
        self.G_l = upload("G_l")
        self.G_ca = upload("G_ca")
        self.G_k = upload("G_k")
        self.Tphi = upload("phi")
        self.offset = upload("offset")

        # Write the initial values into the caller-provided buffer.
        v_address = int(self.V)
        initial_v = np.asarray(n_dict["initV"], dtype=np.double)
        cuda.memcpy_htod(v_address, initial_v)
        self.update = self.get_euler_kernel()
开发者ID:neurokernel,项目名称:neurodriver,代码行数:32,代码来源:MorrisLecar_a.py


示例12: __compile_kernels

  def __compile_kernels(self):
    """Bind the forest's kernels onto this object and publish the index pointers.

    Each kernel attribute of ``self.forest`` listed below is copied onto
    ``self`` under the same name. The device pointers of the two sorted-index
    arrays are then written, as 64-bit unsigned integers, into the
    ``sorted_indices_1`` and ``sorted_indices_2`` globals of the forest's BFS
    module.
    """
    forest = self.forest

    # DFS module
    dfs_kernels = (
      "find_min_kernel",
      "fill_kernel",
      "scan_reshuffle_tex",
      "comput_total_2d",
      "reduce_2d",
      "scan_total_2d",
      "scan_reduce",
    )
    # BFS module
    bfs_kernels = (
      "scan_total_bfs",
      "comput_bfs_2d",
      "fill_bfs",
      "reshuffle_bfs",
      "reduce_bfs_2d",
      "get_thresholds",
    )
    # Other
    other_kernels = (
      "predict_kernel",
      "mark_table",
    )
    for kernel_name in dfs_kernels + bfs_kernels + other_kernels:
      setattr(self, kernel_name, getattr(forest, kernel_name))

    bfs_module = forest.bfs_module
    indices_slot_1 = bfs_module.get_global("sorted_indices_1")[0]
    indices_slot_2 = bfs_module.get_global("sorted_indices_2")[0]
    cuda.memcpy_htod(indices_slot_1, np.uint64(self.sorted_indices_gpu.ptr))
    cuda.memcpy_htod(indices_slot_2, np.uint64(self.sorted_indices_gpu_.ptr))
开发者ID:phecy,项目名称:CudaTree,代码行数:26,代码来源:random_tree.py


示例13: calc_bandwidth_h2d

	def calc_bandwidth_h2d( s ):
		"""Time one host-to-device copy of ``s.a`` and return the transfer rate.

		Returns ``s.nbytes`` divided by the elapsed wall-clock seconds and then
		by ``gbytes``, a name defined outside this method.
		"""
		started = datetime.now()
		cuda.memcpy_htod( s.dev_a, s.a )
		elapsed = datetime.now() - started
		# Only the seconds and microseconds fields are used; days are ignored.
		elapsed_seconds = elapsed.seconds + elapsed.microseconds*1e-6

		bytes_per_second = s.nbytes/elapsed_seconds
		return bytes_per_second/gbytes
开发者ID:wbkifun,项目名称:fdtd_accelerate,代码行数:7,代码来源:150-gpus-mpi-range-h5-seperate.py


示例14: __compute_guassian_on_pts

    def __compute_guassian_on_pts(self):
        """Launch the Gaussian kernel once per grid cell that has nearby points.

        For every data set, a copy of the points is normalised against the
        current view (columns 0 and 1). For each (row, col) cell, the points
        inside the cell's extent padded by ``3 * self.sigma`` on every side
        are uploaded to a temporary device buffer, the kernel is launched on
        them, and the buffer is freed. Cells with no points are skipped.
        """
        view = self.view_tile.get_View()

        for data_set in self.data_sets:
            # Work on a copy so the stored data set is left untouched.
            normalised = np.array(data_set.getDataSet(), copy=True)
            normalised[:, 0] = (normalised[:, 0] - view.left)/view.width()
            normalised[:, 1] = (normalised[:, 1] - view.bottom)/view.height()

            for row in range(self.grid_size[0]):
                # Vertical extent of this row of cells, padded by 3 sigma.
                margin = 3 * self.sigma
                cell_height = 1 / float(self.grid_size[0])
                bottom = cell_height * row - margin
                top = cell_height * (row + 1) + margin

                for col in range(self.grid_size[1]):
                    # Horizontal extent of this cell, padded the same way.
                    cell_width = 1 / float(self.grid_size[1])
                    left = cell_width * col - margin
                    right = cell_width * (col + 1) + margin

                    bounds = (left, right, bottom, top)
                    pts = getFilteredDataSet(normalised, bounds)
                    if not len(pts) > 0:
                        continue

                    self.pts_gpu = cuda.mem_alloc_like(pts)
                    cuda.memcpy_htod(self.pts_gpu, pts)

                    self.gpu_gaussian(self.grid_gpu,  # Grid
                                      self.pts_gpu,  # Points
                                      np.int32(col),  # Block Index x
                                      np.int32(row),  # Block Index y
                                      np.int32(self.grid_size[1]),  # Grid Dimensions x
                                      np.int32(self.grid_size[0]),  # Grid Dimensions y
                                      np.int32(pts.shape[0]),  # Point Length
                                      np.float32(self.dx),  # dx
                                      np.float32(self.dy),  # dy
                                      np.float32(self.sigma),  # Sigma
                                      block=self.block_size)

                    # The point buffer is per cell; release it right away.
                    self.pts_gpu.free()
开发者ID:SCIInstitute,项目名称:MLM,代码行数:34,代码来源:gaussian_gpu_grid.py


示例15: interior_buffer

def interior_buffer(source_im, dest_im, b_size, g_size, RGB, neighbors):
	"""Run the mask kernel on ``source_im`` and return the resulting buffer.

	Args:
		source_im: host array uploaded to the device and processed in place
			by the kernel.
		dest_im: host array whose ``shape[1]``/``shape[0]`` provide the
			template WIDTH/HEIGHT and whose contents seed the returned
			uint8 buffer before the device data is copied over it.
		b_size: CUDA block dimensions; entries 0 and 1 also fill the
			template's BLOCK_DIM_X/BLOCK_DIM_Y.
		g_size: CUDA grid dimensions.
		RGB: value assigned to the template's RGB variable.
		neighbors: value assigned to the template's NEIGHBORS variable.

	Returns:
		A uint8 array shaped like ``dest_im`` holding the data copied back
		from the device.
	"""
	# The body is indented with tabs only: the previous mix of tab-only and
	# space-plus-tab lines is rejected by Python 3 as inconsistent.

	# create Cheetah template and fill in variables for mask kernel
	mask_template = Template(mask_source)
	mask_template.BLOCK_DIM_X = b_size[0]
	mask_template.BLOCK_DIM_Y = b_size[1]
	mask_template.WIDTH = dest_im.shape[1]
	mask_template.HEIGHT = dest_im.shape[0]
	mask_template.RGB = RGB
	mask_template.NEIGHBORS = neighbors

	# compile the CUDA kernel
	mask_kernel = cuda_compile(mask_template, "mask_kernel")

	# alloc memory to GPU
	d_source = cu.mem_alloc(source_im.nbytes)
	cu.memcpy_htod(d_source, source_im)

	# sends to GPU filter out interior points in the mask
	mask_kernel(d_source, block=b_size, grid=g_size)

	# retrieves interior point buffer from GPU
	inner_buffer = np.array(dest_im, dtype=np.uint8)
	cu.memcpy_dtoh(inner_buffer, d_source)

	# returns the interior buffer
	return inner_buffer
开发者ID:JMTing,项目名称:cs205,代码行数:26,代码来源:parallel_poisson.py


示例16: __init__

    def __init__(self, n_dict, V, dt, debug=False, cuda_verbose=False):
        """Upload the neuron parameters to the GPU and build the update kernel.

        Args:
            n_dict: mapping of parameter name to a per-neuron sequence; the
                keys read here are listed in ``gpu_params`` below, plus
                ``'id'`` and ``'initV'``.
            V: device pointer (convertible with ``int()``) that receives the
                ``'initV'`` values.
            dt: time step; it is divided into ``self.steps`` sub-steps of
                length ``self.ddt``.
            debug: stored unchanged on ``self.debug``.
            cuda_verbose: when true, ``self.compile_options`` is set to
                ``['--ptxas-options=-v']``; otherwise to an empty list.
        """
        self.compile_options = ['--ptxas-options=-v'] if cuda_verbose else []

        self.num_neurons = len(n_dict['id'])
        self.dt = np.double(dt)
        self.steps = max(int(round(dt / 1e-5)),1)
        self.debug = debug

        self.ddt = dt / self.steps

        self.V = V

        # (attribute name, n_dict key) pairs, uploaded as float64 in this order.
        gpu_params = (
            ('n', 'initn'),
            ('V_1', 'V1'),
            ('V_2', 'V2'),
            ('V_3', 'V3'),
            ('V_4', 'V4'),
            ('Tphi', 'phi'),
            ('offset', 'offset'),
        )
        for attr_name, key in gpu_params:
            host_values = np.asarray(n_dict[key], dtype=np.float64)
            setattr(self, attr_name, garray.to_gpu(host_values))

        # Write the initial values into the caller-provided buffer.
        v_address = int(self.V)
        initial_v = np.asarray(n_dict['initV'], dtype=np.double)
        cuda.memcpy_htod(v_address, initial_v)
        self.update = self.get_euler_kernel()
开发者ID:neurokernel,项目名称:neurodriver,代码行数:27,代码来源:MorrisLecarCopy.py


示例17: compile_for_GPU

def compile_for_GPU(function_package, kernel_function_name='default'):
	"""Compile a kernel source module and register it under its name.

	With the default name, only the shared ``attachment`` source is compiled
	and stored in ``source_module_dict``. Otherwise the Vivaldi code of the
	packaged function is translated to CUDA, wrapped in ``extern "C"``,
	compiled, given the current device number, and its kernel function is
	stored in ``func_dict``.

	Args:
		function_package: object providing ``function_name`` and
			``function_args``; only read for non-default kernel names.
		kernel_function_name: key used in ``source_module_dict`` and
			``func_dict``, and the name of the kernel fetched from the module.
	"""
	if kernel_function_name == 'default':
		source_module_dict[kernel_function_name] = CustomSourceModule(attachment)
		return

	# Imported here, as before, so it is only needed for translated kernels.
	from vivaldi_translator import translate_to_CUDA

	fp = function_package
	function_name = fp.function_name
	Vivaldi_code = function_code_dict[function_name]

	function_code = translate_to_CUDA(Vivaldi_code=Vivaldi_code, function_name=function_name, function_arguments=fp.function_args)

	kernel_code = attachment + 'extern "C"{\n'
	kernel_code += function_code
	kernel_code += '\n}'

	# Debug dump of the generated source. The with-block closes the file
	# even when the write fails.
	with open('asdf.cu','w') as f:
		f.write(kernel_code)

	source_module = CustomSourceModule(kernel_code)
	source_module_dict[kernel_function_name] = source_module

	# Write the device number into the module's DEVICE_NUMBER global.
	temp,_ = source_module.get_global('DEVICE_NUMBER')
	cuda.memcpy_htod(temp, numpy.int32(device_number))

	func_dict[kernel_function_name] = source_module.get_function(kernel_function_name)

	create_helper_textures(source_module)
开发者ID:Anukura,项目名称:Vivaldi,代码行数:34,代码来源:GPU_unit.py


示例18: set

    def set(self, ary, device=None):
        """
        copy host array to device.
        Arguments:
            ary: host array, needs to be contiguous; it is converted to
                 self.dtype first when its dtype differs
            device: device id, if not the one attached to current context
        Returns:
            self
        Raises:
            AssertionError: if the sizes differ, this array is not
                contiguous, or the strides differ after dtype conversion
        """
        assert ary.size == self.size
        assert self.is_contiguous, "Array in set() must be contiguous"
        # Compare dtypes by value: an identity test can report a mismatch for
        # equal dtypes and force a needless copy.
        if ary.dtype != self.dtype:
            ary = ary.astype(self.dtype)
        assert ary.strides == self.strides

        if device is None:
            drv.memcpy_htod(self.gpudata, ary)
        else:
            # with multithreaded datasets, make a context before copying
            # and pop it again once done.
            ctx = drv.Device(device).make_context()
            try:
                drv.memcpy_htod(self.gpudata, ary)
            finally:
                # Pop even when the copy fails, so the temporary context
                # does not stay on the context stack.
                ctx.pop()
                del ctx

        return self
开发者ID:KayneWest,项目名称:nervanagpu,代码行数:26,代码来源:nervanagpu.py


示例19: _read_LPU_input

    def _read_LPU_input(self, in_gpot_dict, in_spike_dict):
        """
        Put inputs from other LPUs to buffer.

        For each source LPU the destination address is the buffer base plus
        (current row * leading dimension + this LPU's own neuron count + the
        cumulative offset of that source LPU) elements. Graded-potential data
        is copied as received; sparse spike indexes are first expanded into a
        dense int32 0/1 vector.
        """
        for other_lpu, gpot_data in in_gpot_dict.iteritems():
            i = self.other_lpu_map[other_lpu]
            if not self.num_input_gpot_neurons[i] > 0:
                continue

            gpot_buf = self.buffer.gpot_buffer
            base_address = int(gpot_buf.gpudata)
            element_offset = (self.buffer.gpot_current * gpot_buf.ld
                              + self.my_num_gpot_neurons
                              + self.cum_virtual_gpot_neurons[i])
            destination = int(base_address
                              + element_offset * gpot_buf.dtype.itemsize)
            cuda.memcpy_htod(destination, gpot_data)

            if self.debug:
                self.in_gpot_files[other_lpu].root.array.append(gpot_data.reshape(1,-1))

        #Will need to change this if only spike indexes are transmitted
        for other_lpu, sparse_spike in in_spike_dict.iteritems():
            i = self.other_lpu_map[other_lpu]
            if not self.num_input_spike_neurons[i] > 0:
                continue

            # Dense vector: 1 where a spike was received, 0 elsewhere.
            full_spike = np.zeros(self.num_input_spike_neurons[i],dtype=np.int32)
            if len(sparse_spike)>0:
                spike_positions = [self.input_spike_idx_map[i][k]
                                   for k in sparse_spike]
                idx = np.asarray(spike_positions, dtype=np.int32)
                full_spike[idx] = 1

            spike_buf = self.buffer.spike_buffer
            base_address = int(spike_buf.gpudata)
            element_offset = (self.buffer.spike_current * spike_buf.ld
                              + self.my_num_spike_neurons
                              + self.cum_virtual_spike_neurons[i])
            destination = int(base_address
                              + element_offset * spike_buf.dtype.itemsize)
            cuda.memcpy_htod(destination, full_spike)
开发者ID:LuisMoralesAlonso,项目名称:neurokernel,代码行数:31,代码来源:LPU.py


示例20: __init__

    def __init__(self, pts, axis, split, sigma):
        """Store the points and prepare the device grid and Gaussian kernel.

        Args:
            pts: point array; it is converted to C-contiguous layout with
                ``np.require`` when necessary.
            axis: stored unchanged on ``self.axis``.
            split: two-element grid shape; both entries must be at least 2.
            sigma: stored unchanged on ``self.sigma``.

        Raises:
            ValueError: if either entry of ``split`` is smaller than 2.
            Exception: if ``pts`` is still not C-contiguous after
                ``np.require``.
        """
        if split[0] < 2 or split[1] < 2:
            raise ValueError("Split needs to be at least 2x2")

        is_c_layout = pts.flags['C_CONTIGUOUS']
        if not is_c_layout:
            pts = np.require(pts, dtype=pts.dtype, requirements=['C'])
            is_c_layout = pts.flags['C_CONTIGUOUS']
        if not is_c_layout:
            raise Exception("Points are not contiguous")

        self.axis = axis
        self.sigma = sigma
        self.pts = pts
        self.pts_gpu = None

        # Zero-filled grid with the points' dtype, copied into a device
        # buffer of matching size.
        host_grid = np.zeros(split).astype(pts.dtype)
        self.grid = host_grid
        self.grid_gpu = cuda.mem_alloc_like(host_grid)
        cuda.memcpy_htod(self.grid_gpu, host_grid)

        module = SourceModule(self.__cuda_code)
        self.gpu_gaussian = module.get_function("gpu_gaussian")

        # dx is derived from split[0] and dy from split[1].
        self.dx = 1 / float(split[0] - 1)
        self.dy = 1 / float(split[1] - 1)

        launch_dims = self.__setup_cuda_sizes(split)
        self.grid_size, self.block_size = launch_dims
开发者ID:SCIInstitute,项目名称:MLM,代码行数:26,代码来源:gaussian_gpu.py



注:本文中的pycuda.driver.memcpy_htod函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python driver.memcpy_htod_async函数代码示例发布时间:2022-05-25
下一篇:
Python driver.memcpy_dtoh_async函数代码示例发布时间:2022-05-25
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap