• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python pyopencl.enqueue_nd_range_kernel函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了 Python 中 pyopencl.enqueue_nd_range_kernel 函数的典型用法代码示例。如果您正苦于以下问题:Python enqueue_nd_range_kernel 函数的具体用法?Python enqueue_nd_range_kernel 怎么用?Python enqueue_nd_range_kernel 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了enqueue_nd_range_kernel函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: do_opencl_pow

def do_opencl_pow(hash, target):
	output = numpy.zeros(1, dtype=[('v', numpy.uint64, 1)])
	if (ctx == False):
		return output[0][0]
	
	data = numpy.zeros(1, dtype=hash_dt, order='C')
	data[0]['v'] = ("0000000000000000" + hash).decode("hex")
	data[0]['target'] = target
	
	hash_buf = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)
	dest_buf = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, output.nbytes)
	
	kernel = program.kernel_sha512
	worksize = kernel.get_work_group_info(cl.kernel_work_group_info.WORK_GROUP_SIZE, cl.get_platforms()[0].get_devices()[1])

	kernel.set_arg(0, hash_buf)
	kernel.set_arg(1, dest_buf)

	start = time.time()
	progress = 0
	globamt = worksize*2000

	while output[0][0] == 0:
		kernel.set_arg(2, pack("<Q", progress))
		cl.enqueue_nd_range_kernel(queue, kernel, (globamt,), (worksize,))
		cl.enqueue_read_buffer(queue, dest_buf, output)
		queue.finish()
		progress += globamt
		sofar = time.time() - start
		print sofar, progress / sofar, "hashes/sec"
	taken = time.time() - start
	print progress, taken
	return output[0][0]
开发者ID:N0U,项目名称:PyBitmessage,代码行数:33,代码来源:openclpow.py


示例2: do_opencl_pow

def do_opencl_pow(hash, target):
    """Run the OpenCL SHA-512 kernel in batches until it reports a result.

    hash   -- hex string; prefixed with 16 zero digits and hex-decoded into
              the "v" field of the kernel input record.
    target -- value stored in the "target" field of the input record.

    Returns the value the kernel wrote into the one-element output buffer,
    or 0 immediately when the module-level ``ctx`` equals False.
    """
    global ctx, queue, program, gpus, hash_dt

    found = numpy.zeros(1, dtype=[("v", numpy.uint64, 1)])
    if ctx == False:
        return found[0][0]

    payload = numpy.zeros(1, dtype=hash_dt, order="C")
    payload[0]["v"] = ("0000000000000000" + hash).decode("hex")
    payload[0]["target"] = target

    read_and_copy = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
    hash_buf = cl.Buffer(ctx, read_and_copy, hostbuf=payload)
    dest_buf = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, found.nbytes)

    kernel = program.kernel_sha512
    group_size = kernel.get_work_group_info(cl.kernel_work_group_info.WORK_GROUP_SIZE, gpus[0])

    kernel.set_arg(0, hash_buf)
    kernel.set_arg(1, dest_buf)

    began = time.time()
    attempts = 0
    per_launch = group_size * 2000
    global_size = (per_launch,)
    local_size = (group_size,)

    while found[0][0] == 0:
        # Argument 2 is the number of values already tried.
        kernel.set_arg(2, pack("<Q", attempts))
        cl.enqueue_nd_range_kernel(queue, kernel, global_size, local_size)
        cl.enqueue_read_buffer(queue, dest_buf, found)
        queue.finish()
        attempts += per_launch
        # Only consumed by debug logging, which is currently disabled.
        sofar = time.time() - began
    #   logger.debug("Working for %.3fs, %.2f Mh/s", sofar, (attempts / sofar) / 1000000)
    taken = time.time() - began
    #   logger.debug("Took %d tries.", attempts)
    return found[0][0]
开发者ID:Basti1993,项目名称:PyBitmessage,代码行数:35,代码来源:openclpow.py


示例3: do_opencl_pow

def do_opencl_pow(hash, target):
    """Run the OpenCL SHA-512 kernel in batches until a result or shutdown.

    hash   -- hex string; prefixed with 16 zero digits and hex-decoded into
              the 'v' field of the kernel input record.
    target -- value stored in the 'target' field of the input record.

    Returns the value the kernel wrote into the one-element output buffer,
    or 0 immediately when ``enabledGpus`` is empty.

    Raises Exception("Interrupted") when the module-level ``shutdown`` flag
    is non-zero once the loop has ended.
    """
    result = numpy.zeros(1, dtype=[('v', numpy.uint64, 1)])
    if (len(enabledGpus) == 0):
        return result[0][0]

    request = numpy.zeros(1, dtype=hash_dt, order='C')
    request[0]['v'] = ("0000000000000000" + hash).decode("hex")
    request[0]['target'] = target

    hash_buf = cl.Buffer(
        ctx,
        cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
        hostbuf=request,
    )
    dest_buf = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, result.nbytes)

    kernel = program.kernel_sha512
    local_items = kernel.get_work_group_info(
        cl.kernel_work_group_info.WORK_GROUP_SIZE, enabledGpus[0])

    kernel.set_arg(0, hash_buf)
    kernel.set_arg(1, dest_buf)

    t_start = time.time()
    done = 0
    global_items = local_items*2000

    while True:
        # Stop once the kernel reported a value or a shutdown was requested.
        if result[0][0] != 0 or shutdown != 0:
            break
        kernel.set_arg(2, pack("<Q", done))
        cl.enqueue_nd_range_kernel(queue, kernel, (global_items,), (local_items,))
        cl.enqueue_read_buffer(queue, dest_buf, result)
        queue.finish()
        done += global_items
        # Only consumed by debug logging, which is currently disabled.
        sofar = time.time() - t_start
#       logger.debug("Working for %.3fs, %.2f Mh/s", sofar, (done / sofar) / 1000000)
    if shutdown != 0:
        raise Exception ("Interrupted")
    taken = time.time() - t_start
#   logger.debug("Took %d tries.", done)
    return result[0][0]
开发者ID:Bitmessage,项目名称:PyBitmessage,代码行数:35,代码来源:openclpow.py


示例4: max_length_real4

def max_length_real4(ipt):
    """Return ``max_reduce`` applied to the output of the real4 length kernel.

    A ``CLReal`` array with ``len(ipt)`` entries is allocated, the input and
    output buffers are bound as kernel arguments 0 and 1, and the kernel is
    enqueued with a global size of ``len(ipt)`` on ``ipt``'s command queue.
    """
    lengths = CLReal(len(ipt))
    kernel = _lengthkern_real4.kern
    for position, array in enumerate((ipt, lengths)):
        kernel.set_arg(position, array._buffer)
    cl.enqueue_nd_range_kernel(ipt._ctrl.clqueue, kernel, (len(ipt),), None)
    return max_reduce(lengths)
开发者ID:hagisgit,项目名称:SLIC,代码行数:7,代码来源:tools.py


示例5: __call__

 def __call__(self, thread_count, work_group_size, *args):
     """Compile the kernel, bind ``args`` positionally and run it to completion.

     ``thread_count`` is used as the one-dimensional global size and
     ``work_group_size`` as the local size.  The launch is timed under the
     "ParLoop kernel" region and the call blocks until the event finishes.
     """
     kernel = self.compile()
     position = 0
     for value in args:
         kernel.set_arg(position, value)
         position += 1
     global_size = (thread_count,)
     local_size = (work_group_size,)
     with timed_region("ParLoop kernel"):
         event = cl.enqueue_nd_range_kernel(_queue, kernel, global_size,
                                            local_size, g_times_l=False)
         event.wait()
开发者ID:GitPaean,项目名称:PyOP2,代码行数:7,代码来源:opencl.py


示例6: filterPrepare

 def filterPrepare(self, e, data, keys, ndata, events):
     import numpy as np
     import pyopencl as cl
     mf = cl.mem_flags
     
     ndata = data.size
     if keys.size != ndata: raise Exception()
     
     filtbytes = np.bool8(False).nbytes * ndata
     
     if not isinstance(data, cl.Buffer):
         data_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf= data)
     else:
         data_buf = data
     
     if not isinstance(keys, cl.Buffer):
         keys_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf= keys)
     else:
         keys_buf = keys
     
     filt_buf = cl.Buffer(self.ctx, mf.READ_WRITE, filtbytes)
     
     kernel = self.prg.filterPrepare
     kernel.set_args(data_buf, keys_buf, np.uint64(ndata), np.uint8(33), np.uint8(66), filt_buf)
     global_dims = self.get_global(self.get_grid_dims(ndata))
     
     print "filterPrepare"
     if e is None:
         e  = [ cl.enqueue_nd_range_kernel(self.queue, kernel, global_dims, self.localDims), ]
     else:
         e  = [ cl.enqueue_nd_range_kernel(self.queue, kernel, global_dims, self.localDims, wait_for=e), ]
     events += e
     
     return (e, data_buf, keys_buf, filt_buf)
开发者ID:Kobtul,项目名称:documents,代码行数:34,代码来源:filter.py


示例7: prefixSumUp

 def prefixSumUp(self, e, data, ndata, data2, ndata2, events):
     import numpy as np
     import pyopencl as cl
     mf = cl.mem_flags
     
     if not isinstance(data, cl.Buffer):
         data_buf = cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=data)
     else:
         data_buf = data
     
     if not isinstance(data2, cl.Buffer):
         data2_buf = cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=data2)
     else:
         data2_buf = data2
             
     kernel = self.prg.prefixSumUp
     kernel.set_args(data_buf, np.uint64(ndata), data2_buf, np.uint64(ndata2))
     
     global_dims = self.get_global(self.get_grid_dims(ndata))
     
     print "prefixSumUp"
     if e is None:
         e  = ( cl.enqueue_nd_range_kernel(self.queue, kernel, global_dims, self.localDims, wait_for=e), )
     else:
         e  = ( cl.enqueue_nd_range_kernel(self.queue, kernel, global_dims, self.localDims), )
     events += e
     
     return (e, data_buf, data2_buf)
开发者ID:Kobtul,项目名称:documents,代码行数:28,代码来源:filter.py


示例8: exec_lsz_safe

 def exec_lsz_safe(self, localsize):
     """Execute the kernel once with a specific local size.

     Safe also for kernels with local variables.

     The current local size is saved, ``localsize`` is applied for a single
     launch that is waited on, and the saved value is put back afterwards --
     also when the launch or the wait raises, so a failed run does not leave
     the object configured with the temporary local size.
     """
     oldloc = int(self._localsize)
     self.localsize = localsize
     try:
         cl.enqueue_nd_range_kernel(self._solverobj.clqueue, self._clkernel, (self.globalsize,), (self.localsize,))
         self._solverobj.clqueue.finish()
     finally:
         self.localsize = oldloc
开发者ID:hagisgit,项目名称:qcl,代码行数:8,代码来源:QclKernel.py


示例9: test_algorithm

    def test_algorithm(self):
        """Run the pbrs kernel against CSV reference vectors and report results.

        For a kernel name from ``self.kernelname`` the kernel is loaded, six
        test cases are read from 'test_bench_pbrs_input.csv' and
        'test_bench_pbrs_output.csv' (eight lines per case from each file),
        and the kernel output is compared byte-for-byte with the reference.

        Returns True when all six cases pass, False otherwise.

        NOTE(review): both return statements sit inside the loop over
        ``self.kernelname``, so only the first kernel name is ever exercised
        -- confirm that this is intended.
        """
        print "\n**************************"
        print "test_pbrs:"
        passed = 0
        buffersize_in = 188*8
        buffersize_out = 188*8
        # opencl buffer uint
        self.inputbuffer = cl.Buffer(self.ctx , cl.mem_flags.READ_WRITE, size=buffersize_in*4)
        # opencl buffer uint
        self.outputbuffer = cl.Buffer(self.ctx , cl.mem_flags.READ_WRITE, size=buffersize_out*4)

        for k in self.kernelname:
            kernel = self.load_kernel(self.filename, k)
            passed = 0
            self.fd_input = open('test_bench_pbrs_input.csv', 'r')
            self.fd_output = open('test_bench_pbrs_output.csv', 'r')
            for j in range(0,6):
                # NOTE(review): `/` is used for the element count; under
                # Python 3 this would yield a float -- the code is written
                # for Python 2 integer division.
                encoded_data = numpy.array(numpy.zeros(buffersize_out/4), dtype=numpy.uint32)
                # One test case spans eight consecutive CSV lines: read the
                # first one here and append the remaining seven below.
                data_to_encode = string.replace(self.fd_input.readline(),'\n','')
                reference_data = string.replace(self.fd_output.readline(),'\n','')
                for i in range(0,7):
                    data_to_encode = "%s,%s" % (data_to_encode, string.replace(self.fd_input.readline(),'\n',''))
                    reference_data = "%s,%s" % (reference_data, string.replace(self.fd_output.readline(),'\n',''))

                # Parse the comma-separated values as uint8 and reinterpret
                # the resulting bytes as uint32 words for the device buffer.
                data_to_encode = numpy.fromstring(numpy.fromstring(data_to_encode, dtype=numpy.uint8, sep=",").tostring(), dtype=numpy.uint32)
                reference_data = numpy.fromstring(reference_data, dtype=numpy.uint8, sep=",")

                # Upload, run the kernel with global size 8 and local size 8,
                # then download; every step is waited on before the next.
                cl.enqueue_copy(self.queue, self.inputbuffer, data_to_encode).wait()
                kernel.set_args(self.inputbuffer, self.outputbuffer)
                cl.enqueue_nd_range_kernel(self.queue,kernel,(8,),(8,),None ).wait()
                cl.enqueue_copy(self.queue, encoded_data, self.outputbuffer).wait()
                # View the uint32 result as raw bytes for the comparison.
                encoded_data = (numpy.fromstring(encoded_data.tostring(), dtype=numpy.uint8))

                
                if encoded_data.tostring() == reference_data.tostring():
                    passed += 1
                    print "Test %d PASSED" % (j+1)
                else:
                    print "Test %d FAILED" % (j+1)
                    print "input data:"
                    print numpy.fromstring(data_to_encode.tostring(), dtype=numpy.uint8)
                    print "encoded data:"
                    print numpy.fromstring(encoded_data.tostring(), dtype=numpy.uint8)
                    print "reference data:"
                    print reference_data
                    print "error data:"
                    print (reference_data - numpy.fromstring(encoded_data.tostring(), dtype=numpy.uint8))
            print "%d pass out of 6" % passed
            self.fd_input.close()
            self.fd_output.close()
            if passed == 6:
                print "All pbrs tests PASS\n"
                return True
            else:
                print "at least one pbrs test FAILED\n"
                return False
开发者ID:das-labor,项目名称:dvbt,代码行数:56,代码来源:create_pbrs_kernel.py


示例10: max_reduce_real4

def max_reduce_real4(ipt):
    """Split ``ipt`` with the real4 split kernel and reduce each output.

    Three ``CLReal`` arrays with ``len(ipt)`` entries are allocated and bound
    as kernel arguments 1-3 (argument 0 is the input buffer).  The kernel is
    enqueued with a global size of ``len(ipt)`` on ``ipt``'s command queue.

    Returns a 3-tuple with ``max_reduce`` of each output array, in argument
    order.
    """
    components = [CLReal(len(ipt)) for _ in range(3)]
    kernel = _splitkern_real4.kern
    kernel.set_arg(0, ipt._buffer)
    for position, component in enumerate(components, 1):
        kernel.set_arg(position, component._buffer)
    cl.enqueue_nd_range_kernel(ipt._ctrl.clqueue, kernel, (len(ipt),), None)
    return tuple([max_reduce(component) for component in components])
开发者ID:hagisgit,项目名称:SLIC,代码行数:11,代码来源:tools.py


示例11: prefixSum

 def prefixSum(self, e, data, keys, ndata, low, hi, events):
     import numpy as np
     import pyopencl as cl
     mf = cl.mem_flags
     
     if not isinstance(data, cl.Buffer):
         data_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf= data)
     else:
         data_buf = data
     
     if not isinstance(keys, cl.Buffer):
         keys_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf= keys)
     else:
         keys_buf = keys
     
     grid_dims = self.get_grid_dims(ndata)
     psumbytes = ndata * np.uint64(0).nbytes
     bsumbytes =  int(np.prod(grid_dims) * np.uint64(0).nbytes)
     nbsumbytes =  np.uint64(0).nbytes
     
     psum_buf = cl.Buffer(self.ctx, mf.READ_WRITE, psumbytes)
     bsum_buf = cl.Buffer(self.ctx, mf.READ_WRITE, bsumbytes)
     nbsum_buf = cl.Buffer(self.ctx, mf.READ_WRITE, nbsumbytes)
     
     low = PrefixSum.HOST_TYPE_KEYS(low)
     hi = PrefixSum.HOST_TYPE_KEYS(hi)
     
     kernel = self.prg.prefixSumDown
     kernel.set_args(data_buf, keys_buf, np.uint64(ndata), low, hi, psum_buf, bsum_buf, nbsum_buf)
     
     global_dims = self.get_global(grid_dims)
     
     print "prefixSumDown %s %s" % (str(global_dims), str(self.localDims))
     if e is None:
         e  = ( cl.enqueue_nd_range_kernel(self.queue, kernel, global_dims, self.localDims, wait_for=e), )
     else:
         e  = ( cl.enqueue_nd_range_kernel(self.queue, kernel, global_dims, self.localDims), )
     events += e
     
     nbsum = np.zeros(1, dtype = np.uint64)
     events += (cl.enqueue_copy(self.queue, nbsum, nbsum_buf, wait_for=e),)
     
     if nbsum>1:
         (e, bsum_buf, bsum1_buf, nbsum1_buf, ndata2) = self.prefixSumDownInplace(e, bsum_buf, nbsum.item(), events)
     else:
         ndata2 = np.zeros(1, dtype = np.uint64)
         events += (cl.enqueue_copy(self.queue, ndata2, bsum_buf, wait_for=e),)
         ndata2 = ndata2.item()
         print ndata2
     
     self.prefixSumUp(e, psum_buf, ndata, bsum_buf, nbsum, events)
     
     return (e, data_buf, keys_buf, psum_buf, bsum_buf, nbsum_buf, ndata2)
开发者ID:Kobtul,项目名称:documents,代码行数:53,代码来源:filter.py


示例12: solve

    def solve(self,puzzle,simulations = 16384, iterations = 35, workGroupSize = 128):
        """Run the "montecarlo" kernel on ``puzzle`` and return the best solution.

        puzzle        -- mapping providing at least 'width' and 'height'.
        simulations   -- global work size of the kernel launch.
        iterations    -- passed to the kernel as an int32 argument.
        workGroupSize -- local work size of the kernel launch.

        Returns a list of dicts with keys 'X', 'Y' and 'Size', built from the
        solution whose group length is minimal.
        """
        self.simulations = simulations
        self.iterations = iterations
        self.workGroupSize = workGroupSize
        self.workGroups = int(self.simulations / self.workGroupSize)
        # NOTE(review): int8 limits width/height to 127 -- confirm the kernel
        # signature expects 8-bit values.
        self.width = np.int8(puzzle['width'])
        self.height = np.int8(puzzle['height'])
        
        # initialise buffers
        self.initBuffers(puzzle)
        
        # create the kernel and bind its arguments
        self.kernel = cl.Kernel(self.program,"montecarlo")
        self.kernel.set_args(self.lengthsBuffer,self.groupLengthsBuffer,self.puzzlesBuffer,self.solutionsBuffer,self.height,self.width,np.int32(self.iterations))
        
        # launch one work item per simulation
        cl.enqueue_nd_range_kernel(self.queue,self.kernel,(self.simulations,),(self.workGroupSize,))
        
        # map the group lengths buffer and fetch it as a host array
        cl.enqueue_map_buffer(self.queue,self.groupLengthsBuffer,cl.map_flags.WRITE,0,self.groupLengths.shape,self.groupLengths.dtype)
        self.groupLengths = self.groupLengthsBuffer.get_host_array(self.groupLengths.shape,dtype=self.groupLengths.dtype)

        # map the solutions buffer (using the flattened shape) and fetch it
        # as a host array with the full solutions shape
        cl.enqueue_map_buffer(self.queue,self.solutionsBuffer,cl.map_flags.WRITE,0,self.solutionsFlattened.shape,self.solutions.dtype)
        self.solutions = self.solutionsBuffer.get_host_array(self.solutions.shape,dtype=self.solutions.dtype)
        
        # release buffers
        self.lengthsBuffer.release()
        self.groupLengthsBuffer.release()
        self.puzzlesBuffer.release()
        self.solutionsBuffer.release()

        # get the best solution: the one with the smallest group length;
        # np.array() copies it so the cells can be overwritten below
        i = self.groupLengths.argmin()
        bestSolution = np.array(self.solutions[i])
        
        # convert solution to list format used by challenge
        solution = []
        for row in range(0,puzzle['height']):
            for col in range(0,puzzle['width']):
                if bestSolution[row][col]!=-1:
                    s = bestSolution[row][col]
                    
                    # add to solution list
                    solution.append({'X': int(col),'Y': int(row),'Size':int(s)})
                    
                    # clear the s-by-s cells covered by this entry so they are
                    # not reported again (note: `i` is reused as a loop index
                    # here; the argmin index is no longer needed)
                    for i in range(0,s):
                        for j in range(0,s):
                            bestSolution[row+i][col+j]=-1
        
        return solution
开发者ID:ohlord,项目名称:cimpress,代码行数:52,代码来源:CLSolve.py


示例13: filter

    def filter(self, data, keys, low, hi, events):
        import numpy as np
        import pyopencl as cl
        mf = cl.mem_flags
        
        ndata = data.size
        
        (e, data_buf, keys_buf, indices_buf, bsum_buf, nbsum_buf, ndata2) = self.prefixSum(None, data, keys, ndata, low, hi, events)
        
        filt = np.zeros(ndata, dtype = np.bool8)
        indices = np.zeros(ndata, dtype = np.uint64)
        data2 = np.zeros(ndata2, dtype = PrefixSum.HOST_TYPE_DATA)
        keys2 = np.zeros(ndata2, dtype = PrefixSum.HOST_TYPE_KEYS)
        
        ndata2bytes = np.uint64(0).nbytes
        
        if PrefixSum.RETURN_FILTER == 1:
            filt_buf = cl.Buffer(self.ctx, mf.READ_WRITE, filt.nbytes)
        print data2.nbytes
        data2_buf = cl.Buffer(self.ctx, mf.READ_WRITE, data2.nbytes)
        keys2_buf = cl.Buffer(self.ctx, mf.READ_WRITE, keys2.nbytes)
        ndata2_buf = cl.Buffer(self.ctx, mf.READ_WRITE, ndata2bytes)
        
        low = PrefixSum.HOST_TYPE_KEYS(low)
        hi = PrefixSum.HOST_TYPE_KEYS(hi)

        kernel = self.prg.filter
        if PrefixSum.RETURN_FILTER == 1:
            kernel.set_args(data_buf, keys_buf, indices_buf, np.uint64(ndata), low, hi, filt_buf, data2_buf, keys2_buf, ndata2_buf)
        else:
            kernel.set_args(data_buf, keys_buf, indices_buf, np.uint64(ndata), low, hi, data2_buf, keys2_buf, ndata2_buf)
        
        global_dims = self.get_global(self.get_grid_dims(ndata))
        
        print "filter"
        if e is None:
            e  = ( cl.enqueue_nd_range_kernel(self.queue, kernel, global_dims, self.localDims, wait_for=e), )
        else:
            e  = ( cl.enqueue_nd_range_kernel(self.queue, kernel, global_dims, self.localDims), )
        events += e
        
        if PrefixSum.RETURN_FILTER == 1:
            events += ( cl.enqueue_copy(self.queue, filt, filt_buf, wait_for=e), 
                        cl.enqueue_copy(self.queue, indices, indices_buf, wait_for=e),
                        cl.enqueue_copy(self.queue, data2, data2_buf, wait_for=e),
                        cl.enqueue_copy(self.queue, keys2, keys2_buf, wait_for=e) )
        else:
            events += ( cl.enqueue_copy(self.queue, indices, indices_buf, wait_for=e),
                        cl.enqueue_copy(self.queue, data2, data2_buf, wait_for=e),
                        cl.enqueue_copy(self.queue, keys2, keys2_buf, wait_for=e) )
        
        return (filt, indices, data2, keys2)
开发者ID:Kobtul,项目名称:documents,代码行数:52,代码来源:filter.py


示例14: _exec_chunked_unsafe

 def _exec_chunked_unsafe(self, chunksize=0):
     """Run the kernel over the leading variable in consecutive chunks.

     Unsafe for kernels with local variables.

     A positive ``chunksize`` is first handed to ``_prep_chunked_exec``.
     Before each launch the chunk index is stored on the solver object under
     the attribute name ``self._cnk_name``.  Launches are enqueued without an
     explicit local size and the queue is drained once at the end.
     """
     if chunksize > 0:
         self._prep_chunked_exec(chunksize)
     total = self.leadingvar.length
     chunk = self._cnksz
     n_chunks = int(ceil(float(total) / float(chunk)))
     final = n_chunks - 1
     for index in range(n_chunks):
         # The last chunk is shorter when the length is not a multiple.
         if index == final and total % chunk != 0:
             chunk = total % chunk
         self._solverobj.__setattr__(self._cnk_name, index)
         cl.enqueue_nd_range_kernel(self._solverobj.clqueue, self._clkernel, (chunk,), None)
     self._solverobj.clqueue.finish()
开发者ID:hagisgit,项目名称:qcl,代码行数:13,代码来源:QclKernel.py


示例15: change_display

def change_display(image):
    """Run the ``add`` kernel from ``image`` into a shared GL buffer.

    ``image`` is copied into a read-only device buffer.  The GL buffer is
    acquired for OpenCL, the kernel is enqueued with ``image.shape`` as the
    global size, the GL buffer is released again, and both the command queue
    and the GL pipeline are flushed.
    """
    source = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=image)
    # NOTE(review): the GL object id is passed as numpy.float32(buf) --
    # confirm that GLBuffer accepts this rather than a plain integer.
    target = cl.GLBuffer(ctx, mf.WRITE_ONLY, numpy.float32(buf))
    shared = [target]

    cl.enqueue_acquire_gl_objects(queue, shared)
    kernel = prog.add
    kernel.set_args(source, target)
    cl.enqueue_nd_range_kernel(queue, kernel, image.shape, None)
    cl.enqueue_release_gl_objects(queue, shared)

    queue.finish()
    glFlush()
开发者ID:Blother,项目名称:Python_Interop,代码行数:13,代码来源:simple_interop.py


示例16: test_algorithm

    def test_algorithm(self):
        """Run the reedsolomon kernels against CSV reference vectors.

        For every kernel name in ``self.kernelname`` the kernel is loaded and
        each line of 'test_bench_rs_input.csv' is encoded on the device and
        compared byte-for-byte with the matching line of
        'test_bench_rs_output.csv'.  Pass/fail details are printed per case.

        Returns True when every case passed, False otherwise.
        """
        print "\n**************************"
        print "test_reedsolomon:"
        passed = 0
        linecnt = 1

        # opencl buffer uint
        self.inputbuffer = cl.Buffer(self.ctx , cl.mem_flags.READ_WRITE, size=48*4)
        # opencl buffer uint
        self.outputbuffer = cl.Buffer(self.ctx , cl.mem_flags.READ_WRITE, size=51*4)

        for k in self.kernelname:
            kernel = self.load_kernel(self.filename, k)
            # NOTE(review): the files are reopened for every kernel name but
            # closed only once after the loop, so earlier handles are not
            # closed explicitly; with an empty kernelname the close() calls
            # below rely on attributes set elsewhere -- confirm.
            self.fd_input = open('test_bench_rs_input.csv', 'r')
            self.fd_output = open('test_bench_rs_output.csv', 'r')
            for line in self.fd_input:
                # Parse the comma-separated values as uint8 and reinterpret
                # the resulting bytes as uint32 words for the device buffer.
                data_to_encode = numpy.fromstring(line, dtype=numpy.uint8, sep=",").tostring()
                data_to_encode = numpy.fromstring(data_to_encode, dtype=numpy.uint32)

                encoded_data = numpy.array(numpy.zeros(51), dtype=numpy.uint32)
                reference_data = numpy.fromstring(self.fd_output.readline(), dtype=numpy.uint8, sep=",")

                # Upload, run a single work item, download; every step is
                # waited on before the next one starts.
                cl.enqueue_copy(self.queue, self.inputbuffer, data_to_encode).wait()
                kernel.set_args(self.inputbuffer, self.outputbuffer)
                cl.enqueue_nd_range_kernel(self.queue,kernel,(1,),None ).wait()
                cl.enqueue_copy(self.queue, encoded_data, self.outputbuffer).wait()

                # Compare the raw bytes of the uint32 result with the raw
                # bytes of the uint8 reference.
                if encoded_data.tostring() == reference_data.tostring():
                    passed += 1
                    print "Test %d PASSED" % linecnt
                else:
                    print "Test %d FAILED" % linecnt
                    print "input data:"
                    print numpy.fromstring(data_to_encode.tostring(), dtype=numpy.uint8)
                    print "encoded data:"
                    print numpy.fromstring(encoded_data.tostring(), dtype=numpy.uint8)
                    print "reference data:"
                    print reference_data
                    print "error data:"
                    print (reference_data - numpy.fromstring(encoded_data.tostring(), dtype=numpy.uint8))
                linecnt += 1
        # linecnt starts at 1, so the number of executed cases is linecnt-1;
        # the counters accumulate across all kernel names.
        print "%d pass out of %d" % (passed,(linecnt-1))
        self.fd_input.close()
        self.fd_output.close()
        if passed == (linecnt-1):
            print "All reedsolomon tests PASS\n"
            return True
        else:
            print "at least one reedsolomon test FAILED\n"
            return False
开发者ID:das-labor,项目名称:dvbt,代码行数:50,代码来源:create_rs_kernel.py


示例17: updateEt_vanilla

    def updateEt_vanilla(self, algo="SHG"):
        """Update ``self.Et_cla`` from the signal field using the vanilla algorithm.

        algo -- "SD" selects the SD variant; any other value takes the SHG
                path.

        With ``self.useCL`` set, the inverse FFT and the sum/normalisation
        kernels run on the OpenCL device and each step is waited on.
        Otherwise the same update is computed on the host with NumPy.  The
        elapsed time is written to the ``root`` logger at debug level.
        """
        root.debug("Updating Et using vanilla algorithm")
        # NOTE(review): time.clock was removed in Python 3.8 -- confirm the
        # target interpreter.
        t0 = time.clock()
        #         transform = FFT(self.ctx, self.q, (self.Esig_w_tau_cla,) , (self.Esig_t_tau_p_cla,) , axes = [1])
        #         events = transform.enqueue(forward = False)

        #         self.Esig_t_tau_p_cla.set(np.fft.ifft(self.Esig_w_tau_cla.get(), axis=1).astype(self.dtype_c).copy())

        if self.useCL == True:
            # Inverse transform on the device; wait for all of its events
            # before the sum kernels read the result.
            events = self.Esig_t_tau_p_fft.enqueue(forward=False)
            for e in events:
                e.wait()
            if algo == "SD":
                krn = self.progs.progs["updateEtVanillaSumSD"].updateEtVanillaSumSD
                krn.set_scalar_arg_dtypes((None, None, np.int32))
                krn.set_args(self.Esig_t_tau_p_cla.data, self.Et_cla.data, self.N)
                ev = cl.enqueue_nd_range_kernel(self.q, krn, self.Et.shape, None)
                ev.wait()

                # Read the result back, negate its complex conjugate on the
                # host and upload it again.
                Et = self.Et_cla.get()
                self.Et_cla.set(-np.conj(Et).astype(self.dtype_c).copy())

            #                 Esig_w_tau = self.Esig_w_tau_cla.get()
            #                 Gm  = np.conj(Esig_w_tau.sum(axis=1))[::-1]
            #                 self.Et_cla.set(Gm.copy())

            else:
                krn = self.progs.progs["updateEtVanillaSumSHG"].updateEtVanillaSumSHG
                krn.set_scalar_arg_dtypes((None, None, np.int32))
                krn.set_args(self.Esig_t_tau_p_cla.data, self.Et_cla.data, self.N)
                ev = cl.enqueue_nd_range_kernel(self.q, krn, self.Et.shape, None)
                ev.wait()

            # The normalisation kernel is launched with a single work item.
            krn = self.progs.progs["updateEtVanillaNorm"].updateEtVanillaNorm
            krn.set_scalar_arg_dtypes((None, np.int32))
            krn.set_args(self.Et_cla.data, self.N)
            ev = cl.enqueue_nd_range_kernel(self.q, krn, [1], None)
            ev.wait()
        else:
            # Host path: inverse FFT along axis 1, sum along axis 0 and
            # scale by the largest magnitude.
            self.Esig_t_tau_p_cla.set(np.fft.ifft(self.Esig_w_tau_cla.get(), axis=1).astype(self.dtype_c).copy())
            Esig_t_tau_p = self.Esig_t_tau_p_cla.get()
            if algo == "SD":
                Et = np.sqrt(Esig_t_tau_p.sum(axis=0))
            #                 Et = (Esig_t_tau_p.sum(axis=0))
            else:
                Et = Esig_t_tau_p.sum(axis=0)
            Et = Et / np.abs(Et).max()
            self.Et_cla.set(Et)

        root.debug("".join(("Time spent: ", str(time.clock() - t0))))
开发者ID:filiplindau,项目名称:Frog,代码行数:50,代码来源:FrogCalculationCL.py


示例18: calc_weights_gradient

    def calc_weights_gradient( self ):
        """
        Calculate the gradient of this layer's weights.

        This method should be called only for processed layers, as it uses
        the inputs array, which is valid only at processing time.

        Next layers that are not yet processed are handled first
        (recursively).  The layer-gradient kernel is then enqueued for this
        layer, followed by one error-propagation kernel per previous layer;
        each of those events is appended to the corresponding previous
        layer's wait list.
        """

        for l in self._next_layers:
            if not l[0].processed:
                l[0].calc_weights_gradient()

        queue = self.opencl.queue
        kernel = self.opencl.kernel_calc_layer_gradient

        # Only the per-layer arguments are set here; the remaining argument
        # slots are presumably bound elsewhere -- verify against the setup code.
        kernel.set_arg( 2, self._inputs_offset )
        kernel.set_arg( 3, self._neurons_offset )
        kernel.set_arg( 4, self._inputs_per_neuron )
        kernel.set_arg( 5, self._weights_offset )
        kernel.set_arg( 7, self._weights_count )
        # Local scratch memory, sized in bytes as 4 times an element count.
        kernel.set_arg( 8, pyopencl.LocalMemory( int( 
                    4 * ( self._inputs_per_neuron + 1 + self.opencl.max_local_size[ 0 ] // self._inputs_per_neuron ) ) ) )

        self._calc_gradient_event = pyopencl.enqueue_nd_range_kernel( queue, kernel,
            ( int( self._weights_buf_size ), ), ( self.opencl.max_local_size[ 0 ], ),
            wait_for = self._calc_gradient_wait_for
            )
        # Clear the wait list in place so the same list object can be
        # refilled by later calls.
        del self._calc_gradient_wait_for[:]

        kernel = self.opencl.kernel_propagate_errors
        kernel.set_arg( 2, self._neurons_offset )
        kernel.set_arg( 5, self._neuron_count )
        kernel.set_arg( 7, self._inputs_per_neuron )

        # Running offset into this layer's weights, advanced by l[2] per
        # previous layer; it starts at 1.
        i_s = numpy.int32( 1 )
        for l in self._prev_layers:
            # presumably l = (layer, neuron offset, neuron count) -- TODO confirm
            kernel.set_arg( 3, l[0]._neurons_offset + l[1] )
            kernel.set_arg( 4, l[2] )
            kernel.set_arg( 6, self._weights_offset + i_s )

            # The previous layer's gradient launch must wait for this
            # propagation event, which in turn waits for this layer's gradient.
            l[0]._calc_gradient_wait_for.append( pyopencl.enqueue_nd_range_kernel( queue, kernel,
                ( int( l[2] * 64 ), ), ( 64, ),
                wait_for = ( self._calc_gradient_event, )
                ) )

            i_s += l[2]

        self._processed = True
开发者ID:remtcs,项目名称:gpgpu-neuralnet,代码行数:48,代码来源:layer.py


示例19: gpu_amend_values

def gpu_amend_values(queue, kernels, gpu_params, buffers, amendments):
    """
        Upload the requested amendments (after the collision detection check)
        to the GPU and enqueue the kernel that applies them to the data.

        Returns a list with the kernel launch event, or an empty list when
        the packet holds no amendments.
    """
    packet = amendments.get_packet()
    if not packet[amendments.amount_i] > 0:
        return []

    # (destination buffer name, packet slot) for each upload, in order.
    uploads = (
        ("global_amendments_n", amendments.amount_i),
        ("global_amendment_indices", amendments.indices_i),
        ("global_amendment_values", amendments.values_i),
    )
    transfers = [cl.enqueue_copy(queue, buffers[name], packet[slot])
                 for name, slot in uploads]

    kernel = kernels["k_update_values"]
    multiple = gpu_params["preferred_multiple"]
    # Round the work-item count up to a whole number of work groups.
    padded = int(np.ceil(amendments.amount / multiple) * multiple)
    launch = cl.enqueue_nd_range_kernel(
        queue, kernel, (padded,), (multiple,),
        global_work_offset=None, wait_for=transfers)
    return [launch]
开发者ID:naummo,项目名称:swarm_maze_opencl_solver,代码行数:25,代码来源:opencl_computations_demo.py


示例20: calc_chi2

    def calc_chi2(self, queue, interspace, q, Iq, 
            rind, rxyz, lind, lxyz, origin, voxelspacing, fifj, targetIq, sq, chi2):
        """Bind all arguments of the ``calc_chi2`` kernel and enqueue it.

        The launch uses work groups of 16 items and a global size of
        ``max_compute_units * 16 * 512``.  Local scratch memory of
        ``4 * q.shape[0] * 16`` bytes is passed as the fourth kernel argument.

        Returns the object returned by ``cl.enqueue_nd_range_kernel``.
        """
        def shape_and_size(array):
            # int32 vector: the array's shape in the leading three slots,
            # its total element count in the last one.
            packed = np.zeros(4, dtype=np.int32)
            packed[:-1] = array.shape
            packed[-1] = array.size
            return packed

        group_size = 16
        bytes_per_float = 4
        n_q = q.shape[0]

        global_size = (queue.device.max_compute_units * group_size * 512,)
        local_size = (group_size,)
        scratch = cl.LocalMemory(bytes_per_float * n_q * group_size)

        grid_shape = shape_and_size(interspace)
        fifj_shape = shape_and_size(fifj)

        kernel = self.kernels.calc_chi2
        kernel.set_args(
            interspace.data, q.data, Iq.data, scratch, rind.data, rxyz.data,
            lind.data, lxyz.data, origin, voxelspacing, fifj.data,
            targetIq.data, sq.data, chi2.data,
            grid_shape, np.int32(n_q), np.int32(rind.shape[0]),
            np.int32(lind.shape[0]), fifj_shape)
        return cl.enqueue_nd_range_kernel(queue, kernel, global_size, local_size)
开发者ID:latrocinia,项目名称:saxstools,代码行数:30,代码来源:kernels.py



注:本文中的pyopencl.enqueue_nd_range_kernel函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python pyopencl.enqueue_read_buffer函数代码示例发布时间:2022-05-27
下一篇:
Python pyopencl.enqueue_copy函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap