This article collects typical usage examples of the numbapro.cuda.grid function in Python. If you have been wondering what the grid function does, how to call it, or what real-world uses look like, the curated examples below may help.
The following presents 20 code examples of the grid function, sorted by popularity by default.
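Before diving in, a quick orientation: cuda.grid(ndim) returns the absolute position of the calling thread across the entire launched grid, as a single integer for ndim=1 or a tuple for ndim=2. A minimal sketch of the one-dimensional case (my_kernel is a made-up name; NumbaPro itself is long deprecated, but the identical API lives on in numba.cuda):

from numbapro import cuda  # in current code: from numba import cuda

@cuda.jit
def my_kernel(out):
    i = cuda.grid(1)  # equivalent to cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x
    if i < out.shape[0]:  # guard: the grid is usually larger than the array
        out[i] = i

The bounds check recurs throughout the examples below, because the launch configuration is normally rounded up to a whole number of blocks.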
Example 1: compute_sample_kernel
def compute_sample_kernel(factors, longest_wavelet, offsets_per_wavelength, output, num_rows):
    num_wavelengths = longest_wavelet - 2
    # Global thread index. The original indexed output with cuda.gridDim.x,
    # which is the number of blocks in the grid, not a per-thread index;
    # cuda.grid(1) is almost certainly what was intended, and hoisting it
    # into a local avoids recomputing it on every access.
    i = cuda.grid(1)
    output[i] = 0.0
    for row_index in range(num_rows):
        output[i] += get_value_gpu(factors, row_index, i, longest_wavelet,
                                   num_wavelengths, offsets_per_wavelength)
    output[i] += factors[-1]
Author: RelentlessResults, Project: decompose001, Lines: 7, Source: precomputed_representation.py
Example 2: c_distribute
def c_distribute(rands, low, high):
    i = cuda.grid(1)
    if i >= rands.shape[0]:
        return
    # Map a uniform sample in [0, 1) onto the interval [low, high)
    rands[i] = (1.0 - rands[i]) * low + rands[i] * high
Author: dignifiedquire, Project: numbapro-examples, Lines: 7, Source: blackscholes_cuda_rnd.py
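How such a kernel is launched is not shown above; a hypothetical host-side call (the array size and launch configuration are my choices, and c_distribute would also need the @cuda.jit decorator the snippet omits) might look like:

import numpy as np
n = 1000000
d_rands = cuda.to_device(np.random.random(n))
threads = 256
blocks = (n + threads - 1) // threads  # round up so every element gets a thread
c_distribute[blocks, threads](d_rands, -1.0, 1.0)
samples = d_rands.copy_to_host()  # uniform draws rescaled to [-1, 1)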
Example 3: const_m
def const_m(out, const):
    n = out.shape[0]
    m = out.shape[1]
    i, j = cuda.grid(2)
    if i < n and j < m:
        out[i, j] = const
Author: Abel-Ding, Project: gpupy, Lines: 7, Source: __init__.py
Example 4: tanh_m
def tanh_m(a, out):
    n = out.shape[0]
    m = out.shape[1]
    i, j = cuda.grid(2)
    if i < n and j < m:
        out[i, j] = tanh(a[i, j])
Author: Abel-Ding, Project: gpupy, Lines: 7, Source: __init__.py
Example 5: abs_m
def abs_m(a, out):
    n = out.shape[0]
    m = out.shape[1]
    i, j = cuda.grid(2)
    if i < n and j < m:
        out[i, j] = fabs(a[i, j])
Author: Abel-Ding, Project: gpupy, Lines: 7, Source: __init__.py
Example 6: log_m
def log_m(a, out):
    n = out.shape[0]
    m = out.shape[1]
    i, j = cuda.grid(2)
    if i < n and j < m:
        out[i, j] = log(a[i, j])
Author: Abel-Ding, Project: gpupy, Lines: 7, Source: __init__.py
Example 7: exp_m
def exp_m(a, out):
    n = out.shape[0]
    m = out.shape[1]
    i, j = cuda.grid(2)
    if i < n and j < m:
        out[i, j] = exp(a[i, j])
Author: Abel-Ding, Project: gpupy, Lines: 7, Source: __init__.py
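Examples 3 through 7 all follow the same element-wise pattern over a 2D grid. A hypothetical launch for exp_m (the shapes and tile size are my choices, not the gpupy project's):

import numpy as np
a = cuda.to_device(np.random.random((1024, 512)))
out = cuda.device_array_like(a)
threads = (16, 16)                              # 256 threads per block
blocks = ((1024 + 15) // 16, (512 + 15) // 16)  # round up to cover the whole matrix
exp_m[blocks, threads](a, out)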
Example 8: kernel
def kernel(dst, src):
    '''A simple kernel that adds 1 to every item'''
    i = cuda.grid(1)
    if i >= dst.shape[0]:
        return
    dst[i] = src[i] + 1
Author: lundybernard, Project: cuda_practice, Lines: 7, Source: sli_test_multigpu_mt.py
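The source file name suggests a multi-GPU test; a hypothetical single-device call (assuming kernel was compiled with @cuda.jit; cuda.select_device is the numba/numbapro call for pinning a context to a particular GPU):

import numpy as np
src = np.arange(100)
dst = np.zeros_like(src)
cuda.select_device(0)  # optional on single-GPU machines
kernel[(100 + 63) // 64, 64](dst, src)  # numba copies host arrays in and out automatically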
Example 9: vec_add_ilp_x4
def vec_add_ilp_x4(a, b, c):
    # read: each thread loads four elements, one grid-stride apart
    i = cuda.grid(1)
    ai = a[i]
    bi = b[i]
    bw = cuda.blockDim.x
    gw = cuda.gridDim.x
    stride = gw * bw
    j = i + stride
    aj = a[j]
    bj = b[j]
    k = j + stride
    ak = a[k]
    bk = b[k]
    l = k + stride
    al = a[l]
    bl = b[l]
    # compute
    ci = core(ai, bi)
    cj = core(aj, bj)
    ck = core(ak, bk)
    cl = core(al, bl)
    # write
    c[i] = ci
    c[j] = cj
    c[k] = ck
    c[l] = cl
Author: Aahung, Project: numbapro-examples, Lines: 33, Source: testilp.py
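Note the launch arithmetic this kernel implies: each thread handles four elements spaced one full grid apart, so the grid must cover only a quarter of the array, and the unguarded loads require the array length to be exactly 4 * blocks * threads. A hypothetical launch (d_a, d_b, d_c are assumed device arrays):

threads = 256
blocks = 64
n = 4 * blocks * threads  # the ILP-x4 kernel touches exactly this many elements
vec_add_ilp_x4[blocks, threads](d_a, d_b, d_c)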
Example 10: cu_matmul_sm
def cu_matmul_sm(A, B, C, n, tpb, bpg):
    # declare shared memory
    sA = cuda.shared.array(shape=block_dim, dtype=float32)
    sB = cuda.shared.array(shape=block_dim, dtype=float32)
    # we need the thread ID within a block as well as the global thread ID
    tx = cuda.threadIdx.x
    ty = cuda.threadIdx.y
    x, y = cuda.grid(2)
    # perform partial operations in block-sized tiles,
    # saving intermediate values in an accumulator variable
    acc = 0.0
    for i in range(bpg):
        # Stage 1: prefill shared memory with the current tiles of A and B
        sA[tx, ty] = A[x, ty + i * tpb]
        sB[tx, ty] = B[tx + i * tpb, y]
        # Block until shared memory is filled
        cuda.syncthreads()
        # Stage 2: compute the partial dot product and add it to the accumulator
        if x < n and y < n:
            for j in range(tpb):
                acc += sA[tx, j] * sB[j, ty]
        # Block until all threads have finished computing before the next iteration
        cuda.syncthreads()
    # Put the accumulated dot product into the output matrix
    if x < n and y < n:
        C[x, y] = acc
Author: morrisyoung, Project: CUDA_Python_starter, Lines: 32, Source: test_mm.py
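This kernel reads the module-level constant block_dim (and the float32 type, presumably imported from numba) when allocating its shared-memory tiles, so both must exist before the kernel is compiled. A plausible setup, assumed rather than taken from the original project; n is kept a multiple of tpb because the tile loads are unguarded:

import numpy as np
from numba import float32
tpb = 16                      # tile width = threads per block along one side
block_dim = (tpb, tpb)        # referenced inside cu_matmul_sm
n = 256
bpg = (n + tpb - 1) // tpb    # blocks per grid along one side
A = cuda.to_device(np.random.random((n, n)).astype(np.float32))
B = cuda.to_device(np.random.random((n, n)).astype(np.float32))
C = cuda.device_array((n, n), dtype=np.float32)
cu_matmul_sm[(bpg, bpg), block_dim](A, B, C, n, tpb, bpg)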
Example 11: pruneGPU
def pruneGPU(input_d, num_elements, min_sup):
    index = cuda.grid(1)  # the original also read cuda.threadIdx.x into an unused tx
    if index < num_elements:
        if input_d[index] < min_sup:
            input_d[index] = 0
Author: jalatif, Project: Python_Massively_Parallel_FP_Tree, Lines: 7, Source: apriori.py
Example 12: _gaussian_cuda32
def _gaussian_cuda32(fac, n_rep, t, n_t, a_facGo, b_facGo, c_facGo):
    i, j = cuda.grid(2)
    if i >= n_rep or j >= n_t:
        return
    # Fill in the 2D fac data structure
    fac[i, j] = a_facGo[i] * exp(-(t[j] - b_facGo[i])**2 / (2 * c_facGo[i]**2))
Author: peppi107, Project: Response-Inhibition-Model, Lines: 7, Source: trials.py
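A hypothetical 2D launch for this kernel (the block shape is my choice): the two grid axes are sized independently, with i spanning the n_rep repetitions and j the n_t time points.

threads = (8, 32)
blocks = ((n_rep + 7) // 8, (n_t + 31) // 32)
_gaussian_cuda32[blocks, threads](fac, n_rep, t, n_t, a_facGo, b_facGo, c_facGo)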
Example 13: produce_chId_lit_gpu
def produce_chId_lit_gpu(rid, literal, chunk_id, length):
    i = cuda.grid(1)
    if i < length:
        chunk_id[i] = rid[i] // 31  # integer division; the original's `/` relied on Python 2 semantics
        literal[i] = literal[i] | 1 << 31  # set the leftmost (literal flag) bit
        off_set = 30 - rid[i] % 31
        literal[i] = literal[i] | 1 << off_set
Author: DarinSSC, Project: BitmapIndex_GUI, Lines: 7, Source: bitmap_pickle.py
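These bitmap kernels pack 31 row bits plus a flag bit into each 32-bit word, in the style of WAH compression. A worked example of the index arithmetic (my reading of the code, not from the source project): for rid[i] = 65, chunk_id becomes 65 // 31 = 2 and off_set becomes 30 - (65 % 31) = 30 - 3 = 27, so the literal word ends up with bit 31 (the literal flag) and bit 27 set.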
Example 14: get_reduced
def get_reduced(literal, start_pos, reduced_length, reduced_literal, input_data, chunk_id, reduced_input_data, reduced_chunk_id):
    i = cuda.grid(1)
    if i < reduced_length:
        # OR together all literal words in segment i; an explicit index loop
        # replaces the original's slice iteration, which CUDA kernels may not compile
        for k in range(start_pos[i], start_pos[i + 1]):
            reduced_literal[i] |= literal[k]
        reduced_input_data[i] = input_data[start_pos[i]]
        reduced_chunk_id[i] = chunk_id[start_pos[i]]
Author: DarinSSC, Project: BitmapIndex_GUI, Lines: 7, Source: bitmap_pickle.py
Example 15: maxPoly
def maxPoly(x0, coef, tol, nParam, argMax):
    # Thread ID
    i = cuda.grid(1)
    # The kernel should only execute if i < nParam
    if i >= nParam:
        return
    # Iterate to convergence: Newton's method on f'(x) = 0
    # for the objective f(x) = coef[i]*x**2 + 2.3*x
    x = x0
    diff = tol + 1
    while diff > tol:
        # Compute the first derivative
        firstDeriv = 2 * coef[i] * x + 2.3
        # Compute the second derivative
        secondDeriv = 2 * coef[i]
        # Newton step
        xNew = x - firstDeriv / secondDeriv
        # Compute the difference for the convergence check and update
        diff = abs(xNew - x)
        x = xNew
    # Function output
    argMax[i] = x
Author: ealdrich, Project: BasicGPU, Lines: 30, Source: maxPoly.py
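A note on the math implied by the derivatives (my reading, not stated in the source): the kernel maximizes f(x) = coef[i]*x**2 + 2.3*x by applying Newton's method to f'(x) = 2*coef[i]*x + 2.3 = 0. Because f' is linear, the very first step lands exactly on the stationary point x* = -2.3 / (2*coef[i]); the second pass then observes diff = 0 and exits, so the while loop would only earn its keep on higher-degree objectives.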
Example 16: m_mn_add_pointwise
def m_mn_add_pointwise(a, b, out):
    n = a.shape[0]
    m = a.shape[1]
    i, j = cuda.grid(2)
    if i < n and j < m:
        # broadcast the column vector b across the columns of a
        out[i, j] = a[i, j] + b[i, 0]
Author: Abel-Ding, Project: gpupy, Lines: 7, Source: __init__.py
Example 17: m_mn_sadd_pointwise
def m_mn_sadd_pointwise(a, b, alpha, beta, out):
    n = a.shape[0]
    m = a.shape[1]
    i, j = cuda.grid(2)
    if i < n and j < m:
        # scaled add, broadcasting the column vector b: alpha*A + beta*b
        out[i, j] = alpha * a[i, j] + beta * b[i, 0]
Author: Abel-Ding, Project: gpupy, Lines: 7, Source: __init__.py
Example 18: mmultiply_pointwise
def mmultiply_pointwise(a, b, out):
    n = a.shape[0]
    m = a.shape[1]
    i, j = cuda.grid(2)
    if i < n and j < m:
        out[i, j] = a[i, j] * b[i, j]
Author: Abel-Ding, Project: gpupy, Lines: 7, Source: __init__.py
Example 19: getIdx_gpu
def getIdx_gpu(fill_word, reduced_literal, index, compact_flag, length):
    i = cuda.grid(1)
    if i < length:
        index[i * 2] = fill_word[i]
        index[i * 2 + 1] = reduced_literal[i]
        if not fill_word[i]:
            compact_flag[i * 2] = 0
Author: DarinSSC, Project: WAH_on_GPU, Lines: 7, Source: bitmap.py
Example 20: reduce_by_key_gpu
def reduce_by_key_gpu(literal, flag, is_finish, hop, length):
    i = cuda.grid(1)
    if i < length - hop:
        if (not is_finish[i]) and (not flag[i + hop]):
            literal[i] |= literal[i + hop]
        else:
            is_finish[i] = 1
Author: DarinSSC, Project: WAH_on_GPU, Lines: 7, Source: bitmap_constructor_gpu.py
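This kernel performs a single hop of a segmented OR-reduction; the host presumably calls it repeatedly with a doubling hop until every word has been folded into the head of its key segment. A hypothetical driver loop (inferred from the hop parameter, not taken from the WAH_on_GPU project; blocks and threads are assumed to cover length):

hop = 1
while hop < length:
    reduce_by_key_gpu[blocks, threads](literal, flag, is_finish, hop, length)
    hop *= 2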
Note: the numbapro.cuda.grid function examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors, and copyright remains with those authors; consult each project's license before redistributing or reusing the code. Do not repost without permission.