本文整理汇总了Python中numpy.long函数的典型用法代码示例。如果您正苦于以下问题:Python long函数的具体用法?Python long怎么用?Python long使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了long函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: ExpandDims
def ExpandDims(inputs, axis=-1, **kwargs):
"""ExpandDims interface of NDArray.
Parameters
----------
inputs : Tensor
The input tensor.
axis : int
The insert position of new dimension. Default is ``-1`` (Push Back).
Returns
-------
Tensor
The output tensor.
Examples
--------
>>> a = Tensor(shape=[1, 2, 3, 4]).Variable()
>>> print ExpandDims(a).shape
>>> print ExpandDims(a, axis=2).shape
"""
CheckInputs(inputs, 1)
arguments = ParseArguments(locals())
output = Tensor.CreateOperator(nout=1, op_type='ExpandDims', **arguments)
if inputs.shape is not None:
output.shape = inputs.shape[:]
if axis == -1 or axis >= len(inputs.shape):
output.shape.append(np.long(1))
else: output.shape.insert(axis, np.long(1))
return output
开发者ID:neopenx,项目名称:Dragon,代码行数:35,代码来源:ndarray.py
示例2: set_proj_plane_info
def set_proj_plane_info(self, xsize, ysize, lonra, latra):
if lonra is None:
lonra = [-180., 180.]
else:
# shift lonra[1] into the range [lonra[0], lonra[0]+360]
lonra_span = np.mod(lonra[1] - lonra[0], 360)
if lonra_span == 0:
lonra_span = 360
lonra[1] = lonra[0] + lonra_span
if latra is None:
latra = [-90., 90.]
if (
len(lonra) != 2
or len(latra) != 2
or latra[0] < -90
or latra[1] > 90
or latra[0] >= latra[1]
):
raise TypeError(
"Wrong argument lonra or latra. Must be lonra=[a,b],latra=[c,d] "
"c<d, c>=-90, d<=+90"
)
lonra = self._flip * np.float64(lonra)[:: self._flip]
latra = np.float64(latra)
xsize = np.long(xsize)
if ysize is None:
ratio = (latra[1] - latra[0]) / (lonra[1] - lonra[0])
ysize = np.long(round(ratio * xsize))
else:
ysize = np.long(ysize)
ratio = float(ysize) / float(xsize)
super(CartesianProj, self).set_proj_plane_info(
xsize=xsize, lonra=lonra, latra=latra, ysize=ysize, ratio=ratio
)
开发者ID:arahlin,项目名称:healpy,代码行数:34,代码来源:projector.py
示例3: set_proj_plane_info
def set_proj_plane_info(self,xsize,ysize,lonra,latra):
if lonra is None: lonra = [-180.,180.]
if latra is None: latra = [-90.,90.]
if (len(lonra)!=2 or len(latra)!=2 or lonra[0]<-180. or lonra[1]>180.
or latra[0]<-90 or latra[1]>90 or lonra[0]>=lonra[1] or latra[0]>=latra[1]):
raise TypeError("Wrong argument lonra or latra. Must be lonra=[a,b],latra=[c,d] "
"with a<b, c<d, a>=-180, b<=180, c>=-90, d<=+90")
lonra = self._flip*np.float64(lonra)[::self._flip]
latra = np.float64(latra)
xsize = np.long(xsize)
if ysize is None:
ratio = (latra[1]-latra[0])/(lonra[1]-lonra[0])
ysize = np.long(round(ratio*xsize))
else:
ysize = np.long(ysize)
ratio = float(ysize)/float(xsize)
if max(xsize,ysize) > 2000:
if max(xsize,ysize) == xsize:
xsize = 2000
ysize = np.long(round(ratio*xsize))
else:
ysize = 2000
xsize = np.long(round(ysize/ratio))
super(CartesianProj,self).set_proj_plane_info(xsize=xsize, lonra=lonra, latra=latra,
ysize=ysize, ratio=ratio)
开发者ID:apontzen,项目名称:healpy,代码行数:25,代码来源:projector.py
示例4: test_intp
def test_intp(self,level=rlevel):
"""Ticket #99"""
i_width = np.int_(0).nbytes*2 - 1
long('0x' + 'f'*i_width,16)
#self.failUnlessRaises(OverflowError,np.intp,'0x' + 'f'*(i_width+1),16)
#self.failUnlessRaises(ValueError,np.intp,'0x1',32)
assert_equal(255,np.long('0xFF',16))
assert_equal(1024,np.long(1024))
开发者ID:plaes,项目名称:numpy,代码行数:8,代码来源:test_regression.py
示例5: _create_objects
def _create_objects(self, diaobject_data):
"""
Create a dict of diaObjects formatted according to the
appropriate avro schema
Parameters
----------
diaobject_data is a numpy recarray containing all of the
data needed for the diaObject
Returns
-------
A dict keyed on uniqueId (the CatSim unique identifier for each
astrophysical source). Each value is a properly formatted
diaObject corresponding to its key.
"""
diaobject_dict = {}
for i_object in range(len(diaobject_data)):
diaobject = diaobject_data[i_object]
avro_diaobject = {}
avro_diaobject['flags'] = np.long(self._rng.randint(10, 1000))
avro_diaobject['diaObjectId'] = np.long(diaobject['uniqueId'])
avro_diaobject['ra'] = diaobject['ra']
avro_diaobject['decl'] = diaobject['dec']
ra_dec_cov = {}
ra_dec_cov['raSigma'] = self._rng.random_sample()*0.001
ra_dec_cov['declSigma'] = self._rng.random_sample()*0.001
ra_dec_cov['ra_decl_Cov'] = self._rng.random_sample()*0.001
avro_diaobject['ra_decl_Cov'] = ra_dec_cov
avro_diaobject['radecTai'] = diaobject['TAI']
avro_diaobject['pmRa'] = diaobject['pmRA']
avro_diaobject['pmDecl'] = diaobject['pmDec']
avro_diaobject['parallax'] = diaobject['parallax']
pm_parallax_cov = {}
for field in ('pmRaSigma', 'pmDeclSigma', 'parallaxSigma', 'pmRa_pmDecl_Cov',
'pmRa_parallax_Cov', 'pmDecl_parallax_Cov'):
pm_parallax_cov[field] = 0.0
avro_diaobject['pm_parallax_Cov'] = pm_parallax_cov
avro_diaobject['pmParallaxLnL'] = self._rng.random_sample()
avro_diaobject['pmParallaxChi2'] = self._rng.random_sample()
avro_diaobject['pmParallaxNdata'] = 0
diaobject_dict[diaobject['uniqueId']] = avro_diaobject
return diaobject_dict
开发者ID:lsst,项目名称:sims_catUtils,代码行数:51,代码来源:avroAlertGenerator.py
示例6: _get_window_sub
def _get_window_sub(self, window='None'):
"""Returns the window time series and amplitude normalization term
:param window: window string
:return: w, amplitude_norm
"""
window = window.split(':')
if window[0] in ['Hamming', 'Hann']:
w = np.hanning(self.samples)
elif window[0] == 'Force':
w = np.zeros(self.samples)
force_window = float(window[1])
to1 = np.long(force_window * self.samples)
w[:to1] = 1.
elif window[0] == 'Exponential':
w = np.arange(self.samples)
exponential_window = float(window[1])
w = np.exp(np.log(exponential_window) * w / (self.samples - 1))
else: # window = 'None'
w = np.ones(self.samples)
if window[0] == 'Force':
amplitude_norm = 2 / len(w)
else:
amplitude_norm = 2 / np.sum(w)
return w, amplitude_norm
开发者ID:openmodal,项目名称:openmodal,代码行数:29,代码来源:frf.py
示例7: order_paths_by_preference
def order_paths_by_preference(self):
#FIND ORDERING:
paths_desc=[]
for id in self.sorts_list:
paths_desc.append((id,np.int32))
for id in self.id_list:
paths_desc.append((id,'a255'))
paths_ordering=np.empty((len(self.paths_list),), dtype=paths_desc)
for file_id, file in enumerate(self.paths_list):
paths_ordering['path'][file_id]=file['path'].split('|')[0]
#Convert path name to 'unique' integer using hash.
#The integer will not really be unique but collisions
#should be extremely rare for similar strings with only small variations.
paths_ordering['path_id'][file_id]=hash(
paths_ordering['path'][file_id]
)
paths_ordering['checksum'][file_id]=file['path'].split('|')[1]
paths_ordering['version'][file_id]=np.long(file['version'][1:])
paths_ordering['file_type'][file_id]=file['file_type']
paths_ordering['data_node'][file_id]=retrieval_utils.get_data_node(file['path'],paths_ordering['file_type'][file_id])
#Sort paths from most desired to least desired:
#First order desiredness for least to most:
data_node_order=copy.copy(self.data_node_list)[::-1]#list(np.unique(paths_ordering['data_node']))
file_type_order=copy.copy(self.file_type_list)[::-1]#list(np.unique(paths_ordering['file_type']))
for file_id, file in enumerate(self.paths_list):
paths_ordering['data_node_id'][file_id]=data_node_order.index(paths_ordering['data_node'][file_id])
paths_ordering['file_type_id'][file_id]=file_type_order.index(paths_ordering['file_type'][file_id])
#'version' is implicitly from least to most
#sort and reverse order to get from most to least:
return np.sort(paths_ordering,order=self.sorts_list)[::-1]
开发者ID:aerler,项目名称:cdb_query,代码行数:33,代码来源:create_soft_links.py
示例8: OneHot
def OneHot(inputs, depth, on_value=1, off_value=0, **kwargs):
"""Generate the one-hot representation of inputs.
Parameters
----------
inputs : Tensor
The input tensor.
depth : int
The depth of one-hot representation.
on_value : int
The value when ``indices[j] = i``.
off_value : int
The value when ``indices[j] != i``.
Returns
-------
Tensor
The output tensor.
"""
CheckInputs(inputs, 1)
arguments = ParseArguments(locals())
output = Tensor.CreateOperator(nout=1, op_type='OneHot', **arguments)
if inputs.shape is not None:
output.shape = inputs.shape[:]
output.shape.append(np.long(depth))
return output
开发者ID:neopenx,项目名称:Dragon,代码行数:30,代码来源:ndarray.py
示例9: detectionOutput_fprop
def detectionOutput_fprop(self, conf_view, loc_view, detection, prior_boxes,
proposals, nms_top_k, image_top_k, score_threshold, nms_threshold):
conf = c_longlong(conf_view._tensor.ctypes.data)
loc = c_longlong(loc_view._tensor.ctypes.data)
detection = c_longlong(detection._tensor.ctypes.data)
prior_boxes = c_longlong(prior_boxes._tensor.ctypes.data)
L, num_class, bs = conf_view.shape
proposals = c_longlong(proposals._tensor.ctypes.data)
result = np.zeros((bs, image_top_k, 6), dtype=np.float32)
result_ptr = c_longlong(result.ctypes.data)
result_len = np.zeros(bs, dtype=np.int64)
result_len_ptr = c_longlong(result_len.ctypes.data)
self.mklEngine.detection_fprop(conf, loc, result_ptr, prior_boxes,
result_len_ptr, c_longlong(L), c_longlong(num_class),
c_longlong(bs), c_longlong(nms_top_k),
c_longlong(image_top_k),
c_float(score_threshold),
c_float(nms_threshold))
batch_all_detections = [None] * self.bsz
for i in range(bs):
leng = np.long(result_len[i])
res_batch = np.zeros((leng, 6))
res_batch[:] = result[i, 0:leng, :]
batch_all_detections[i] = res_batch
return batch_all_detections
开发者ID:NervanaSystems,项目名称:neon,代码行数:26,代码来源:nervanamkl.py
示例10: run
def run(self):
self.pos = 0
samples = np.ctypeslib.as_array(self.raw_samples.get_obj())
samples = samples.reshape(self.BUF_LEN, self.channels)
# This t is a global variable shared with display
while self.on:
if not self.cmds.empty():
cmd = self.cmds.get()
if cmd == '+':
self.ready()
elif cmd == '-':
self.reset()
elif cmd == "Exit!":
self.on = False
else:
print cmd
if not self.paused:
for i in range(self.channels):
bytes = self.data.recv()
# construct y value
height = (bytes[0] << 16) + (bytes[1] << 8) + bytes[2]
# convert to signed long
if (height >= 0x800000): # = 2^23
height = height - 0x1000000 # = 2^24
height = np.long(height)
samples[self.pos, i] = height
self.pos += 1
if (self.pos == self.BUF_LEN):
self.pos = 0
开发者ID:weyichen,项目名称:Sonic_Oxen,代码行数:32,代码来源:process_GUI.py
示例11: xml2field
def xml2field(self, elem, name=None):
typElem = elem.find('datatype')
dType = typElem.text
dDim = typElem.attrib['length']
dDim = np.asarray([np.long(d) for d in dDim.split()])[::-1]
dLen = np.prod(dDim)
if name is None:
name = elem.attrib['name']
valElem = elem.find('value')
if dType == 'pointer':
self.xml2field(valElem.find('parameter'), elem.attrib['name'])
return
if dType == 'struct':
o = Xml2Py(None, valElem[0])
setattr(self, name, o)
return
val = elem.find('value').text
if dLen > 1:
val = val.strip('[]').split(',')
conv = {'int': np.int, 'long': np.long, 'float': np.float, 'double': np.double, 'string': lambda s: s}
try:
if (dLen > 1):
val = np.asarray([conv[dType](v) for v in val]).reshape(dDim)
else:
val = conv[dType](val)
except KeyError:
print('WARNING: Unsupported data type {} in field {}! Ignoring...'.format(dType, name))
return
setattr(self, name, val)
开发者ID:mohseniaref,项目名称:PyRAT,代码行数:35,代码来源:tools.py
示例12: order_paths_by_preference
def order_paths_by_preference(sorts_list,id_list,paths_list,file_type_list,data_node_list,check_dimensions,
semaphores=dict(),time_var='time',session=None,remote_netcdf_kwargs=dict()):
#FIND ORDERING:
paths_desc = []
for id in sorts_list:
paths_desc.append((id,np.int64))
for id in id_list:
paths_desc.append((id,'a255'))
paths_ordering = np.empty((len(paths_list),), dtype=paths_desc)
if check_dimensions:
dimension_type_list = ['unqueryable',]
for file_id, file in enumerate(paths_list):
paths_ordering['path'][file_id] = file['path'].split('|')[0]
#Convert path name to 'unique' integer using hash.
#The integer will not really be unique but collisions
#should be extremely rare for similar strings with only small variations.
paths_ordering['path_id'][file_id] = hash(
paths_ordering['path'][file_id]
)
for unique_file_id in unique_file_id_list:
paths_ordering[unique_file_id][file_id] = file['path'].split('|')[unique_file_id_list.index(unique_file_id)+1]
paths_ordering['version'][file_id] = np.long(file['version'][1:])
paths_ordering['file_type'][file_id] = file['file_type']
paths_ordering['data_node'][file_id] = remote_netcdf.get_data_node(file['path'],paths_ordering['file_type'][file_id])
if check_dimensions:
#Dimensions types. Find the different dimensions types:
if not paths_ordering['file_type'][file_id] in queryable_file_types:
paths_ordering['dimension_type_id'][file_id] = dimension_type_list.index('unqueryable')
else:
remote_data = remote_netcdf.remote_netCDF(paths_ordering['path'][file_id],
paths_ordering['file_type'][file_id],
semaphores=semaphores,
session=session,
**remote_netcdf_kwargs)
dimension_type = remote_data.safe_handling(netcdf_utils.find_dimension_type,time_var=time_var)
if not dimension_type in dimension_type_list: dimension_type_list.append(dimension_type)
paths_ordering['dimension_type_id'][file_id] = dimension_type_list.index(dimension_type)
if check_dimensions:
#Sort by increasing number. Later when we sort, we should get a uniform type:
dimension_type_list_number = [ sum(paths_ordering['dimension_type_id']==dimension_type_id)
for dimension_type_id,dimension_type in enumerate(dimension_type_list)]
sort_by_number = np.argsort(dimension_type_list_number)[::-1]
paths_ordering['dimension_type_id'] = sort_by_number[paths_ordering['dimension_type_id']]
#Sort paths from most desired to least desired:
#First order desiredness for least to most:
data_node_order = copy.copy(data_node_list)[::-1]#list(np.unique(paths_ordering['data_node']))
file_type_order = copy.copy(file_type_list)[::-1]#list(np.unique(paths_ordering['file_type']))
for file_id, file in enumerate(paths_list):
paths_ordering['data_node_id'][file_id] = data_node_order.index(paths_ordering['data_node'][file_id])
paths_ordering['file_type_id'][file_id] = file_type_order.index(paths_ordering['file_type'][file_id])
#'version' is implicitly from least to most
#sort and reverse order to get from most to least:
return np.sort(paths_ordering,order = sorts_list)[::-1]
开发者ID:laliberte,项目名称:netcdf4_soft_links,代码行数:60,代码来源:create_soft_links.py
示例13: ref_mjd
def ref_mjd(fits_file, hdu=1):
"""Read MJDREFF+ MJDREFI or, if failed, MJDREF, from the FITS header.
Parameters
----------
fits_file : str
Returns
-------
mjdref : numpy.longdouble
the reference MJD
Other Parameters
----------------
hdu : int
"""
import collections
if isinstance(fits_file, collections.Iterable) and\
not is_string(fits_file): # pragma: no cover
fits_file = fits_file[0]
logging.info("opening %s" % fits_file)
try:
ref_mjd_int = np.long(read_header_key(fits_file, 'MJDREFI'))
ref_mjd_float = np.longdouble(read_header_key(fits_file, 'MJDREFF'))
ref_mjd_val = ref_mjd_int + ref_mjd_float
except: # pragma: no cover
ref_mjd_val = np.longdouble(read_header_key(fits_file, 'MJDREF'))
return ref_mjd_val
开发者ID:a321bhi,项目名称:MaLTPyNT,代码行数:30,代码来源:base.py
示例14: run
def run(self):
# Run until turned off
while 1:
if self.on():
# Read bytes in chunks of meaningful size
if self.ser.inWaiting() > 3:
bytes = bytearray(3)
self.ser.readinto(bytes)
height = (bytes[0] << 16) + (bytes[1] << 8) + bytes[2]
# convert to signed long
if (height >= 0x800000): # = 2^23
height = height - 0x1000000 # = 2^24
height = np.long(height)
sampleLock.acquire()
samples[self.pos, self.channel] = height
sampleLock.release()
# Update array indices
self.channel += 1
if self.channel == channels:
self.channel = 0
self.pos += 1
if self.pos == BUF_LEN:
self.pos = 0
开发者ID:weyichen,项目名称:Sonic_Oxen,代码行数:26,代码来源:new_thread_GUI.py
示例15: produce_regions
def produce_regions(masks, visualize=False):
"""given the proposal segmentation masks for an image as a [width, height, proposal_num]
matrix outputs all regions in the image"""
width, height, n_prop = masks.shape
t = ('u8,'*int(np.math.ceil(float(n_prop) / 64)))[:-1]
bv = np.zeros((width, height), dtype=np.dtype(t))
for i in range(n_prop):
m = masks[:, :, i]
a = 'f%d' % (i / 64)
h = m * np.long(2 ** (i % 64))
if n_prop >= 64:
bv[a] += h
else:
bv += h
un = np.unique(bv)
regions = np.zeros((width, height), dtype="uint16")
for i, e in enumerate(un):
regions[bv == e] = i
if visualize:
plt.figure()
plt.imshow(regions)
plt.set_cmap('prism')
plt.colorbar()
return regions
开发者ID:amiltonwong,项目名称:pottics,代码行数:27,代码来源:regions.py
示例16: run
def run(self):
self.pos = 0
# This t is a global variable shared with display
while 1:
if self.on():
dataLock.acquire()
packages = len(data)
dataLock.release()
if packages > channels:
for i in range(channels):
# Don't read and write data simultaneously; acquire lock
dataLock.acquire()
bytes = data.popleft()
dataLock.release()
# construct y value
height = (bytes[0] << 16) + (bytes[1] << 8) + bytes[2]
# convert to signed long
if (height >= 0x800000): # = 2^23
height = height - 0x1000000 # = 2^24
height = np.long(height)
sampleLock.acquire()
samples[self.pos,i] = height
sampleLock.release()
self.pos += 1
if (self.pos == BUF_LEN):
self.pos = 0
开发者ID:weyichen,项目名称:Sonic_Oxen,代码行数:29,代码来源:ecgGraph_wx_GUI.py
示例17: start_jobs
def start_jobs():
"""
Restores the plots if requested and if the persistent files exist and
starts the qt timer of the 1st plot.
"""
for plot in _plots:
if plot.persistentName:
plot.restore_plots()
plot.fig.canvas.set_window_title(plot.title)
runCardVals.iteration = np.long(0)
noTimer = len(_plots) == 0 or\
(plt.get_backend().lower() in (x.lower() for x in
mpl.rcsetup.non_interactive_bk))
if noTimer:
print("The job is running... ")
while True:
msg = '{0} of {1}'.format(
runCardVals.iteration+1, runCardVals.repeats)
if os.name == 'posix':
sys.stdout.write("\r\x1b[K " + msg)
else:
sys.stdout.write("\r ")
print(msg+' ')
sys.stdout.flush()
res = dispatch_jobs()
if res:
return
else:
plot = _plots[0]
plot.areProcessAlreadyRunning = False
plot.timer = plot.fig.canvas.new_timer()
plot.timer.add_callback(plot.timer_callback)
plot.timer.start()
开发者ID:kklmn,项目名称:xrt,代码行数:34,代码来源:runner.py
示例18: Stack
def Stack(inputs, axis=0, **kwargs):
"""Stack the inputs along the given axis.
All dimensions of inputs should be same.
The ``axis`` can be negative.
Parameters
----------
inputs : list of Tensor
The inputs.
axis : int
The axis to stack.
Returns
-------
Tensor
The output tensor.
"""
CheckInputs(inputs, 1, INT_MAX)
arguments = ParseArguments(locals())
arguments['num_input'] = len(inputs)
output = Tensor.CreateOperator(nout=1, op_type='Stack', **arguments)
if all(input.shape is not None for input in inputs):
while axis < 0: axis += (len(inputs[0].shape) + 1)
output.shape = inputs[0].shape
output.shape.insert(axis, np.long(len(inputs)))
return output
开发者ID:neopenx,项目名称:Dragon,代码行数:32,代码来源:ndarray.py
示例19: dimshuffle
def dimshuffle(self, *args, **kwargs):
"""Shuffle the dimensions. [**Theano Style**]
Parameters
----------
dimensions : list
The desired dimensions.
Returns
-------
Tensor
The dimshuffled output.
"""
dimensions = list(args)
perms = []
for dim in dimensions:
if dim != 'x':
if not isinstance(dim, int):
raise ValueError('The type of dimension should be int.')
perms.append(dim)
# transpose
output = Tensor.CreateOperator(inputs=self, nout=1,
op_type='Transpose', perms=perms, **kwargs)
if self.shape is not None:
if len(self.shape) != len(perms):
raise ValueError('The ndim of inputs is {}, but perms provide {}'. \
format(len(self.shape), len(perms)))
output.shape = self.shape[:]
for i, axis in enumerate(perms):
output.shape[i] = self.shape[axis]
# expand dims
for i in xrange(len(dimensions) - len(perms)):
flag = False
input_shape = output.shape
axis = -1
for idx in xrange(len(dimensions)):
if idx >= len(perms): continue
cur_dim = perms[idx]; exp_dim = dimensions[idx]
if cur_dim != exp_dim:
axis = idx
output = Tensor.CreateOperator(inputs=output, nout=1,
op_type='ExpandDims', axis=axis)
perms.insert(axis, 'x')
flag = True
break
if not flag:
axis = len(perms)
output = Tensor.CreateOperator(inputs=output, nout=1,
op_type='ExpandDims', axis=len(perms))
perms.append('x')
if self.shape is not None:
output.shape = input_shape[:]
output.shape.insert(axis, np.long(1))
return output
开发者ID:neopenx,项目名称:Dragon,代码行数:59,代码来源:tensor.py
示例20: run
def run(data, p, m, n, b, c):
"""
Starts the main LSH process.
Parameters
----------
zdata : RDD[Vector]
RDD of data points. Acceptable vector types are numpy.ndarray
or PySpark SparseVector.
p : integer, larger than the largest value in data.
m : integer, number of bins for hashing.
n : integer, number of rows to split the signatures into.
b : integer, number of bands.
c : integer, minimum allowable cluster size.
"""
#data.zipWithIndex()
zdata = data
seeds = np.vstack([np.random.random_integers(p, size = n), np.random.random_integers(0, p, size = n)]).T
hashes = [functools.partial(minhash, a = s[0], b = s[1], p = p, m = m) for s in seeds]
# Start by generating the signatures for each data point.
# Output format is:
# <(vector idx, band idx), minhash>
sigs = zdata.flatMap(lambda x: [[(x[1], i % b), hashes[i](x[0])] for i, h in enumerate(hashes)]).cache()
# Put together the vector minhashes in the same band.
# Output format is:
# <(band idx, minhash list), vector idx>
bands = sigs.groupByKey() \
.map(lambda x: [(x[0][1], hash(frozenset(x[1].data))), x[0][0]]) \
.groupByKey().cache()
# Should we filter?
if c > 0:
bands = bands.filter(lambda x: len(x[1]) > c).cache()
# Remaps each element to a cluster / bucket index.
# Output format is:
# <vector idx, bucket idx>
vector_bucket = bands.map(lambda x: frozenset(sorted(x[1]))).distinct() \
.zipWithIndex().flatMap(lambda x: map(lambda y: (np.long(y), x[1]), x[0])) \
.cache()
# Reverses indices, to key the vectors by their buckets.
# Output format is:
# <bucket idx, vector idx>
bucket_vector = vector_bucket.map(lambda x: (x[1], x[0])).cache()
# Joins indices up with original data to provide clustering results.
# Output format is:
# <bucket idx, list of vectors>
buckets = zdata.map(lambda x: (x[1], x[0])).join(vector_bucket) \
.map(lambda x: (x[1][1], x[1][0])).groupByKey().cache()
# Computes Jaccard similarity of each bucket.
scores = buckets.map(distance_metric).cache()
# Return a wrapper object around the metrics of interest.
return PyLSHModel(sigs, bands, vector_bucket, bucket_vector, buckets, scores)
开发者ID:USF-ML2,项目名称:Yahoooooooo-,代码行数:58,代码来源:lsh.py
注:本文中的numpy.long函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论