This article collects typical usage examples of Python's numpy.argpartition function. If you are wondering what exactly argpartition does, how to call it, or want to see it used in real code, the hand-picked examples below may help.
Below are 20 code examples of argpartition, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help our system recommend better Python code examples.
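Before the project examples, here is a minimal, self-contained sketch of the top-k pattern that most of the examples below rely on; the array contents and the value of k are arbitrary illustrations, not taken from any of the projects.

import numpy as np

scores = np.array([0.2, 0.9, 0.1, 0.7, 0.5])
k = 3

# Indices of the k largest values, found in O(n) time (unordered within the top k)
topk_unordered = np.argpartition(scores, -k)[-k:]

# Sort only those k indices if a ranked order is needed
topk_sorted = topk_unordered[np.argsort(scores[topk_unordered])[::-1]]
print(topk_sorted, scores[topk_sorted])  # [1 3 4] [0.9 0.7 0.5]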
Example 1: predict_variance_inf_phase1
def predict_variance_inf_phase1(budget, hum_train_means, temp_train_means, hum_train_vars, temp_train_vars):
    """Method to make predictions based on max-variance active inference."""
    start_hum = 0
    window_hum = None
    window_temp = None
    i = 0
    hum_preds = np.ones((50, 96))
    temp_preds = np.ones((50, 96))
    for t in global_times:
        if budget > 0:
            window_hum = np.argpartition(hum_train_vars[t], -budget)[-budget:]
            window_temp = np.argpartition(temp_train_vars[t], -budget)[-budget:]
        else:
            window_hum = np.array([])
            window_temp = np.array([])
        hum_pred, temp_pred = makePreds_phase1(window_hum, window_temp, hum_train_means, temp_train_means, i, t)
        hum_preds[:, i] = copy.deepcopy(hum_pred)
        temp_preds[:, i] = copy.deepcopy(temp_pred)
        i += 1
    hum_mean_err = mean_absolute_error(hum_test, hum_preds)
    temp_mean_err = mean_absolute_error(temp_test, temp_preds)
    return hum_preds, temp_preds, hum_mean_err, temp_mean_err
Author: ironhide23586, Project: Sensor-Network-CS583, Lines: 29, Source: Phase3_00.py
Example 2: doKNN
def doKNN(k):
    dm = cdist(teXf, trXf, 'euclidean')
    cfm = np.zeros((10, 10), dtype=int)
    for a in range(0, len(dm)):
        knn = np.argpartition(dm[a], k)[:k]
        preds = trY[knn]
        counts = np.bincount(preds)
        pred = -1
        if len(counts) >= 2:
            top2 = np.argpartition(-counts, 1)
            if counts[top2[0]] == counts[top2[1]]:
                d = 99999
                for i in xrange(0, len(knn)):
                    val = dm[a][i]
                    if val < d:
                        d = dm[a][i]
                        pred = trY[knn[i]]
            else:
                pred = top2[0]
        else:
            pred = 0
        #print pred
        #mnist.visualize(teX[a])
        cfm[teY[a]][pred] += 1
    #print cfm
    #print "ER: ", 1 - np.sum(np.diagonal(cfm))/np.sum(cfm)
    return cfm
Author: ealiasannila, Project: iml, Lines: 28, Source: p3.py
Example 3: precision_test_function
def precision_test_function(theano_inputs):
    k = 10
    scores1, scores2, c_select, n_used_items = theano_test_function(*theano_inputs)
    ids1 = np.argpartition(-scores1, range(k), axis=-1)[0, :k]
    ids2 = np.argpartition(-scores2, range(k), axis=-1)[0, :k]
    return ids1, ids2, c_select, n_used_items
Author: yang-tradelab, Project: sequence-based-recommendations, Lines: 7, Source: fism_cluster.py
Example 4: similarity_matrix
def similarity_matrix(self):
    """ Calculate the similarity matrix given all samples used for GTM map training
    :return: similarity_matrix: Matrix assessing the similarity between samples used for GTM map training
    """
    print "Calculating similarity matrix..."
    # Find one tenth of the highest and lowest probability distribution values for each sample in the latent space
    sim_size = int(round(self.latent_space_size/10))
    responsibility_indexes = np.zeros((sim_size * 2, self.input_data.shape[0]))
    corr_input = np.zeros((sim_size * 2, self.input_data.shape[0]))
    for i in xrange(0, self.input_data.shape[0]):
        responsibility_indexes[0:sim_size, i] = np.argpartition(self.gtm_responsibility[:, i],
                                                                -sim_size)[-sim_size:]
        responsibility_indexes[sim_size:, i] = np.argpartition(self.gtm_responsibility[:, i], sim_size)[0:sim_size]
    responsibility_indexes = responsibility_indexes.astype(int)
    # Create correlation input matrix for similarity assessment
    for i in xrange(0, self.input_data.shape[0]):
        corr_input[:, i] = self.gtm_responsibility[responsibility_indexes[:, i], i]
    # Calculate correlation between all samples and build similarity matrix
    similarity_matrix = np.corrcoef(np.transpose(corr_input))
    # Plot heat map of the similarity matrix accordingly
    [x, y] = np.meshgrid(np.linspace(1, self.input_data.shape[0], self.input_data.shape[0]),
                         np.linspace(1, self.input_data.shape[0], self.input_data.shape[0]))
    x = np.ravel(x)
    y = np.ravel(y)
    sim_lat = np.array([x, y])
    print "Plotting color mesh image..."
    plt.pcolormesh(np.reshape(sim_lat[0, :], (self.input_data.shape[0], self.input_data.shape[0])),
                   np.reshape(sim_lat[1, :], (self.input_data.shape[0], self.input_data.shape[0])),
                   similarity_matrix, cmap='magma', vmin=0, vmax=1)
    plt.colorbar()
    plt.axis([x.min(), x.max(), y.min(), y.max()])
    plt.gca().invert_yaxis()
    return similarity_matrix
Author: mattoescobar, Project: Machine-Learning, Lines: 33, Source: GTM.py
Example 5: local_kmeans_class
def local_kmeans_class(I, L, x, k):
    from scipy.spatial.distance import cdist
    sizex = len(np.atleast_2d(x))
    label = np.zeros((sizex, k))
    for rowsx in range(0, sizex):
        tic()
        dists = cdist(I, np.atleast_2d(x[rowsx]), metric='euclidean')
        toc()
        center = np.zeros((10, k, 28*28))
        label_order = np.unique(L)
        l = 0
        tic()
        thing = np.zeros((k, 28*28))
        for labs in np.unique(L):
            indices = L == labs
            k_smallest = np.argpartition(dists[indices], tuple(range(1, k)), axis=None)
            for i in range(0, k):
                M = I[indices]
                #center[l,i,:] = np.average(M[k_smallest[:i+1]],axis = 0)
                if i == 0:
                    thing[i] = M[k_smallest[i+1]]
                else:
                    thing[i] = thing[i-1] + M[k_smallest[i+1]]
            center[l, :, :] = np.divide(thing, np.repeat(np.arange(1, 11).reshape(10, 1), 28*28, axis=1))
            l += 1
        toc()
        for i in range(k):
            #print(cdist(center[:,i,:], np.atleast_2d(x[rowsx]), metric='euclidean'))
            dists2center = cdist(center[:, i, :], np.atleast_2d(x[rowsx]), metric='euclidean')
            k_smallest = np.argpartition(dists2center, tuple(range(1)), axis=None)
            label[rowsx, i] = label_order[k_smallest[0]]
    return label
Author: AndrewZastovnik, Project: Math-285-hw2, Lines: 33, Source: KNN.py
Example 6: branch_to_nodes
def branch_to_nodes(self, wt, completion):
    """
    Decide which nodes to branch to next
    """
    missing_edges = HGT.get_missing_edges(completion)  # Obtain the missing edge sparse list
    nb = self.strat.node_brancher
    # Determine if there is a maximum count
    count_max = min(self.strat.max_node_branch, self.num_nodes)
    if nb is None or not 'name' in nb:  # Default
        # Gets nodes that contribute to missing edge
        edge = missing_edges.indices[0]  # Grab any next edge
        node_indices = self.H[:, edge].indices
    elif nb['name'] == 'greedy' or nb['name'] == 'long':
        # Gets the nodes that overlap the most(least) with what's missing
        overlap = self.H.dot(missing_edges.T)
        # k = min(count_max + wt.nnz, overlap.nnz)
        k = min(count_max, overlap.nnz)
        if k >= self.num_nodes or k == overlap.nnz:
            if nb['name'] == 'greedy':
                alg_slice = np.argsort(overlap.data)[::-1]
            else:  # long
                alg_slice = np.argsort(overlap.data)
        else:  # Else be smart, don't perform O(nlogn) operations, perform O(k) operations
            if nb['name'] == 'greedy':
                alg_slice = np.argpartition(overlap.data, -k)[-k:]
            else:  # long
                alg_slice = np.argpartition(overlap.data, k)[:k]
        node_indices = overlap.indices[alg_slice]
    elif nb['name'] == 'random':
        # Gets nodes that contribute to random missing edge
        edge = np.random.choice(missing_edges.indices)  # Grab any next edge
        node_indices = self.H[:, edge].indices
    elif nb['name'] == 'diverse':
        # Diversify the kinds of transversals that have been found
        if wt.nnz == 0:  # Just starting out
            node_indices = np.arange(self.num_nodes)  # Branch to everything
        else:  # Otherwise be greedy up to one
            # edge = missing_edges.indices[0] # Grab any next edge
            # node_indices = [self.H[:, edge].indices[0]]
            # overlap = self.H.dot(missing_edges.T)
            # node_indices = [overlap.indices[np.argmax(overlap.data)]]
            scaled_overlap = overlap.data / (self.node_weights[overlap.indices]**2)
            node_indices = overlap.indices[np.where(np.max(scaled_overlap) == scaled_overlap)]
    else:
        raise ValueError("Invalid strat.node_brancher: {0}".format(self.strat.node_brancher))
    if nb is not None and bool(nb.get('shuffle', False)):
        np.random.shuffle(node_indices)
    count = 0
    for i in node_indices:
        if count >= count_max:
            break
        if not wt[i, 0] > 0:  # not already part of working transversal
            self.log('Branching to node:', i)
            count += 1
            yield i
Author: tcfraser, Project: quantum_tools, Lines: 60, Source: hypergraph_transversals.py
Example 7: construct_initial_solution
def construct_initial_solution(self):
    ind = np.argpartition(self.collaboration_coo.data, -len(self.villains_team))[-len(self.villains_team):]
    inc = 1
    while len(np.unique(self.collaboration_coo.row[ind])) < len(self.villains_team):
        ind = np.argpartition(self.collaboration_coo.data, -(len(self.villains_team) + inc))[-(len(self.villains_team) + inc):]
        inc += 1
    heroes_team = self.heroes.loc[self.heroes[CHARACTER_ID].isin(self.collaboration_coo.row[ind])]
    return heroes_team
Author: brunogsa, Project: tabu, Lines: 8, Source: marvel_tabu.py
Example 8: _get_k_max_elements_indices_and_scores
def _get_k_max_elements_indices_and_scores(vec, k, mask=None):
    if mask is None:
        # We use argpartition here instead of argsort to achieve linear-time performance.
        max_elements_indices = np.argpartition(-vec, k - 1)[:k]
    else:
        masked_vec = vec.copy()  # To avoid side-effects
        masked_vec[~mask] = -np.inf
        max_elements_indices = np.argpartition(-masked_vec, k - 1)[:k]
    return max_elements_indices, vec[max_elements_indices]
Author: Allensmile, Project: cakechat, Lines: 9, Source: beamsearch.py
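The comment in Example 8 points out that argpartition gives linear-time top-k selection but leaves the selected indices unordered. A minimal follow-up sketch (standalone; top_k_sorted is an illustrative helper, not part of the cakechat project) shows how to rank those k indices afterwards when a sorted result is needed:

import numpy as np

def top_k_sorted(vec, k):
    # Unordered top-k in O(n), then an O(k log k) sort of just those k entries
    idx = np.argpartition(-vec, k - 1)[:k]
    return idx[np.argsort(-vec[idx])]

print(top_k_sorted(np.array([3.0, 1.0, 4.0, 1.5, 9.0, 2.6]), 3))  # -> [4 2 0]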
Example 9: similarityPlot
def similarityPlot():
    import matplotlib.pyplot as plt
    from matplotlib import rcParams
    tfidf_vectorizer = TfidfVectorizer(min_df=1)
    names = friendsAboveMinNumMessages(200) + [me]
    data = []
    words = []  # ordering of words in tf_idf matrix
    wordsSet = set()  # for faster lookup
    nameSet = set()
    for person in personDict:
        for name in person.split():
            nameSet.add(name)
            nameSet.add(name.lower())
    for i in range(len(names)):
        data.append(getAllMessagesAsString(names[i], False))
    tfidf_matrix = tfidf_vectorizer.fit_transform(data)
    featureNames = tfidf_vectorizer.get_feature_names()
    tfidf_arr = tfidf_matrix.toarray()
    for j in range(len(tfidf_arr[0])):
        word = tfidf_arr[0][j]
        if word not in wordsSet:
            words.append(word)
            wordsSet.add(j)
    #nmds = manifold.MDS(metric = True, n_components = N_DISTINGUISHING_FEATURES)
    #npos = nmds.fit_transform(tfidf_matrix.toarray())
    clf = PCA(n_components=2)
    npos = clf.fit_transform(tfidf_arr)
    plt.scatter(npos[:, 0], npos[:, 1], marker='o', c='b', cmap=plt.get_cmap('Spectral'))  # change colors
    for name, x, y in zip(names, npos[:, 0], npos[:, 1]):
        plt.annotate(
            name,
            xy=(x, y), xytext=(-20, 20),
            textcoords='offset points', ha='right', va='bottom',
            bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
            arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))
    fig, ax = plt.subplots()
    ax2 = ax.twinx()
    xAxisP = [featureNames[i] for i in np.argpartition(clf.components_[0], -50)[-50:] if featureNames[i] not in nameSet]
    yAxisP = [featureNames[i] for i in np.argpartition(clf.components_[1], -50)[-50:] if featureNames[i] not in nameSet]
    xAxisN = [featureNames[i] for i in np.argpartition(-clf.components_[0], -50)[-50:] if featureNames[i] not in nameSet]
    yAxisN = [featureNames[i] for i in np.argpartition(-clf.components_[1], -50)[-50:] if featureNames[i] not in nameSet]
    ax.set_xlabel("Most Positively influential words along x axis:\n" + ", ".join(xAxisP), fontsize=18)
    ax.set_ylabel("Most Positively influential words along y axis:\n" + ", ".join(yAxisP), fontsize=18)
    ax2.set_xlabel("Most Negatively influential words along x axis:\n" + ", ".join(xAxisN), fontsize=18)
    ax2.set_ylabel("Most Negatively influential words along y axis:\n" + ", ".join(yAxisN), fontsize=18)
    # xAxis = [featureNames[i] for i in np.argpartition(np.absolute(clf.components_[0]), -50)[-50:] if featureNames[i] not in nameSet]
    # yAxis = [featureNames[i] for i in np.argpartition(np.absolute(clf.components_[1]), -50)[-50:] if featureNames[i] not in nameSet]
    # for i in range(1, max(len(xAxis), len(yAxis))):
    #     if i % 20 == 0 and i < len(xAxis):
    #         xAxis[i] += "\n"
    #     if i % 15 == 0 and i < len(yAxis):
    #         yAxis[i] += "\n"
    # plt.xlabel("Most influential words along x axis:\n" + ", ".join(xAxis), fontsize=18)
    # plt.ylabel("Most influential words along y axis:\n" + ", ".join(yAxis), fontsize=18)
    rcParams.update({'figure.autolayout': True})
    plt.suptitle("Word-Usage Similarity Scatterplot", fontsize=24, fontweight='bold')
    plt.show()
Author: ctbrennan, Project: cross-platform-message-analytics, Lines: 57, Source: parse_analyze.py
Example 10: _phase2
def _phase2(self):
    """
    Execute phase 2 of the SP region. This phase is used to compute the
    active columns.
    Note - This should only be called after phase 1 has been called and
    after the inhibition radius and neighborhood have been updated.
    """
    # Shift the outputs
    self.y[:, 1:] = self.y[:, :-1]
    self.y[:, 0] = 0
    # Calculate k
    #   - For a column to be active its overlap must be at least as large
    #     as the overlap of the k-th largest column in its neighborhood.
    k = self._get_num_cols()
    if self.global_inhibition:
        # The neighborhood is all columns, thus the set of active columns
        # is simply columns that have an overlap >= the k-th largest in the
        # entire region
        # Compute the winning column indexes
        if self.learn:
            # Randomly break ties
            ix = np.argpartition(-self.overlap[:, 0] -
                                 self.prng.uniform(.1, .2, self.ncolumns), k - 1)[:k]
        else:
            # Choose the same set of columns each time
            ix = np.argpartition(-self.overlap[:, 0], k - 1)[:k]
        # Set the active columns
        self.y[ix, 0] = self.overlap[ix, 0] > 0
    else:
        # The neighborhood is bounded by the inhibition radius, therefore
        # each column's neighborhood must be considered
        for i in xrange(self.ncolumns):
            # Get the neighbors
            ix = np.where(self.neighbors[i])[0]
            # Compute the minimum top overlap
            if ix.shape[0] <= k:
                # Desired number of candidates is at or below the desired
                # activity level, so find the overall min
                m = max(bn.nanmin(self.overlap[ix, 0]), 1)
            else:
                # Desired number of candidates is above the desired
                # activity level, so find the k-th largest
                m = max(-np.partition(-self.overlap[ix, 0], k - 1)[k - 1],
                        1)
            # Set the column activity
            if self.overlap[i, 0] >= m: self.y[i, 0] = True
Author: johnrobinsn, Project: mHTM, Lines: 55, Source: region.py
Example 11: _build_recursive
def _build_recursive(indices, level=0, split_index=0):
    """
    Descend recursively into tree to build it, setting splits and
    returning indices for leaves
    :param indices: The current set of indices before partitioning
    :param level: The level in the tree
    :param split_index: The index of the split to set
    :return: A list of arrays representing leaf membership
    :rtype: list[np.ndarray]
    """
    # If we're at the bottom, no split, just return the set
    if level == self._depth:
        return [indices]
    n = indices.size
    # If we literally don't have enough to populate the leaf, make it
    # empty
    if n < 1:
        return []
    # Get the random projections for these indices at this level
    # NB: Recall that the projection matrix has shape (levels, N)
    level_proj = proj[indices, level]
    # Split at the median if even, put median in upper half if not
    n_split = n // 2
    if n % 2 == 0:
        part_indices = np.argpartition(
            level_proj, (n_split - 1, n_split))
        split_val = level_proj[part_indices[n_split - 1]]
        split_val += level_proj[part_indices[n_split]]
        split_val /= 2.0
    else:
        part_indices = np.argpartition(level_proj, n_split)
        split_val = level_proj[part_indices[n_split]]
    splits[split_index] = split_val
    # part_indices is relative to this block of values, recover
    # main indices
    left_indices = indices[part_indices[:n_split]]
    right_indices = indices[part_indices[n_split:]]
    # Descend into each split and get sub-splits
    left_out = _build_recursive(left_indices, level=level + 1,
                                split_index=2 * split_index + 1)
    right_out = _build_recursive(right_indices, level=level + 1,
                                 split_index=2 * split_index + 2)
    # Assemble index set
    left_out.extend(right_out)
    return left_out
Author: Kitware, Project: SMQTK, Lines: 54, Source: mrpt.py
Example 12: fitOneLinearRegression
def fitOneLinearRegression(thetaLinear, IntensityLinear, tiltanglesArray, options):
    if (len(tiltanglesArray) % 2 == 1):
        halfN = int(len(tiltanglesArray)/2) + 1
        xLeft, yLeft = thetaLinear[0:halfN], IntensityLinear[0:halfN]
        xRight, yRight = thetaLinear[halfN-1:], IntensityLinear[halfN-1:]
    else:
        halfN = int(len(tiltanglesArray)/2)
        xLeft, yLeft = thetaLinear[0:halfN], IntensityLinear[0:halfN]
        xRight, yRight = thetaLinear[halfN:], IntensityLinear[halfN:]
    slopeLeft, interceptLeft, r2Left = linearRegression(xLeft, yLeft)
    slopeRight, interceptRight, r2Right = linearRegression(xRight, yRight)
    assert(len(xLeft) == len(xRight))
    fitLeft = slopeLeft*xLeft + interceptLeft
    fitRight = slopeRight*xRight + interceptRight
    # the sum of squared residuals
    resLeft = yLeft - fitLeft
    resLeft = resLeft / fitLeft
    #print "resLeft", resLeft
    resRight = yRight - fitRight
    resRight = resRight / fitRight
    #print "resRight", resRight
    fresLeft = sum(resLeft**2)
    fresRight = sum(resRight**2)
    fres = [fresLeft*1000000, fresRight*1000000]
    # find the points with the largest 3 residuals in left and right branches, use numpy.argpartition
    #N = options.largestNRes
    N = 3
    negN = (-1)*N
    indexLargeLeft = np.argpartition(resLeft**2, negN)[negN:]
    indexLargeRight = np.argpartition(resRight**2, negN)[negN:]
    M = 3
    #M = options.smallestNRes
    posM = M
    indexSmallLeft = np.argpartition(resLeft**2, posM)[:posM]
    indexSmallRight = np.argpartition(resRight**2, posM)[:posM]
    # MSE: under the assumption that the population error term has a constant variance, the estimate of
    # that variance is given by MSE, the mean square error. The denominator is the sample size reduced by
    # the number of model parameters estimated from the same data, (n-p) for p regressors or (n-p-1) if an
    # intercept is used. In this case, p=1, so the denominator is n-2.
    stdResLeft = np.std(resLeft, ddof=2)
    stdResRight = np.std(resRight, ddof=2)
    stdRes = [stdResLeft*1000, stdResRight*1000]
    ret = fres, stdRes, xLeft, yLeft, fitLeft, xRight, yRight, fitRight, indexLargeLeft, indexLargeRight, indexSmallLeft, indexSmallRight, resLeft, resRight, slopeLeft, interceptLeft, slopeRight, interceptRight
    return ret
Author: jianglab, Project: tomography, Lines: 52, Source: tomoThickness.py
Example 13: define_toplogy
def define_toplogy(self, num_input, num_hidden, num_output, density):
    """
    Defines the topology of the OpenBrain network.
    :param num_input:
    :param num_hidden:
    :param num_output:
    :param density:
    :return:
    """
    topo = networkx.DiGraph(networkx.watts_strogatz_graph(self.num_neurons, 5, density, seed=None)).to_directed()
    adjacency_list = topo.adjacency_list()
    # Pick the output neurons to be those with highest in degree
    in_deg = np.array([topo.in_degree(x) for x, _ in enumerate(adjacency_list)])
    self.output_neurons = np.argpartition(in_deg, -num_output)[-num_output:]
    print(self.output_neurons)
    print([topo.in_degree(x) for x in self.output_neurons])
    # Pick the input neurons to be those with highest out degree
    out_deg = np.array([topo.out_degree(x) if x not in self.output_neurons else -1
                        for x, _ in enumerate(adjacency_list)])
    self.input_neurons = np.argpartition(out_deg, -num_input)[-num_input:]
    # Output neurons do not fire out.
    for adjacent_neurons in adjacency_list:
        for out_neuron in self.output_neurons:
            if out_neuron in adjacent_neurons:
                adjacent_neurons.remove(out_neuron)
    # Disconnect input -> output
    for out in self.output_neurons:
        for inp in self.input_neurons:
            if out in adjacency_list[inp]: adjacency_list[inp].remove(out)
            if inp in adjacency_list[out]: adjacency_list[out].remove(inp)
    for i, adjacent in enumerate(adjacency_list):
        if i not in self.input_neurons and i not in self.output_neurons:
            for n in adjacent:
                if i in adjacency_list[n]:
                    if np.random.rand(1) > 0.5:
                        adjacent.remove(n)
                    else:
                        adjacency_list[n].remove(i)
    # Let nothing enter the input neurons
    for inp in self.input_neurons:
        adjacency_list[inp] = []
    return adjacency_list
Author: mlberkeley, Project: openbrain, Lines: 51, Source: brain.py
Example 14: sort_by_relative_entropy
def sort_by_relative_entropy(corpus, topicct, stemmer):
    # get the right file names for the corpus and count
    stemmed_weights = ['wordweights/' + fname for fname in os.listdir('wordweights')
                       if fname.startswith('{}-{}-{}'.format(corpus, stemmer, topicct))]
    unstemmed_weights = ['wordweights/' + fname for fname in os.listdir('wordweights')
                         if fname.startswith('{}-{}-{}'.format(corpus, UNSTEMMED_NAME, topicct))]
    stemmed_corpus_file = 'corpora/{}-train-{}-stopped.txt'.format(corpus, stemmer)
    unstemmed_corpus_file = 'corpora/{}-train-{}-stopped.txt'.format(corpus, UNSTEMMED_NAME)
    # get the mapping from unstemmed to stemmed words
    stemmed_to_unstemmed = defaultdict(set)
    unstemmed_counts = Counter()
    with open(stemmed_corpus_file) as f, open(unstemmed_corpus_file) as g:
        for stemmed_line in f:
            stemmed_words = stemmed_line.split()[3:]
            unstemmed_words = g.readline().split()[3:]
            assert(len(stemmed_words) == len(unstemmed_words))
            for uword, sword in zip(unstemmed_words, stemmed_words):
                stemmed_to_unstemmed[sword].add(uword)
                unstemmed_counts[uword] += 1
    # for each file; for each word; get the entropy
    stemmed_entropies = defaultdict(list)
    unstemmed_entropies = defaultdict(list)
    for file in stemmed_weights:
        entropy_dict = get_stemmed_entropy_per_word(file)
        for k, v in entropy_dict.iteritems():
            stemmed_entropies[k].append(v)
    for file in unstemmed_weights:
        entropy_dict = get_unstemmed_entropy_per_word(file, stemmed_to_unstemmed, int(topicct))
        for k, v in entropy_dict.iteritems():
            unstemmed_entropies[k].append(v)
    # compute difference of average entropies
    stemmed_vocab = [sword for sword, uwords in stemmed_to_unstemmed.iteritems() if len(uwords) > 1]
    entropy_diffs = np.zeros(len(stemmed_vocab))
    for i, sword in enumerate(stemmed_vocab):
        entropy_diffs[i] = np.mean(stemmed_entropies[sword]) - np.mean(unstemmed_entropies[sword])
    # find top 50 maximum and minimum entropies
    min_indices = np.argpartition(entropy_diffs, 50)[:50]
    max_indices = np.argpartition(entropy_diffs, -50)[-50:]
    with open('wordlists/{}-{}-{}.txt'.format(corpus, stemmer, topicct), 'w') as wf:
        wf.write('Lowest entropy differences (stemmed is better)\n')
        for i in min_indices:
            wf.write('{}\t{}\t{}\n'.format(entropy_diffs[i], stemmed_vocab[i], ' '.join(stemmed_to_unstemmed[stemmed_vocab[i]])))
        wf.write('Highest entropy differences (unstemmed is better)\n')
        for i in max_indices:
            wf.write('{}\t{}\t{}\n'.format(entropy_diffs[i], stemmed_vocab[i], ' '.join(stemmed_to_unstemmed[stemmed_vocab[i]])))
Author: heraldicsandfox, Project: stemmers, Lines: 49, Source: word_entropy.py
Example 15: computeRanks
def computeRanks(composedSpace, observedSpace):
    """Ranks all the representations in the composed space with respect to
    the representations in the observed space. Cut-off value 1000."""
    ranks = {}
    rankList = []
    composedWords = set(composedSpace.get_id2row())
    observedWords = observedSpace.get_id2row()
    neighbours = 1000
    for w_idx, word in enumerate(composedWords):
        vector = composedSpace.get_row(word)
        Y = 1 - cdist(vector.mat, observedSpace.get_cooccurrence_matrix().mat, 'cosine')
        nearest = Y.argmax()
        nearest_k_indices = np.argpartition(Y, tuple([-p for p in range(neighbours)]), axis=None)[-neighbours:]
        # pp([(observedWords[idx], Y[0][idx]) for idx in reversed(nearest_k_indices)])
        words = [observedWords[idx] for idx in reversed(nearest_k_indices)]
        wordRanks = {word: index+1 for index, word in enumerate(words)}
        # print(wordRanks)
        if (word in wordRanks):
            r = wordRanks[word]
            ranks[word] = r
            rankList.append(r)
        else:
            ranks[word] = 1000
            rankList.append(1000)
        if ((w_idx > 0) and (w_idx % 100 == 0)):
            print(w_idx)
    return rankList, ranks
Author: corinadima, Project: gWordcomp, Lines: 34, Source: composition_eval.py
Example 16: splitBimodal
def splitBimodal(self, x, y, largepoly=30):
    p = np.polyfit(x, y, largepoly)  # polynomial coefficients for fit
    extrema = np.roots(np.polyder(p))
    extrema = extrema[np.isreal(extrema)]
    extrema = extrema[(extrema - x[1]) * (x[-2] - extrema) > 0]  # exclude the endpoints due to false maxima during fitting
    try:
        root_vals = [sum([p[::-1][i]*(root**i) for i in range(len(p))]) for root in extrema]
        peaks = extrema[np.argpartition(root_vals, -2)][-2:]  # find two peaks of bimodal distribution
        mid, = np.where((x - peaks[0]) * (peaks[1] - x) > 0)
        # want data points between the peaks
    except:
        warnings.warn("Peak finding failed!")
        return None
    try:
        p_mid = np.polyfit(x[mid], y[mid], 2)  # fit middle section to a parabola
        midpoint = np.roots(np.polyder(p_mid))[0]
    except:
        warnings.warn("Polynomial fit between peaks of distribution poorly conditioned. Falling back on using the minimum! May result in inaccurate split determination.")
        if len(mid) == 0:
            return None
        midx = np.argmin(y[mid])
        midpoint = x[mid][midx]
    return midpoint
Author: j-dr, Project: bigbrother, Lines: 28, Source: metric.py
Example 17: query_with_distances
def query_with_distances(self, v, n):
    """Find indices of `n` most similar vectors from the index to query vector `v`."""
    if self._metric == 'hamming':
        v = numpy.packbits(v)
    if self._metric != 'jaccard':
        # use same precision for query as for index
        v = numpy.ascontiguousarray(v, dtype=self.index.dtype)
    # HACK we ignore query length as that's a constant not affecting the final ordering
    if self._metric == 'angular':
        # argmax_a cossim(a, b) = argmax_a dot(a, b) / |a||b| = argmin_a -dot(a, b)
        dists = -numpy.dot(self.index, v)
    elif self._metric == 'euclidean':
        # argmin_a (a - b)^2 = argmin_a a^2 - 2ab + b^2 = argmin_a a^2 - 2ab
        dists = self.lengths - 2 * numpy.dot(self.index, v)
    elif self._metric == 'hamming':
        diff = numpy.bitwise_xor(v, self.index)
        pc = BruteForceBLAS.popcount
        den = float(len(v) * 8)
        dists = [sum([pc[part] for part in point]) / den for point in diff]
    elif self._metric == 'jaccard':
        dists = [pd[self._metric]['distance'](v, e) for e in self.index]
    else:
        assert False, "invalid metric"  # shouldn't get past the constructor!
    nearest_indices = numpy.argpartition(dists, n)[:n]  # partition-sort by distance, get `n` closest
    indices = [idx for idx in nearest_indices if pd[self._metric]["distance_valid"](dists[idx])]

    def fix(index):
        ep = self.index[index]
        ev = v
        if self._metric == "hamming":
            ep = numpy.unpackbits(ep)
            ev = numpy.unpackbits(ev)
        return (index, pd[self._metric]['distance'](ep, ev))

    return map(fix, indices)
Author: ilyaraz, Project: ann-benchmarks, Lines: 35, Source: bruteforce.py
Example 18: nearest
def nearest(self, wrd, N=10):
    wrd_vec_norm = self.w_to_normv(wrd)
    if wrd_vec_norm is None:
        return
    sims = self.vectors.dot(wrd_vec_norm)/self.norm_constants  # cosine similarity to all other vecs
    # http://stackoverflow.com/questions/6910641/how-to-get-indices-of-n-maximum-values-in-a-numpy-array
    return sorted(((sims[idx], self.words[idx]) for idx in numpy.argpartition(sims, -N-1)[-N-1:]), reverse=True)[1:]
Author: fginter, Project: wvlib_light, Lines: 7, Source: lwvlib.py
Example 19: cluster_newsgroups
def cluster_newsgroups():
    """ Cluster newsgroup categories. """
    from kmeans import KMeans
    from similarity import simMatrix
    corpus, dictionary = build_dictionary(bigram=True)
    tfidf = TFIDF(dictionary)
    newsgroups = tfidf.vectorize(corpus)
    dictionary = tfidf.dictionary
    categories = sorted(corpus.keys())
    N = 6
    print "\n{}-Most Common Words".format(N)
    for index, category in enumerate(categories):
        nlargest = np.argpartition(newsgroups[index, :], -N)[-N:]
        nlargest = nlargest[np.argsort(newsgroups[index, nlargest])][::-1]
        print "{:>24} {}".format(category, dictionary[nlargest])
    print
    K = 3
    km = KMeans(n_clusters=K)
    km.fit(newsgroups)
    labels = km.labels_
    print "\nKMeans Label Assignment, K = {}".format(K)
    for category, label, in zip(categories, labels):
        print int(label), category
    simMatrix(newsgroups).plot().show()
Author: JFanZhao, Project: practice, Lines: 32, Source: tfidf.py
Example 20: argtopk
def argtopk(a_plus_idx, k, axis, keepdims):
    """ Chunk and combine function of argtopk
    Extract the indices of the k largest elements from a on the given axis.
    If k is negative, extract the indices of the -k smallest elements instead.
    Note that, unlike in the parent function, the returned elements
    are not sorted internally.
    """
    assert keepdims is True
    axis = axis[0]
    if isinstance(a_plus_idx, list):
        a_plus_idx = list(flatten(a_plus_idx))
        a = np.concatenate([ai for ai, _ in a_plus_idx], axis)
        idx = np.concatenate([broadcast_to(idxi, ai.shape)
                              for ai, idxi in a_plus_idx], axis)
    else:
        a, idx = a_plus_idx
    if abs(k) >= a.shape[axis]:
        return a_plus_idx
    idx2 = np.argpartition(a, -k, axis=axis)
    k_slice = slice(-k, None) if k > 0 else slice(-k)
    idx2 = idx2[tuple(k_slice if i == axis else slice(None)
                      for i in range(a.ndim))]
    return take_along_axis(a, idx2, axis), take_along_axis(idx, idx2, axis)
Author: martindurant, Project: dask, Lines: 27, Source: chunk.py
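The docstring of Example 20 notes that a positive k selects the k largest elements and a negative k the -k smallest, without sorting the result. A minimal standalone sketch of that sign convention in plain NumPy (argtopk_simple is an illustrative name, not the dask chunk function) follows:

import numpy as np

def argtopk_simple(a, k):
    # k > 0: indices of the k largest values; k < 0: indices of the -k smallest.
    # As in the dask chunk function, the result is not sorted internally.
    idx = np.argpartition(a, -k)
    return idx[-k:] if k > 0 else idx[:-k]

a = np.array([5, 1, 9, 3, 7])
print(argtopk_simple(a, 2))   # indices of the two largest, e.g. [4 2]
print(argtopk_simple(a, -2))  # indices of the two smallest, e.g. [1 3]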
Note: The numpy.argpartition examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are selected from open-source projects contributed by their authors; copyright remains with the original authors, and any distribution or use should follow the corresponding project's license. Do not reproduce without permission.