
Python numpy.argpartition Function Code Examples


This article collects typical usage examples of Python's numpy.argpartition function. If you have been wondering what exactly argpartition does, how to call it, or what it looks like in real code, the hand-picked examples below should help.



The sections below present 20 code examples of the argpartition function, ordered by popularity by default. You can upvote the examples you like or find useful; your ratings help the site recommend better Python code samples.
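
Before the examples, here is a minimal sketch of the core idiom nearly all of them build on: np.argpartition places the k-th order statistic in its sorted position in O(n) time, so slicing the returned indices yields the k smallest or k largest elements without a full sort (values within the slice arrive in no particular order). The array here is made up:

import numpy as np

a = np.array([9, 1, 7, 3, 5])
k = 2

smallest = np.argpartition(a, k)[:k]    # indices of the 2 smallest values, e.g. [1, 3]
largest = np.argpartition(a, -k)[-k:]   # indices of the 2 largest values, e.g. [2, 0]

print(a[smallest])  # [1 3] in some order
print(a[largest])   # [7 9] in some order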

Example 1: predict_variance_inf_phase1

import copy

import numpy as np
from sklearn.metrics import mean_absolute_error  # assumed source of mean_absolute_error

def predict_variance_inf_phase1(budget, hum_train_means, temp_train_means, hum_train_vars, temp_train_vars):
    """Make predictions based on max-variance active inference.

    Relies on module-level names from the original project: global_times,
    hum_test, temp_test, and the helper makePreds_phase1.
    """
    hum_preds = np.ones((50, 96))
    temp_preds = np.ones((50, 96))

    for i, t in enumerate(global_times):
        if budget > 0:
            # Sensors with the `budget` largest variances at time t
            window_hum = np.argpartition(hum_train_vars[t], -budget)[-budget:]
            window_temp = np.argpartition(temp_train_vars[t], -budget)[-budget:]
        else:
            window_hum = np.array([])
            window_temp = np.array([])

        hum_pred, temp_pred = makePreds_phase1(window_hum, window_temp, hum_train_means, temp_train_means, i, t)

        hum_preds[:, i] = copy.deepcopy(hum_pred)
        temp_preds[:, i] = copy.deepcopy(temp_pred)

    hum_mean_err = mean_absolute_error(hum_test, hum_preds)
    temp_mean_err = mean_absolute_error(temp_test, temp_preds)

    return hum_preds, temp_preds, hum_mean_err, temp_mean_err
Developer: ironhide23586, Project: Sensor-Network-CS583, Lines: 29, Source: Phase3_00.py


Example 2: doKNN

import numpy as np
from scipy.spatial.distance import cdist

def doKNN(k):
    # Globals from the original project: trXf/teXf are train/test feature
    # matrices, trY/teY the corresponding label vectors.
    dm = cdist(teXf, trXf, 'euclidean')
    cfm = np.zeros((10, 10), dtype=int)
    for a in range(len(dm)):
        knn = np.argpartition(dm[a], k)[:k]  # indices of the k nearest training points
        preds = trY[knn]
        counts = np.bincount(preds)
        pred = -1
        if len(counts) >= 2:
            top2 = np.argpartition(-counts, 1)  # top2[0]/top2[1]: most and second-most common labels
            if counts[top2[0]] == counts[top2[1]]:
                # Tie: fall back to the label of the closest neighbour
                d = np.inf
                for i in range(len(knn)):
                    val = dm[a][knn[i]]  # distance to the i-th candidate neighbour
                    if val < d:
                        d = val
                        pred = trY[knn[i]]
            else:
                pred = top2[0]
        else:
            pred = 0
        cfm[teY[a]][pred] += 1
    return cfm
Developer: ealiasannila, Project: iml, Lines: 28, Source: p3.py
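
A note on the tie-detection trick above, in isolation: np.bincount tallies the neighbours' votes and argpartition pulls out the two most common labels, whose counts can then be compared. The votes below are made up:

import numpy as np

counts = np.bincount(np.array([2, 2, 7, 7, 1]))  # votes among k = 5 neighbours
top2 = np.argpartition(-counts, 1)[:2]           # the two most common labels
tie = counts[top2[0]] == counts[top2[1]]         # True here: labels 2 and 7 both got 2 votes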


Example 3: precision_test_function

		def precision_test_function(theano_inputs):
			k = 10
			scores1, scores2, c_select, n_used_items = theano_test_function(*theano_inputs)
			# kth=range(k) keeps the first k partitioned positions sorted,
			# so ids1/ids2 are the top-10 item indices in descending score order
			ids1 = np.argpartition(-scores1, range(k), axis=-1)[0, :k]
			ids2 = np.argpartition(-scores2, range(k), axis=-1)[0, :k]

			return ids1, ids2, c_select, n_used_items
Developer: yang-tradelab, Project: sequence-based-recommendations, Lines: 7, Source: fism_cluster.py
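
The range(k) trick above deserves a note: passing a sequence of kth values makes argpartition put each of those positions in sorted order, so the leading k indices come back already ranked. A self-contained sketch with made-up scores:

import numpy as np

scores = np.array([[0.1, 0.9, 0.4, 0.7, 0.3]])
k = 3

# kth=range(k) sorts positions 0..k-1, so the slice is the top-k in descending score order
ids = np.argpartition(-scores, range(k), axis=-1)[0, :k]
print(ids)  # [1 3 2] -> scores 0.9, 0.7, 0.4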


Example 4: similarity_matrix

 def similarity_matrix(self):
     """ Calculate the similarity matrix given all samples used for GTM map training
     :return: similarity_matrix: Matrix assessing the similarity between samples used for GTM map training
     """
     print("Calculating similarity matrix...")
     # Find one tenth of the highest and lowest probability distribution values for each sample in the latent space
     sim_size = int(round(self.latent_space_size / 10))
     responsibility_indexes = np.zeros((sim_size * 2, self.input_data.shape[0]))
     corr_input = np.zeros((sim_size * 2, self.input_data.shape[0]))
     for i in range(self.input_data.shape[0]):
         responsibility_indexes[0:sim_size, i] = np.argpartition(self.gtm_responsibility[:, i],
                                                                 -sim_size)[-sim_size:]
         responsibility_indexes[sim_size:, i] = np.argpartition(self.gtm_responsibility[:, i], sim_size)[0:sim_size]
     responsibility_indexes = responsibility_indexes.astype(int)
     # Create correlation input matrix for similarity assessment
     for i in range(self.input_data.shape[0]):
         corr_input[:, i] = self.gtm_responsibility[responsibility_indexes[:, i], i]
     # Calculate correlation between all samples and build similarity matrix
     similarity_matrix = np.corrcoef(np.transpose(corr_input))
     # Plot heat map of the similarity matrix accordingly
     [x, y] = np.meshgrid(np.linspace(1, self.input_data.shape[0], self.input_data.shape[0]),
                          np.linspace(1, self.input_data.shape[0], self.input_data.shape[0]))
     x = np.ravel(x)
     y = np.ravel(y)
     sim_lat = np.array([x, y])
     print("Plotting color mesh image...")
     plt.pcolormesh(np.reshape(sim_lat[0, :], (self.input_data.shape[0], self.input_data.shape[0])),
                    np.reshape(sim_lat[1, :], (self.input_data.shape[0], self.input_data.shape[0])),
                    similarity_matrix, cmap='magma', vmin=0, vmax=1)
     plt.colorbar()
     plt.axis([x.min(), x.max(), y.min(), y.max()])
     plt.gca().invert_yaxis()
     return similarity_matrix
Developer: mattoescobar, Project: Machine-Learning, Lines: 33, Source: GTM.py


Example 5: local_kmeans_class

def local_kmeans_class(I, L, x, k):
    """Classify each query in x by local k-means: for every class, average the
    1..k nearest training samples and pick the class whose centroid is closest.

    I: training samples, L: training labels, x: query samples.
    tic()/toc() are timing helpers from the surrounding project.
    """
    from scipy.spatial.distance import cdist

    sizex = len(np.atleast_2d(x))
    label = np.zeros((sizex, k))
    for rowsx in range(sizex):
        tic()
        dists = cdist(I, np.atleast_2d(x[rowsx]), metric='euclidean')
        toc()
        center = np.zeros((10, k, 28 * 28))
        label_order = np.unique(L)
        l = 0
        tic()
        thing = np.zeros((k, 28 * 28))
        for labs in np.unique(L):
            indices = L == labs
            # Positions 0..k-1 hold this class's k nearest samples, in order
            k_smallest = np.argpartition(dists[indices], tuple(range(1, k)), axis=None)
            M = I[indices]
            for i in range(k):
                # Running sum of the i+1 nearest samples of this class
                if i == 0:
                    thing[i] = M[k_smallest[i]]
                else:
                    thing[i] = thing[i - 1] + M[k_smallest[i]]
            # Turn the running sums into means (assumes k == 10, matching the 10 classes)
            center[l, :, :] = np.divide(thing, np.repeat(np.arange(1, 11).reshape(10, 1), 28 * 28, axis=1))
            l += 1
        toc()
        for i in range(k):
            # Nearest class centroid among those built from i+1 neighbours
            dists2center = cdist(center[:, i, :], np.atleast_2d(x[rowsx]), metric='euclidean')
            k_smallest = np.argpartition(dists2center, tuple(range(1)), axis=None)
            label[rowsx, i] = label_order[k_smallest[0]]
    return label
Developer: AndrewZastovnik, Project: Math-285-hw2, Lines: 33, Source: KNN.py


Example 6: branch_to_nodes

    def branch_to_nodes(self, wt, completion):
        """
        Decide which nodes to branch to next
        """
        missing_edges = HGT.get_missing_edges(completion) # Obtain the missing edge sparse list

        nb = self.strat.node_brancher
        
        # Determine if there is a maximum count
        count_max = min(self.strat.max_node_branch, self.num_nodes)
        
        if nb is None or not 'name' in nb: # Default
            # Gets nodes that contribute to missing edge
            edge = missing_edges.indices[0] # Grab any next edge
            node_indices = self.H[:, edge].indices
        elif nb['name'] == 'greedy' or nb['name'] == 'long':
            # Gets the nodes that overlap the most(least) with what's missing
            overlap = self.H.dot(missing_edges.T)
            # k = min(count_max + wt.nnz, overlap.nnz)
            k = min(count_max, overlap.nnz)
            if k >= self.num_nodes or k == overlap.nnz:
                if nb['name'] == 'greedy':
                    alg_slice = np.argsort(overlap.data)[::-1]
                else: # long
                    alg_slice = np.argsort(overlap.data)
            else: # Else be smart, don't perform O(nlogn) operations, perform O(k) operations
                if nb['name'] == 'greedy':
                    alg_slice = np.argpartition(overlap.data, -k)[-k:]
                else: #long
                    alg_slice = np.argpartition(overlap.data, k)[:k]
            node_indices = overlap.indices[alg_slice]
        elif nb['name'] == 'random':
            # Gets nodes that contribute to random missing edge
            edge = np.random.choice(missing_edges.indices) # Grab any next edge
            node_indices = self.H[:, edge].indices
        elif nb['name'] == 'diverse':
            # Diversify the kinds of transversals that have been found
            if wt.nnz == 0: # Just starting out
                node_indices = np.arange(self.num_nodes) # Branch to everything
            else: # Otherwise be greedy, scaled by node weight
                overlap = self.H.dot(missing_edges.T) # overlap of each node with the missing edges
                scaled_overlap = overlap.data / (self.node_weights[overlap.indices]**2)
                node_indices = overlap.indices[np.where(np.max(scaled_overlap) == scaled_overlap)]
        else:
            raise ValueError("Invalid strat.node_brancher: {0}".format(self.strat.node_brancher))
        
        if nb is not None and bool(nb.get('shuffle', False)):
            np.random.shuffle(node_indices)
        
        count = 0
        for i in node_indices:
            if count >= count_max:
                break
            if not wt[i, 0] > 0: # not already part of working transversal
                self.log('Branching to node:', i)
                count += 1
                yield i
Developer: tcfraser, Project: quantum_tools, Lines: 60, Source: hypergraph_transversals.py


Example 7: construct_initial_solution

 def construct_initial_solution(self):
   # Start from the strongest collaborations, one candidate per villain
   n = len(self.villains_team)
   ind = np.argpartition(self.collaboration_coo.data, -n)[-n:]
   inc = 1
   # Grow the candidate set until it covers enough distinct heroes
   while len(np.unique(self.collaboration_coo.row[ind])) < n:
     ind = np.argpartition(self.collaboration_coo.data, -(n + inc))[-(n + inc):]
     inc += 1
   heroes_team = self.heroes.loc[self.heroes[CHARACTER_ID].isin(self.collaboration_coo.row[ind])]
   return heroes_team
Developer: brunogsa, Project: tabu, Lines: 8, Source: marvel_tabu.py


Example 8: _get_k_max_elements_indices_and_scores

 def _get_k_max_elements_indices_and_scores(vec, k, mask=None):
     if mask is None:
         # We use argpartition here instead of argsort to achieve linear-time performance.
         max_elements_indices = np.argpartition(-vec, k - 1)[:k]
     else:
         masked_vec = vec.copy()  # To avoid side-effects
         masked_vec[~mask] = -np.inf
         max_elements_indices = np.argpartition(-masked_vec, k - 1)[:k]
     return max_elements_indices, vec[max_elements_indices]
Developer: Allensmile, Project: cakechat, Lines: 9, Source: beamsearch.py
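
A hypothetical call showing the mask semantics, treating the helper above as a standalone function (the input values are made up):

import numpy as np

vec = np.array([0.2, 0.8, 0.5, 0.9, 0.1])
mask = np.array([True, False, True, True, True])  # index 1 is masked out

indices, scores = _get_k_max_elements_indices_and_scores(vec, 2, mask)
# indices -> [3, 2], scores -> [0.9, 0.5]; the masked-out 0.8 is skipped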


Example 9: similarityPlot

def similarityPlot():
	import matplotlib.pyplot as plt
	from matplotlib import rcParams
	tfidf_vectorizer = TfidfVectorizer(min_df=1)
	names = friendsAboveMinNumMessages(200) + [me]
	data = []
	words = [] #ordering of words in tf_idf matrix
	wordsSet = set() #for faster lookup
	nameSet = set()
	for person in personDict:
		for name in person.split():
			nameSet.add(name)
			nameSet.add(name.lower())
	for i in range(len(names)):
		data.append(getAllMessagesAsString(names[i], False))
	tfidf_matrix = tfidf_vectorizer.fit_transform(data)
	featureNames = tfidf_vectorizer.get_feature_names()
	tfidf_arr = tfidf_matrix.toarray()
	for j in range(len(tfidf_arr[0])):
		word = tfidf_arr[0][j]
		if word not in wordsSet:
			words.append(word)
			wordsSet.add(j)
	#nmds = manifold.MDS(metric = True, n_components = N_DISTINGUISHING_FEATURES) 
	#npos = nmds.fit_transform(tfidf_matrix.toarray())
	clf = PCA(n_components=2)
	npos = clf.fit_transform(tfidf_arr)
	plt.scatter(npos[:, 0], npos[:, 1], marker = 'o', c = 'b', cmap = plt.get_cmap('Spectral')) #change colors
	for name, x, y in zip(names, npos[:, 0], npos[:, 1]):
		plt.annotate(
			name, 
			xy = (x, y), xytext = (-20, 20),
			textcoords = 'offset points', ha = 'right', va = 'bottom',
			bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5),
			arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0'))
	fig, ax = plt.subplots()
	ax2 = ax.twinx()
	xAxisP = [featureNames[i] for i in np.argpartition(clf.components_[0], -50)[-50:] if featureNames[i] not in nameSet]
	yAxisP = [featureNames[i] for i in np.argpartition(clf.components_[1], -50)[-50:] if featureNames[i] not in nameSet]
	xAxisN = [featureNames[i] for i in np.argpartition(-clf.components_[0], -50)[-50:] if featureNames[i] not in nameSet]
	yAxisN = [featureNames[i] for i in np.argpartition(-clf.components_[1], -50)[-50:] if featureNames[i] not in nameSet]
	ax.set_xlabel("Most Positively influential words along x axis:\n" + ", ".join(xAxisP), fontsize=18)
	ax.set_ylabel("Most Positively influential words along y axis:\n" + ", ".join(yAxisP), fontsize=18)
	ax2.set_xlabel("Most Negatively influential words along x axis:\n" + ", ".join(xAxisN), fontsize=18)
	ax2.set_ylabel("Most Negatively influential words along y axis:\n" + ", ".join(yAxisN), fontsize=18)
	# xAxis = [featureNames[i] for i in np.argpartition(np.absolute(clf.components_[0]), -50)[-50:] if featureNames[i] not in nameSet]
	# yAxis = [featureNames[i] for i in np.argpartition(np.absolute(clf.components_[1]), -50)[-50:] if featureNames[i] not in nameSet]
	# for i in range(1, max(len(xAxis), len(yAxis)) ):
	# 	if i % 20 == 0 and i < len(xAxis):
	# 		xAxis[i] += "\n"
	# 	if i % 15 == 0 and i < len(yAxis):
	# 		yAxis[i] += "\n"
	# plt.xlabel("Most influential words along x axis:\n" + ", ".join(xAxis), fontsize=18)
	# plt.ylabel("Most influential words along y axis:\n" + ", ".join(yAxis), fontsize=18)
	rcParams.update({'figure.autolayout': True})
	plt.suptitle("Word-Usage Similarity Scatterplot", fontsize = 24, fontweight = 'bold')
	plt.show()
Developer: ctbrennan, Project: cross-platform-message-analytics, Lines: 57, Source: parse_analyze.py


Example 10: _phase2

	def _phase2(self):
		"""
		Execute phase 2 of the SP region. This phase is used to compute the
		active columns.
		
		Note - This should only be called after phase 1 has been called and
		after the inhibition radius and neighborhood have been updated.
		"""
		
		# Shift the outputs
		self.y[:, 1:] = self.y[:, :-1]
		self.y[:, 0] = 0
		
		# Calculate k
		#   - For a column to be active its overlap must be at least as large
		#     as the overlap of the k-th largest column in its neighborhood.
		k = self._get_num_cols()
		
		if self.global_inhibition:
			# The neighborhood is all columns, thus the set of active columns
			# is simply columns that have an overlap >= the k-th largest in the
			# entire region
			
			# Compute the winning column indexes
			if self.learn:				
				# Randomly break ties
				ix = np.argpartition(-self.overlap[:, 0] -
					self.prng.uniform(.1, .2, self.ncolumns), k - 1)[:k]
			else:
				# Choose the same set of columns each time
				ix = np.argpartition(-self.overlap[:, 0], k - 1)[:k]
			
			# Set the active columns
			self.y[ix, 0] = self.overlap[ix, 0] > 0
		else:
			# The neighborhood is bounded by the inhibition radius, therefore
			# each column's neighborhood must be considered
			
			for i in range(self.ncolumns):
				# Get the neighbors
				ix = np.where(self.neighbors[i])[0]
				
				# Compute the minimum top overlap
				if ix.shape[0] <= k:
					# Desired number of candidates is at or below the desired
					# activity level, so find the overall min
					m = max(bn.nanmin(self.overlap[ix, 0]), 1)
				else:
					# Desired number of candidates is above the desired
					# activity level, so find the k-th largest
					m = max(-np.partition(-self.overlap[ix, 0], k - 1)[k - 1],
						1)
				
				# Set the column activity
				if self.overlap[i, 0] >= m: self.y[i, 0] = True
Developer: johnrobinsn, Project: mHTM, Lines: 55, Source: region.py
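
The noise-based tie breaking in the learning branch above works because the added uniform values are smaller than the gap between distinct overlap scores, so only ties get reordered. A small sketch under that assumption, with made-up overlaps:

import numpy as np

rng = np.random.default_rng(0)
overlap = np.array([3.0, 5.0, 5.0, 2.0, 5.0, 1.0])  # three columns tie at 5
k = 2

# Noise in [0.1, 0.2) cannot reorder scores that differ by >= 1, but it
# randomly ranks the tied ones, so the k winners vary from run to run
noisy = overlap + rng.uniform(0.1, 0.2, overlap.size)
winners = np.argpartition(-noisy, k - 1)[:k]  # two of the indices {1, 2, 4}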


Example 11: _build_recursive

        def _build_recursive(indices, level=0, split_index=0):
            """
            Descend recursively into tree to build it, setting splits and
            returning indices for leaves

            :param indices: The current set of indices before partitioning
            :param level: The level in the tree
            :param split_index: The index of the split to set

            :return: A list of arrays representing leaf membership
            :rtype: list[np.ndarray]
            """
            # If we're at the bottom, no split, just return the set
            if level == self._depth:
                return [indices]

            n = indices.size
            # If we literally don't have enough to populate the leaf, make it
            # empty
            if n < 1:
                return []

            # Get the random projections for these indices at this level
            # NB: the projection matrix `proj` is indexed as (sample, level)
            level_proj = proj[indices, level]

            # Split at the median if even, put median in upper half if not
            n_split = n // 2
            if n % 2 == 0:
                part_indices = np.argpartition(
                    level_proj, (n_split - 1, n_split))
                split_val = level_proj[part_indices[n_split - 1]]
                split_val += level_proj[part_indices[n_split]]
                split_val /= 2.0
            else:
                part_indices = np.argpartition(level_proj, n_split)
                split_val = level_proj[part_indices[n_split]]

            splits[split_index] = split_val

            # part_indices is relative to this block of values, recover
            # main indices
            left_indices = indices[part_indices[:n_split]]
            right_indices = indices[part_indices[n_split:]]

            # Descend into each split and get sub-splits
            left_out = _build_recursive(left_indices, level=level + 1,
                                        split_index=2 * split_index + 1)
            right_out = _build_recursive(right_indices, level=level + 1,
                                         split_index=2 * split_index + 2)

            # Assemble index set
            left_out.extend(right_out)
            return left_out
Developer: Kitware, Project: SMQTK, Lines: 54, Source: mrpt.py
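
The even-size branch above is the classic median split: asking argpartition for the two middle positions at once yields both halves and the split value in a single O(n) pass. A standalone sketch with made-up projections:

import numpy as np

proj = np.array([0.7, 0.1, 0.5, 0.9, 0.3, 0.6])
n_split = proj.size // 2

# Positions n_split-1 and n_split land in sorted order; average them for the split
part = np.argpartition(proj, (n_split - 1, n_split))
split_val = (proj[part[n_split - 1]] + proj[part[n_split]]) / 2.0  # 0.55
left, right = part[:n_split], part[n_split:]  # indices of the lower and upper halves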


Example 12: fitOneLinearRegression

def fitOneLinearRegression(thetaLinear, IntensityLinear, tiltanglesArray, options):
	if (len(tiltanglesArray) % 2 == 1):
		halfN = int(len(tiltanglesArray) / 2) + 1
		xLeft, yLeft = thetaLinear[0:halfN], IntensityLinear[0:halfN]
		xRight, yRight = thetaLinear[halfN - 1:], IntensityLinear[halfN - 1:]
	else:
		halfN = int(len(tiltanglesArray) / 2)
		xLeft, yLeft = thetaLinear[0:halfN], IntensityLinear[0:halfN]
		xRight, yRight = thetaLinear[halfN:], IntensityLinear[halfN:]

	slopeLeft, interceptLeft, r2Left = linearRegression(xLeft, yLeft)
	slopeRight, interceptRight, r2Right = linearRegression(xRight, yRight)

	assert(len(xLeft) == len(xRight))

	fitLeft = slopeLeft * xLeft + interceptLeft
	fitRight = slopeRight * xRight + interceptRight

	# Relative residuals of each branch
	resLeft = (yLeft - fitLeft) / fitLeft
	resRight = (yRight - fitRight) / fitRight

	# Sum of squared residuals
	fresLeft = sum(resLeft**2)
	fresRight = sum(resRight**2)
	fres = [fresLeft * 1000000, fresRight * 1000000]

	# Points with the largest N squared residuals in the left and right branches
	N = 3  # options.largestNRes
	negN = (-1) * N
	indexLargeLeft = np.argpartition(resLeft**2, negN)[negN:]
	indexLargeRight = np.argpartition(resRight**2, negN)[negN:]

	# ... and the smallest M squared residuals
	M = 3  # options.smallestNRes
	posM = M
	indexSmallLeft = np.argpartition(resLeft**2, posM)[:posM]
	indexSmallRight = np.argpartition(resRight**2, posM)[:posM]

	# MSE: assuming the population error term has constant variance, its estimate is the
	# mean square error, whose denominator is the sample size reduced by the number of
	# fitted parameters; for a line with intercept that is n - 2, hence ddof=2.
	stdResLeft = np.std(resLeft, ddof=2)
	stdResRight = np.std(resRight, ddof=2)
	stdRes = [stdResLeft * 1000, stdResRight * 1000]

	ret = (fres, stdRes, xLeft, yLeft, fitLeft, xRight, yRight, fitRight,
		indexLargeLeft, indexLargeRight, indexSmallLeft, indexSmallRight,
		resLeft, resRight, slopeLeft, interceptLeft, slopeRight, interceptRight)
	return ret
Developer: jianglab, Project: tomography, Lines: 52, Source: tomoThickness.py


Example 13: define_toplogy

    def define_toplogy(self, num_input, num_hidden,  num_output, density):
        """
        Defines the topology of the OpenBrain network.
        :param num_input:
        :param num_hidden:
        :param num_output:
        :param density:
        :return:
        """
        topo = networkx.DiGraph(networkx.watts_strogatz_graph(self.num_neurons, 5, density, seed=None)).to_directed()
        adjacency_list = topo.adjacency_list()


        # Pick the output neurons to be those with highest in degree
        in_deg = np.array([topo.in_degree(x) for x,_ in enumerate(adjacency_list)])
        self.output_neurons = np.argpartition(in_deg, -num_output)[-num_output:]
        print(self.output_neurons)
        print([topo.in_degree(x) for x in self.output_neurons])

        # Pick the input neurons to be those with highest out degree
        out_deg = np.array([topo.out_degree(x) if x not in self.output_neurons else -1
                            for x,_ in enumerate(adjacency_list)])
        self.input_neurons = np.argpartition(out_deg, -num_input)[-num_input:]

        # Output neurons do not fire out.
        for adjacent_neurons in adjacency_list:
            for out_neuron in self.output_neurons:
                if out_neuron in adjacent_neurons:
                    adjacent_neurons.remove(out_neuron)

        # Disconnect input -> output
        for out in self.output_neurons:
            for inp in self.input_neurons:
                if out in adjacency_list[inp]: adjacency_list[inp].remove(out)
                if inp in adjacency_list[out]: adjacency_list[out].remove(inp)


        for i, adjacent in enumerate(adjacency_list):
            if i not in self.input_neurons and i not in self.output_neurons:
                for n in adjacent:
                    if i in adjacency_list[n]:
                        if np.random.rand(1)>0.5:
                            adjacent.remove(n)
                        else:
                            adjacency_list[n].remove(i)

        # Let nothing enter the input neurons
        for inp in self.input_neurons:
            adjacency_list[inp] = []

        return adjacency_list
Developer: mlberkeley, Project: openbrain, Lines: 51, Source: brain.py


Example 14: sort_by_relative_entropy

def sort_by_relative_entropy(corpus, topicct, stemmer):
    # get the right file names for the corpus and count
    stemmed_weights = ['wordweights/' + fname for fname in os.listdir('wordweights')
            if fname.startswith('{}-{}-{}'.format(corpus, stemmer, topicct))]
    unstemmed_weights = ['wordweights/' + fname for fname in os.listdir('wordweights')
            if fname.startswith('{}-{}-{}'.format(corpus, UNSTEMMED_NAME, topicct))]
    stemmed_corpus_file = 'corpora/{}-train-{}-stopped.txt'.format(corpus, stemmer)
    unstemmed_corpus_file = 'corpora/{}-train-{}-stopped.txt'.format(corpus, UNSTEMMED_NAME)

    # get the mapping from unstemmed to stemmed words
    stemmed_to_unstemmed = defaultdict(set)
    unstemmed_counts = Counter()
    with open(stemmed_corpus_file) as f, open(unstemmed_corpus_file) as g:
        for stemmed_line in f:
            stemmed_words = stemmed_line.split()[3:]
            unstemmed_words = g.readline().split()[3:]
            assert(len(stemmed_words) == len(unstemmed_words))
            for uword, sword in zip(unstemmed_words, stemmed_words):
                stemmed_to_unstemmed[sword].add(uword)
                unstemmed_counts[uword] += 1

    # for each file; for each word; get the entropy
    stemmed_entropies = defaultdict(list)
    unstemmed_entropies = defaultdict(list)
    for file in stemmed_weights:
        entropy_dict = get_stemmed_entropy_per_word(file)
        for k, v in entropy_dict.items():
            stemmed_entropies[k].append(v)
    for file in unstemmed_weights:
        entropy_dict = get_unstemmed_entropy_per_word(file, stemmed_to_unstemmed, int(topicct))
        for k, v in entropy_dict.items():
            unstemmed_entropies[k].append(v)

    # compute difference of average entropies
    stemmed_vocab = [sword for sword, uwords in stemmed_to_unstemmed.items() if len(uwords) > 1]
    entropy_diffs = np.zeros(len(stemmed_vocab))
    for i, sword in enumerate(stemmed_vocab):
        entropy_diffs[i] = np.mean(stemmed_entropies[sword]) - np.mean(unstemmed_entropies[sword])

    # find top 50 maximum and minimum entropies
    min_indices = np.argpartition(entropy_diffs, 50)[:50]
    max_indices = np.argpartition(entropy_diffs, -50)[-50:]
    with open('wordlists/{}-{}-{}.txt'.format(corpus, stemmer, topicct), 'w') as wf:
        wf.write('Lowest entropy differences (stemmed is better)\n')
        for i in min_indices:
            wf.write('{}\t{}\t{}\n'.format(entropy_diffs[i], stemmed_vocab[i], ' '.join(stemmed_to_unstemmed[stemmed_vocab[i]])))
        wf.write('Highest entropy differences (unstemmed is better)\n')
        for i in max_indices:
            wf.write('{}\t{}\t{}\n'.format(entropy_diffs[i], stemmed_vocab[i], ' '.join(stemmed_to_unstemmed[stemmed_vocab[i]])))
Developer: heraldicsandfox, Project: stemmers, Lines: 49, Source: word_entropy.py


Example 15: computeRanks

def computeRanks(composedSpace, observedSpace):
    """Ranks all the representations in the composed space with respect to
    the representations in the observed space. Cut-off value: 1000.
    """
    ranks = {}
    rankList = []

    composedWords = set(composedSpace.get_id2row())
    observedWords = observedSpace.get_id2row()
    neighbours = 1000

    for w_idx, word in enumerate(composedWords):
        vector = composedSpace.get_row(word)
        Y = 1 - cdist(vector.mat, observedSpace.get_cooccurrence_matrix().mat, 'cosine')
        nearest = Y.argmax()
        nearest_k_indices = np.argpartition(Y, tuple([-p for p in range(neighbours)]), axis=None)[-neighbours:]
        # pp([(observedWords[idx], Y[0][idx]) for idx in reversed(nearest_k_indices)])
        words = [observedWords[idx] for idx in reversed(nearest_k_indices)]
        wordRanks = {word:index+1 for index,word in enumerate(words)}
        # print(wordRanks)

        if (word in wordRanks):
            r = wordRanks[word]
            ranks[word] = r
            rankList.append(r)

        else:
            ranks[word] = 1000
            rankList.append(1000)

        if ((w_idx > 0) and (w_idx % 100 == 0)):
            print(w_idx)

    return rankList, ranks
Developer: corinadima, Project: gWordcomp, Lines: 34, Source: composition_eval.py


Example 16: splitBimodal

    def splitBimodal(self, x, y, largepoly=30):
        p = np.polyfit(x, y, largepoly) # polynomial coefficients for fit

        extrema = np.roots(np.polyder(p))
        extrema = extrema[np.isreal(extrema)]
        extrema = extrema[(extrema - x[1]) * (x[-2] - extrema) > 0] # exclude the endpoints due to false maxima during fitting
        try:
            root_vals = [sum([p[::-1][i]*(root**i) for i in range(len(p))]) for root in extrema]
            peaks = extrema[np.argpartition(root_vals, -2)][-2:] # find two peaks of bimodal distribution

            mid, = np.where((x - peaks[0]) * (peaks[1] - x) > 0)  # want data points between the peaks
        except:
            warnings.warn("Peak finding failed!")
            return None

        try:
            p_mid = np.polyfit(x[mid], y[mid], 2) # fit middle section to a parabola
            midpoint = np.roots(np.polyder(p_mid))[0]
        except:
            warnings.warn("Polynomial fit between peaks of distribution poorly conditioned. Falling back on using the minimum! May result in inaccurate split determination.")
            if len(mid) == 0:
                return None

            midx = np.argmin(y[mid])
            midpoint = x[mid][midx]

        return midpoint
Developer: j-dr, Project: bigbrother, Lines: 28, Source: metric.py


Example 17: query_with_distances

    def query_with_distances(self, v, n):
        """Find indices of `n` most similar vectors from the index to query vector `v`."""
        if self._metric == 'hamming':
            v = numpy.packbits(v)

        if self._metric != 'jaccard':
            # use same precision for query as for index
            v = numpy.ascontiguousarray(v, dtype = self.index.dtype)

        # HACK we ignore query length as that's a constant not affecting the final ordering
        if self._metric == 'angular':
            # argmax_a cossim(a, b) = argmax_a dot(a, b) / |a||b| = argmin_a -dot(a, b)
            dists = -numpy.dot(self.index, v)
        elif self._metric == 'euclidean':
            # argmin_a (a - b)^2 = argmin_a a^2 - 2ab + b^2 = argmin_a a^2 - 2ab
            dists = self.lengths - 2 * numpy.dot(self.index, v)
        elif self._metric == 'hamming':
            diff = numpy.bitwise_xor(v, self.index)
            pc = BruteForceBLAS.popcount
            den = float(len(v) * 8)
            dists = [sum([pc[part] for part in point]) / den for point in diff]
        elif self._metric == 'jaccard':
            dists = [pd[self._metric]['distance'](v, e) for e in self.index]
        else:
            assert False, "invalid metric"  # shouldn't get past the constructor!
        nearest_indices = numpy.argpartition(dists, n)[:n]  # partition-sort by distance, get `n` closest
        indices = [idx for idx in nearest_indices if pd[self._metric]["distance_valid"](dists[idx])]
        def fix(index):
            ep = self.index[index]
            ev = v
            if self._metric == "hamming":
                ep = numpy.unpackbits(ep)
                ev = numpy.unpackbits(ev)
            return (index, pd[self._metric]['distance'](ep, ev))
        return map(fix, indices)
Developer: ilyaraz, Project: ann-benchmarks, Lines: 35, Source: bruteforce.py


Example 18: nearest

 def nearest(self,wrd,N=10):
     wrd_vec_norm=self.w_to_normv(wrd)
     if wrd_vec_norm is None:
         return
     sims=self.vectors.dot(wrd_vec_norm)/self.norm_constants #cosine similarity to all other vecs
     #http://stackoverflow.com/questions/6910641/how-to-get-indices-of-n-maximum-values-in-a-numpy-array
     return sorted(((sims[idx],self.words[idx]) for idx in numpy.argpartition(sims,-N-1)[-N-1:]), reverse=True)[1:]
Developer: fginter, Project: wvlib_light, Lines: 7, Source: lwvlib.py
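
The -N-1 slice above leaves room for the query word itself, which is always its own nearest neighbour and is dropped after sorting. The same pattern in plain numpy, with a made-up similarity vector:

import numpy as np

rng = np.random.default_rng(0)
sims = rng.random(10000)  # cosine similarities to every vocabulary word
sims[42] = 1.0            # the query word's similarity to itself

N = 10
cand = np.argpartition(sims, -N - 1)[-N - 1:]  # O(n) preselection of N + 1 candidates
ranked = cand[np.argsort(sims[cand])[::-1]]    # full sort over just those candidates
neighbours = ranked[1:]                        # drop the query word, keep the N nearest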


Example 19: cluster_newsgroups

def cluster_newsgroups():
    """ Cluster newsgroup categories. """

    from kmeans import KMeans
    from similarity import simMatrix

    corpus, dictionary = build_dictionary(bigram=True)
    tfidf = TFIDF(dictionary)
    newsgroups = tfidf.vectorize(corpus)
    dictionary = tfidf.dictionary

    categories = sorted(corpus.keys())

    N = 6
    print("\n{}-Most Common Words".format(N))
    for index, category in enumerate(categories):
        nlargest = np.argpartition(newsgroups[index,:], -N)[-N:]
        nlargest = nlargest[np.argsort(newsgroups[index,nlargest])][::-1]
        print("{:>24} {}".format(category, dictionary[nlargest]))
    print()

    K = 3
    km = KMeans(n_clusters=K)
    km.fit(newsgroups)

    labels = km.labels_

    print("\nKMeans Label Assignment, K = {}".format(K))
    for category, label in zip(categories, labels):
        print(int(label), category)

    simMatrix(newsgroups).plot().show()
Developer: JFanZhao, Project: practice, Lines: 32, Source: tfidf.py


Example 20: argtopk

def argtopk(a_plus_idx, k, axis, keepdims):
    """ Chunk and combine function of argtopk

    Extract the indices of the k largest elements from a on the given axis.
    If k is negative, extract the indices of the -k smallest elements instead.
    Note that, unlike in the parent function, the returned elements
    are not sorted internally.
    """
    assert keepdims is True
    axis = axis[0]

    if isinstance(a_plus_idx, list):
        a_plus_idx = list(flatten(a_plus_idx))
        a = np.concatenate([ai for ai, _ in a_plus_idx], axis)
        idx = np.concatenate([broadcast_to(idxi, ai.shape)
                              for ai, idxi in a_plus_idx], axis)
    else:
        a, idx = a_plus_idx

    if abs(k) >= a.shape[axis]:
        return a_plus_idx

    idx2 = np.argpartition(a, -k, axis=axis)
    k_slice = slice(-k, None) if k > 0 else slice(-k)
    idx2 = idx2[tuple(k_slice if i == axis else slice(None)
                      for i in range(a.ndim))]
    return take_along_axis(a, idx2, axis), take_along_axis(idx, idx2, axis)
Developer: martindurant, Project: dask, Lines: 27, Source: chunk.py
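
As a footnote to this last example, the argpartition/take_along_axis pairing works the same on an ordinary array; a minimal sketch of per-row top-k selection with made-up data:

import numpy as np

a = np.array([[4, 1, 9, 7],
              [2, 8, 3, 5]])
k = 2

idx = np.argpartition(a, -k, axis=-1)[:, -k:]  # column positions of the k largest per row
vals = np.take_along_axis(a, idx, axis=-1)     # gather them row by row
print(vals)  # rows hold {9, 7} and {8, 5}, within-row order not guaranteed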



Note: The numpy.argpartition examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are drawn from open-source projects contributed by their developers; copyright remains with the original authors, and distribution or use should follow each project's license. Do not reproduce without permission.

