Python pairwise.cosine_distances Function Code Examples


This article collects typical usage examples of the Python function sklearn.metrics.pairwise.cosine_distances. If you are unsure what cosine_distances does, how to call it, or what real-world usage looks like, the curated examples below should help.



Twenty code examples of the cosine_distances function are shown below, ordered by popularity.
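
For orientation before the examples, here is a minimal sketch of the basic API (the array values are illustrative only, not drawn from the examples below): cosine_distances accepts one or two 2-D arrays of row vectors and returns the matrix of pairwise distances, defined as 1 minus the cosine similarity, so every entry lies in [0, 2].

import numpy as np
from sklearn.metrics.pairwise import cosine_distances

X = np.array([[1.0, 0.0],
              [0.0, 1.0],
              [2.0, 0.0]])
D = cosine_distances(X)  # 3 x 3 matrix of pairwise distances
# rows with the same direction give 0.0 (rows 0 and 2), orthogonal rows give 1.0;
# note that scaling a row does not change its cosine distance
print(D)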

Example 1: test_cosine_distances

def test_cosine_distances():
    # Check the pairwise Cosine distances computation
    rng = np.random.RandomState(1337)
    x = np.abs(rng.rand(910))
    XA = np.vstack([x, x])
    D = cosine_distances(XA)
    assert_array_almost_equal(D, [[0., 0.], [0., 0.]])
    # check that all elements are in [0, 2]
    assert np.all(D >= 0.)
    assert np.all(D <= 2.)
    # check that diagonal elements are equal to 0
    assert_array_almost_equal(D[np.diag_indices_from(D)], [0., 0.])

    XB = np.vstack([x, -x])
    D2 = cosine_distances(XB)
    # check that all elements are in [0, 2]
    assert np.all(D2 >= 0.)
    assert np.all(D2 <= 2.)
    # check that diagonal elements are equal to 0 and non diagonal to 2
    assert_array_almost_equal(D2, [[0., 2.], [2., 0.]])

    # check large random matrix
    X = np.abs(rng.rand(1000, 5000))
    D = cosine_distances(X)
    # check that diagonal elements are equal to 0
    assert_array_almost_equal(D[np.diag_indices_from(D)], [0.] * D.shape[0])
    assert np.all(D >= 0.)
    assert np.all(D <= 2.)
Developer: scikit-learn | Project: scikit-learn | Lines: 28 | Source: test_pairwise.py


Example 2: get_features

    def get_features(vocab):
        vectorizer_head = TfidfVectorizer(vocabulary=vocab, use_idf=False, norm='l2')
        X_train_head = vectorizer_head.fit_transform(headlines)

        vectorizer_body = TfidfVectorizer(vocabulary=vocab, use_idf=False, norm='l2')
        X_train_body = vectorizer_body.fit_transform(bodies)

        # compute the n most important topics of the bodies; each topic ranks all words
        # by importance. The more of a topic's important words a body contains,
        # the higher the body's score for that topic
        # (note: n_topics was renamed to n_components in newer scikit-learn versions)
        lda_body = LatentDirichletAllocation(n_topics=n_topics, learning_method='online', random_state=0, n_jobs=3)

        print("latent_dirichlet_allocation_cos: fit and transform body")
        t0 = time()
        lda_body_matrix = lda_body.fit_transform(X_train_body)
        print("done in %0.3fs." % (time() - t0))

        print("latent_dirichlet_allocation_cos: transform head")
        # use the LDA trained on body topics for the headlines => if the headlines and bodies share topics
        # their vectors should be similar
        lda_head_matrix = lda_body.transform(X_train_head)

        #print_top_words(lda_body, vectorizer_body.get_feature_names(), 100)

        print('latent_dirichlet_allocation_cos: calculating cosine distance between head and body')
        # calculate cosine distance between the body and head
        X = []
        for i in range(len(lda_head_matrix)):
            X_head_vector = np.array(lda_head_matrix[i]).reshape((1, -1))  # reshape: 1-D array input is deprecated
            X_body_vector = np.array(lda_body_matrix[i]).reshape((1, -1))
            cos_dist = cosine_distances(X_head_vector, X_body_vector).flatten()
            X.append(cos_dist.tolist())
        return X
Developer: paris5020 | Project: athene_system | Lines: 32 | Source: topic_models.py


Example 3: sumACluster

def sumACluster(dist, vecsIn, topK_t, sameTweetThred):
    if dist == "cosine":
        distMatrix = pairwise.cosine_distances(vecsIn)
    elif dist == "eu":
        distMatrix = pairwise.euclidean_distances(vecsIn, vecsIn)

    sameTweetClusters = [[0]]
    for seqid, _ in enumerate(vecsIn[1:], start=1):
        added = False
        for stc in sameTweetClusters:
            # join the first cluster whose representative (first member) is close enough
            if distMatrix[seqid][stc[0]] <= sameTweetThred:
                stc.append(seqid)
                added = True
                break
        if not added:
            sameTweetClusters.append([seqid])
    sameTweetClusterNum = [(stcid, len(stc)) for stcid, stc in enumerate(sameTweetClusters)]
    numIn = len(sameTweetClusterNum)
    top = sorted(sameTweetClusterNum, key = lambda a:a[1], reverse=True)[:min(topK_t, numIn)]
    top = [(sameTweetClusters[item[0]][0], item[1]) for item in top]
    return top
Developer: qolina | Project: DBED | Lines: 27 | Source: tweetClustering.py


Example 4: test_linkage_misc

def test_linkage_misc():
    # Misc tests on linkage
    X = np.ones((5, 5))
    assert_raises(ValueError,
                  AgglomerativeClustering(linkage='foobar').fit,
                  X)
    assert_raises(ValueError, linkage_tree, X, linkage='foobar')
    assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4)))

    # Smoke test FeatureAgglomeration
    FeatureAgglomeration().fit(X)

    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", DeprecationWarning)
        # Use the copy argument, to raise a warning
        Ward(copy=True).fit(X)
    # We should be getting 2 warnings: one for using Ward that is
    # deprecated, one for using the copy argument
    assert_equal(len(warning_list), 2)

    # test hierarchical clustering on a precomputed distances matrix
    dis = cosine_distances(X)
    res = linkage_tree(dis, affinity="precomputed")
    assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0])

    # test hierarchical clustering with a callable affinity (manhattan_distances)
    res = linkage_tree(X, affinity=manhattan_distances)
    assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])
Developer: Arezou1 | Project: scikit-learn | Lines: 28 | Source: test_hierarchical.py


Example 5: test_linkage_misc

def test_linkage_misc():
    # Misc tests on linkage
    rnd = np.random.RandomState(42)
    X = rnd.normal(size=(5, 5))
    assert_raises(ValueError, AgglomerativeClustering(linkage='foo').fit, X)
    assert_raises(ValueError, linkage_tree, X, linkage='foo')
    assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4)))

    # Smoke test FeatureAgglomeration
    FeatureAgglomeration().fit(X)

    # Deprecation of Ward class
    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", DeprecationWarning)
        Ward().fit(X)
    assert_equal(len(warning_list), 1)

    # test hierarchical clustering on a precomputed distances matrix
    dis = cosine_distances(X)
    res = linkage_tree(dis, affinity="precomputed")
    assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0])

    # test hierarchical clustering with a callable affinity (manhattan_distances)
    res = linkage_tree(X, affinity=manhattan_distances)
    assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])
Developer: MatteoFu | Project: scikit-learn | Lines: 25 | Source: test_hierarchical.py


Example 6: getModelInfo

def getModelInfo(model, features):
    print("Shape of the transformed features = {}".format(features.shape))
    # Uncomment to info:
    # vocab = model.get_feature_names()
    # dist = np.sum(features, axis=0)
    # for tag, count in izip(vocab, dist):
    #     print("word = {}, frequency = {}".format(tag, count))
    return cosine_distances(features)
Developer: geekman2 | Project: GutenTag | Lines: 8 | Source: sklearn_model.py


Example 7: _build_metastore

    def _build_metastore(self):

        medians = np.median(self.X, axis=0).reshape(1, self.dim)

        # how far each data point is from the global median
        dists = cosine_distances(self.X, Y=medians).reshape(-1)

        sorted_index = [self.index[i] for i in dists.argsort()]

        return {'sorted_index': sorted_index}
Developer: ashishyadavppe | Project: Skater | Lines: 10 | Source: datamanager.py


Example 8: calcurate_centroid_Matrix

def calcurate_centroid_Matrix(veclist, word2vecdic,DimentionN):
	centroid_Matrix = np.zeros((DimentionN, 200))
	distance_arrays = np.zeros(DimentionN)
	for word in veclist:
		label = word2vecdic[word]
		centroid_Matrix[label] += veclist[word]
	for word in veclist:
		label = word2vecdic[word]
		# cosine_distances expects 2-D inputs; reshape and take the scalar [0, 0] entry
		vec = np.asarray(veclist[word]).reshape(1, -1)
		distance_arrays[label] += cosine_distances(vec, centroid_Matrix[label].reshape(1, -1))[0, 0]
	return centroid_Matrix, distance_arrays
Developer: ItoTomoki | Project: ruiternews | Lines: 10 | Source: yahoofinancil_board_preprocess.py


Example 9: memory_cf

def memory_cf(users, movies, k, similarity_measure, weight_schema,
              repr_matrix=rating_matrix_orig, rating_matrix=rating_matrix_orig):
    """
    Memory-based collaborative filtering.
    :param users: a user list.
    :param movies: a movie list.
    :param k: number of nearest users
    :param similarity_measure: 'cosine' or 'dot_product'
    :param weight_schema: 'mean' or 'weighted_mean'
    :param repr_matrix: data point representation
    :param rating_matrix: ratings based on user-movie or cluster centroids
    :return: recommended ratings for the queries
    """

    # construct mapping between input users and unique users
    ratings, user_unique = [], list(set(users))
    user_index_map = dict((u, i) for i, u in enumerate(user_unique))
    users = [(u, user_index_map[u]) for u in users]

    # find k nearest neighbor for each user
    if similarity_measure == 'cosine':
        dist = cosine_distances(repr_matrix[user_unique, :], repr_matrix)
        sims = 1 - dist
    elif similarity_measure == 'dot_product':
        sims = repr_matrix[user_unique, :].dot(repr_matrix.T)
        if issparse(sims):
            sims = sims.toarray()
        dist = -sims

    sorted_neighbors = np.argsort(dist, axis=1)

    # make rating matrix dense for fast access
    rating_matrix = rating_matrix.toarray()
    weight_method = mean if weight_schema == 'mean' else weighted_mean

    for (user_index, neighbor_index), movie in zip(users, movies):
        neighbors = list(islice(ifilter(lambda u: (u, movie) in entry_set,
                                        sorted_neighbors[neighbor_index]),
                                k + 1))

        # no neighbors, regarded as 3
        if not neighbors:
            ratings.append(3)
            continue

        # exclude itself
        if user_index in neighbors:
            neighbors.remove(user_index)

        rating = weight_method(rating_matrix[neighbors, movie],
                               sims[neighbor_index, neighbors])
        ratings.append(rating)

    return ratings
Developer: EDFward | Project: TrivialCF | Lines: 54 | Source: cf.py


Example 10: get_sparse_dist_matrix

def get_sparse_dist_matrix(tweets_tfidf_matrix, eps):
    """Get the sparse distance matrix from the pairwise cosine distance
    computations from the given tfidf vectors. Only distances less than or
    equal to eps are put into the matrix"""
    rows = []
    cols = []
    data = []
    for ndx, tweet in enumerate(tweets_tfidf_matrix):
        rows.append(len(cols))
        distances = cosine_distances(tweet, tweets_tfidf_matrix)[0]
        for other_ndx, dist in enumerate(distances):
            if ndx != other_ndx and dist <= eps:
                cols.append(other_ndx)
                data.append(dist)
    rows.append(len(cols))  # the CSR index pointer needs one entry per row plus a final terminator
    return csr_matrix((data, cols, rows), dtype=float)  # distances are floats; dtype=int would zero them
Developer: jiwu14 | Project: TweetAnalyzer | Lines: 15 | Source: TweetAnalyzer.py


Example 11: test_linkage_misc

def test_linkage_misc():
    # Misc tests on linkage
    rng = np.random.RandomState(42)
    X = rng.normal(size=(5, 5))
    assert_raises(ValueError, AgglomerativeClustering(linkage='foo').fit, X)
    assert_raises(ValueError, linkage_tree, X, linkage='foo')
    assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4)))

    # Smoke test FeatureAgglomeration
    FeatureAgglomeration().fit(X)

    # test hierarchical clustering on a precomputed distances matrix
    dis = cosine_distances(X)

    res = linkage_tree(dis, affinity="precomputed")
    assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0])

    # test hierarchical clustering with a callable affinity (manhattan_distances)
    res = linkage_tree(X, affinity=manhattan_distances)
    assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])
Developer: foresthz | Project: scikit-learn | Lines: 20 | Source: test_hierarchical.py


Example 12: cluster_cf_memory

    def cluster_cf_memory():
        """
        Cluster-based memory CF.
        """
        rating_matrix_cluster = np.empty([k_user, rating_matrix_orig.shape[1]],
                                         dtype=np.float64)

        # build rating matrix for each user cluster, on each movie
        for i in range(k_user):
            cluster_indicator = np.where(user_belonging == i)[0]
            rating_cluster = rating_matrix_orig[cluster_indicator, :]
            rating_sum = rating_cluster.sum(axis=0)
            # take average by dividing count
            rating_cluster.data = np.ones(len(rating_cluster.data))
            mu = rating_sum / rating_cluster.sum(axis=0)
            # fill 0 for nan
            mu[np.isnan(mu)] = 0
            rating_matrix_cluster[i, :] = mu

        # construct mapping between input users and unique users
        ratings, user_unique = [], list(set(users))
        user_index_map = dict((u, i) for i, u in enumerate(user_unique))
        users_neighbors = [user_index_map[u] for u in users]

        if similarity_measure == 'cosine':
            dist = cosine_distances(rating_matrix_orig[user_unique, :], m2uc.T)
            sims = 1 - dist
        else:
            sims = rating_matrix_orig[user_unique, :].dot(m2uc).toarray()
            dist = -sims

        nearest_neighbors = np.argpartition(dist, k, axis=1)[:, :k]
        weight_method = mean if weight_schema == 'mean' else weighted_mean

        for neighbor_index, movie in zip(users_neighbors, movies):
            neighbors = nearest_neighbors[neighbor_index]
            rating = weight_method(rating_matrix_cluster[neighbors, movie],
                                   sims[neighbor_index, neighbors])
            ratings.append(rating)

        return ratings
Developer: EDFward | Project: TrivialCF | Lines: 41 | Source: cf.py


Example 13: plot_mds

def plot_mds(points, genres, n_points=500):
    '''
    Plots a set of documents in MDS space

    Args:
        points: dense array with coordinates of each document
        genres: list of genres for each entry in points
    Returns:
        None
    '''

    genres = np.array(genres)
    genre_sel = np.not_equal(genres, None)
    X, y = points[genre_sel], genres[genre_sel]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, train_size=n_points)

    distances = cosine_distances(X_train, X_train)
    mds = MDS(n_components=2, dissimilarity='precomputed')
    mds.fit(distances)

    plot_embedding(mds.embedding_, y_train)
Developer: lwoloszy | Project: albumpitch | Lines: 23 | Source: genres.py


Example 14: test_fp16_cosine_metric

 def test_fp16_cosine_metric(self):
     arr = numpy.empty((10000, 2), dtype=numpy.float16)
     angs = numpy.random.rand(10000) * 2 * numpy.pi
     for i in range(10000):
         arr[i] = numpy.sin(angs[i]), numpy.cos(angs[i])
     with self.stdout:
         centroids, assignments = kmeans_cuda(
             arr, 4, init="kmeans++", metric="cos", device=1, verbosity=2,
             seed=3)
     self.assertEqual(self._get_iters_number(self.stdout), 5)
     self.assertEqual(len(centroids), 4)
     for c in centroids:
         norm = numpy.linalg.norm(c)
         self.assertTrue(0.9995 < norm < 1.0005)
     dists = numpy.round(cosine_distances(centroids)).astype(int)
     self.assertTrue((dists == [
         [0, 2, 1, 1],
         [2, 0, 1, 1],
         [1, 1, 0, 2],
         [1, 1, 2, 0],
     ]).all())
     self.assertEqual(numpy.min(assignments), 0)
     self.assertEqual(numpy.max(assignments), 3)
Developer: src-d | Project: kmcuda | Lines: 23 | Source: test.py


Example 15: cosine_similarity

def cosine_similarity(vector_a, vector_b):
	return 1 - cosine_distances(vector_a, vector_b)
Developer: hanveiga | Project: master-thesis | Lines: 2 | Source: information_measure.py


Example 16: len

tweet = {  # excerpt: the source omits this tf-idf dictionary's earlier entries
         'control': 3.721765211295327,
         'democratic': 3.1026721743330414,
         'governments': 4.167571323949673,
         'in': 0.0009654063501214492,
         'law': 2.4538226269605703,
         'popular': 2.764478952022998,
         'response': 4.261461747058352,
         'to': 0.04694493768179923}

word_indices = [map_index_to_word[word] for word in tweet.keys()]

tweet_tf_idf = scipy.sparse.csr_matrix((list(tweet.values()), ([0] * len(word_indices), word_indices)),
                                       shape=(1, tf_idf.shape[1]))
obama_tf_idf = tf_idf[obama_id]
print("The cosine distance between Obama's article and the tweet is {:.6e}."
      .format(cosine_distances(obama_tf_idf, tweet_tf_idf)[0, 0]))
print('''
With cosine distances, the tweet is "nearer" to Barack Obama.
Ignoring article lengths completely resulted in nonsensical results.
In practice, it is common to enforce maximum or minimum document lengths.
''')

# QUIZ QUESTIONS:
print("Quiz Questions:")
# 1. Among the words that appear in both Barack Obama and Francisco Barrio,
#    take the 5 that appear most frequently in Obama.
#    How many of the articles in the Wikipedia dataset contain all of those 5 words?
print("1. Among the words that appear in both Barack Obama and Francisco Barrio, ")
print("   take the 5 that appear most frequently in Obama.")
print("   There are {:d} articles in the Wikipedia dataset contain all of those 5 words.\n"
      .format(has_top_words_count[True]))
Developer: yf23 | Project: Machine_Learning_UW | Lines: 31 | Source: assignment_1.py
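
The length-invariance that Example 16 prints about ("Ignoring article lengths completely") is easy to verify directly. A minimal sketch, with illustrative values:

import numpy as np
from sklearn.metrics.pairwise import cosine_distances

doc = np.array([[1.0, 2.0, 3.0]])
query = np.array([[2.0, 1.0, 0.5]])
# a 100x "longer" document pointing in the same direction is equally near the query
print(cosine_distances(doc, query))
print(cosine_distances(100 * doc, query))  # prints the same value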


Example 17: cosine_similarity

"""
Testing the change in embeddings over time. Assumes
that we've already generated embeddings in output/.
"""
import pandas as pd
import numpy as np
import os, codecs
from sklearn.metrics.pairwise import cosine_similarity, cosine_distances

if __name__ == '__main__':
    out_dir = 'output'
    embedding_files = [os.path.join(out_dir, f) for f in os.listdir(out_dir)]
    # test 0: do the embeddings make semantic sense? 
    end_embedding = pd.read_csv(embedding_files[-1], sep='\t', index_col=0)
    test_words = ['you', 'go', 'road', 'give', 'cold']
    for test_word in test_words:
        sims = end_embedding.apply(lambda r: cosine_similarity(r.reshape(1,-1), 
                                                               end_embedding.loc[test_word].reshape(1,-1))[0][0], 
                                   axis=1)
        print('test word %s has top 10 similarities \n%s'%
              (test_word, sims.sort_values(ascending=False)[:10]))
    # TL;DR the embeddings aren't perfect but they work for more common words
    # test 1: how much have embeddings changed from start to end of data?
    start_embedding = pd.read_csv(embedding_files[1], sep='\t', index_col=0)
    embedding_deltas = abs(cosine_distances(end_embedding, start_embedding))
    embedding_deltas = pd.Series(np.diagonal(embedding_deltas), 
                                 index=end_embedding.index).sort_values(ascending=True)
    print('got embedding deltas %s'%(embedding_deltas))
Developer: ianbstewart | Project: concept-dynamics | Lines: 28 | Source: test_temporal_embeddings.py


Example 18: manhattan_distances

# extract the terms-by-documents matrix 
# in scipy compressed sparse column format
sparse_movies_tdm = tdm_method.fit_transform(parsed_text)
# convert sparse matrix into regular terms-by-documents matrix
movies_tdm = sparse_movies_tdm.todense()
# define the documents-by-terms matrix 
movies_dtm = movies_tdm.transpose()
 
# dissimilarity measures and multidimensional scaling
# consider alternative pairwise distance metrics from sklearn modules
# euclidean_distances, cosine_distances, manhattan_distances (city-block)
# note that different metrics provide different solutions
# movies_distance_matrix = euclidean_distances(movies_tdm)
# movies_distance_matrix = manhattan_distances(movies_tdm)
movies_distance_matrix = cosine_distances(movies_tdm)

mds_method = manifold.MDS(n_components = 2, random_state = 9999,\
    dissimilarity = 'precomputed')
mds_fit = mds_method.fit(movies_distance_matrix)  
mds_coordinates = mds_method.fit_transform(movies_distance_matrix) 

# plot tagline text for years in two dimensions 
# defined by multidimensional scaling
plt.figure()
plt.scatter(mds_coordinates[:,0], mds_coordinates[:,1],\
    facecolors = 'none', edgecolors = 'none')  # invisible points; the year labels are drawn below
labels = []
for iyear in range(1974,2014):
    labels.append(str(iyear))  
for label, x, y in zip(labels, mds_coordinates[:,0], mds_coordinates[:,1]):
Developer: Alextnelson | Project: mtpa | Lines: 30 | Source: chapter_7_program.py


Example 19: test_pairwise_distances

def test_pairwise_distances():
    # Test the pairwise_distance helper function.
    rng = np.random.RandomState(0)

    # Euclidean distance should be equivalent to calling the function.
    X = rng.random_sample((5, 4))
    S = pairwise_distances(X, metric="euclidean")
    S2 = euclidean_distances(X)
    assert_array_almost_equal(S, S2)

    # Euclidean distance, with Y != X.
    Y = rng.random_sample((2, 4))
    S = pairwise_distances(X, Y, metric="euclidean")
    S2 = euclidean_distances(X, Y)
    assert_array_almost_equal(S, S2)

    # Test with tuples as X and Y
    X_tuples = tuple([tuple([v for v in row]) for row in X])
    Y_tuples = tuple([tuple([v for v in row]) for row in Y])
    S2 = pairwise_distances(X_tuples, Y_tuples, metric="euclidean")
    assert_array_almost_equal(S, S2)

    # Test haversine distance
    # The data should be valid latitude and longitude
    X = rng.random_sample((5, 2))
    X[:, 0] = (X[:, 0] - 0.5) * 2 * np.pi/2
    X[:, 1] = (X[:, 1] - 0.5) * 2 * np.pi
    S = pairwise_distances(X, metric="haversine")
    S2 = haversine_distances(X)
    assert_array_almost_equal(S, S2)

    # Test haversine distance, with Y != X
    Y = rng.random_sample((2, 2))
    Y[:, 0] = (Y[:, 0] - 0.5)*2*np.pi/2
    Y[:, 1] = (Y[:, 1] - 0.5)*2*np.pi
    S = pairwise_distances(X, Y, metric="haversine")
    S2 = haversine_distances(X, Y)
    assert_array_almost_equal(S, S2)

    # "cityblock" uses scikit-learn metric, cityblock (function) is
    # scipy.spatial.
    S = pairwise_distances(X, metric="cityblock")
    S2 = pairwise_distances(X, metric=cityblock)
    assert_equal(S.shape[0], S.shape[1])
    assert_equal(S.shape[0], X.shape[0])
    assert_array_almost_equal(S, S2)

    # The manhattan metric should be equivalent to cityblock.
    S = pairwise_distances(X, Y, metric="manhattan")
    S2 = pairwise_distances(X, Y, metric=cityblock)
    assert_equal(S.shape[0], X.shape[0])
    assert_equal(S.shape[1], Y.shape[0])
    assert_array_almost_equal(S, S2)

    # Test cosine as a string metric versus cosine callable
    # The string "cosine" uses sklearn.metric,
    # while the function cosine is scipy.spatial
    S = pairwise_distances(X, Y, metric="cosine")
    S2 = pairwise_distances(X, Y, metric=cosine)
    assert_equal(S.shape[0], X.shape[0])
    assert_equal(S.shape[1], Y.shape[0])
    assert_array_almost_equal(S, S2)

    # Test with sparse X and Y,
    # currently only supported for Euclidean, L1 and cosine.
    X_sparse = csr_matrix(X)
    Y_sparse = csr_matrix(Y)
    S = pairwise_distances(X_sparse, Y_sparse, metric="euclidean")
    S2 = euclidean_distances(X_sparse, Y_sparse)
    assert_array_almost_equal(S, S2)
    S = pairwise_distances(X_sparse, Y_sparse, metric="cosine")
    S2 = cosine_distances(X_sparse, Y_sparse)
    assert_array_almost_equal(S, S2)
    S = pairwise_distances(X_sparse, Y_sparse.tocsc(), metric="manhattan")
    S2 = manhattan_distances(X_sparse.tobsr(), Y_sparse.tocoo())
    assert_array_almost_equal(S, S2)
    S2 = manhattan_distances(X, Y)
    assert_array_almost_equal(S, S2)

    # Test with scipy.spatial.distance metric, with a kwd
    kwds = {"p": 2.0}
    S = pairwise_distances(X, Y, metric="minkowski", **kwds)
    S2 = pairwise_distances(X, Y, metric=minkowski, **kwds)
    assert_array_almost_equal(S, S2)

    # same with Y = None
    kwds = {"p": 2.0}
    S = pairwise_distances(X, metric="minkowski", **kwds)
    S2 = pairwise_distances(X, metric=minkowski, **kwds)
    assert_array_almost_equal(S, S2)

    # Test that scipy distance metrics throw an error if sparse matrix given
    assert_raises(TypeError, pairwise_distances, X_sparse, metric="minkowski")
    assert_raises(TypeError, pairwise_distances, X, Y_sparse,
                  metric="minkowski")

    # Test that a value error is raised if the metric is unknown
    assert_raises(ValueError, pairwise_distances, X, Y, metric="blah")
Developer: scikit-learn | Project: scikit-learn | Lines: 98 | Source: test_pairwise.py


Example 20: test_pairwise_distances

def test_pairwise_distances():
    """ Test the pairwise_distance helper function. """
    rng = np.random.RandomState(0)
    # Euclidean distance should be equivalent to calling the function.
    X = rng.random_sample((5, 4))
    S = pairwise_distances(X, metric="euclidean")
    S2 = euclidean_distances(X)
    assert_array_almost_equal(S, S2)
    # Euclidean distance, with Y != X.
    Y = rng.random_sample((2, 4))
    S = pairwise_distances(X, Y, metric="euclidean")
    S2 = euclidean_distances(X, Y)
    assert_array_almost_equal(S, S2)
    # Test with tuples as X and Y
    X_tuples = tuple([tuple([v for v in row]) for row in X])
    Y_tuples = tuple([tuple([v for v in row]) for row in Y])
    S2 = pairwise_distances(X_tuples, Y_tuples, metric="euclidean")
    assert_array_almost_equal(S, S2)
    # "cityblock" uses sklearn metric, cityblock (function) is scipy.spatial.
    S = pairwise_distances(X, metric="cityblock")
    S2 = pairwise_distances(X, metric=cityblock)
    assert_equal(S.shape[0], S.shape[1])
    assert_equal(S.shape[0], X.shape[0])
    assert_array_almost_equal(S, S2)
    # The manhattan metric should be equivalent to cityblock.
    S = pairwise_distances(X, Y, metric="manhattan")
    S2 = pairwise_distances(X, Y, metric=cityblock)
    assert_equal(S.shape[0], X.shape[0])
    assert_equal(S.shape[1], Y.shape[0])
    assert_array_almost_equal(S, S2)
    # manhattan does not support sparse matrices atm.
    assert_raises(ValueError, pairwise_distances, csr_matrix(X),
                  metric="manhattan")
    # Low-level function for manhattan can divide in blocks to avoid
    # using too much memory during the broadcasting
    S3 = manhattan_distances(X, Y, size_threshold=10)
    assert_array_almost_equal(S, S3)
    # Test cosine as a string metric versus cosine callable
    # "cosine" uses sklearn metric, cosine (function) is scipy.spatial
    S = pairwise_distances(X, Y, metric="cosine")
    S2 = pairwise_distances(X, Y, metric=cosine)
    assert_equal(S.shape[0], X.shape[0])
    assert_equal(S.shape[1], Y.shape[0])
    assert_array_almost_equal(S, S2)
    # Tests that precomputed metric returns pointer to, and not copy of, X.
    S = np.dot(X, X.T)
    S2 = pairwise_distances(S, metric="precomputed")
    assert_true(S is S2)
    # Test with sparse X and Y,
    # currently only supported for euclidean and cosine
    X_sparse = csr_matrix(X)
    Y_sparse = csr_matrix(Y)
    S = pairwise_distances(X_sparse, Y_sparse, metric="euclidean")
    S2 = euclidean_distances(X_sparse, Y_sparse)
    assert_array_almost_equal(S, S2)
    S = pairwise_distances(X_sparse, Y_sparse, metric="cosine")
    S2 = cosine_distances(X_sparse, Y_sparse)
    assert_array_almost_equal(S, S2)
    # Test with scipy.spatial.distance metric, with a kwd
    kwds = {"p": 2.0}
    S = pairwise_distances(X, Y, metric="minkowski", **kwds)
    S2 = pairwise_distances(X, Y, metric=minkowski, **kwds)
    assert_array_almost_equal(S, S2)
    # same with Y = None
    kwds = {"p": 2.0}
    S = pairwise_distances(X, metric="minkowski", **kwds)
    S2 = pairwise_distances(X, metric=minkowski, **kwds)
    assert_array_almost_equal(S, S2)
    # Test that scipy distance metrics throw an error if sparse matrix given
    assert_raises(TypeError, pairwise_distances, X_sparse, metric="minkowski")
    assert_raises(TypeError, pairwise_distances, X, Y_sparse,
                  metric="minkowski")
Developer: SHoltzen | Project: scikit-learn | Lines: 72 | Source: test_pairwise.py



Note: The sklearn.metrics.pairwise.cosine_distances examples in this article were compiled from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are selected from open-source projects contributed by many developers; copyright in the source code remains with the original authors, and any use or redistribution must follow the corresponding project's license. Please do not reproduce without permission.

