Python pca.PCA Class Code Examples


This article collects typical usage examples of the Python class sklearn.decomposition.pca.PCA. If you have been wondering what the PCA class is for, how to use it, or want to see it in action, the curated class examples below should help.



Below are 20 code examples of the PCA class, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help our system recommend better Python code examples.
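Before the examples, here is a minimal sketch of the class's basic workflow (fit, project, inspect explained variance) on synthetic data. Note that the sklearn.decomposition.pca module seen in these snippets was later made private; in current scikit-learn releases PCA is imported directly from sklearn.decomposition.

import numpy as np
from sklearn.decomposition import PCA  # sklearn.decomposition.pca.PCA in older releases

rng = np.random.RandomState(0)
X = rng.rand(100, 5)                   # 100 samples, 5 features

pca = PCA(n_components=2)
X_reduced = pca.fit_transform(X)       # fit the model, then project the data

print(X_reduced.shape)                 # (100, 2)
print(pca.explained_variance_ratio_)   # variance share captured by each PC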

Example 1: main

def main():
    print('Reading in data file...')
    data = pd.read_csv(path + 'Sentiment Analysis Dataset.csv',
                       usecols=['Sentiment', 'SentimentText'], error_bad_lines=False)

    print('Pre-processing tweet text...')
    corpus = data['SentimentText']
    vectorizer = TfidfVectorizer(decode_error='replace', strip_accents='unicode',
                                 stop_words='english', tokenizer=tokenize)
    X = vectorizer.fit_transform(corpus.values)
    y = data['Sentiment'].values

    print('Training sentiment classification model...')
    classifier = MultinomialNB()
    classifier.fit(X, y)

    print('Training word2vec model...')
    corpus = corpus.map(lambda x: tokenize(x))
    word2vec = Word2Vec(corpus.tolist(), size=100, window=4, min_count=10, workers=4)
    word2vec.init_sims(replace=True)

    print('Fitting PCA transform...')
    word_vectors = [word2vec[word] for word in word2vec.vocab]
    pca = PCA(n_components=2)
    pca.fit(word_vectors)

    print('Saving artifacts to disk...')
    joblib.dump(vectorizer, path + 'vectorizer.pkl')
    joblib.dump(classifier, path + 'classifier.pkl')
    joblib.dump(pca, path + 'pca.pkl')
    word2vec.save(path + 'word2vec.pkl')

    print('Process complete.')
Developer: jdwittenauer, Project: twitter-viz-demo, Lines: 33, Source: build_models.py


Example 2: LogisticClassifier

class LogisticClassifier(object):
    def __init__(self, learning_rate=0.01, reg=0., momentum=0.5):
        self.classifier = LogisticRegression(learning_rate, reg, momentum)
        self.pca = None
        self.scaler = None

    def sgd_optimize(self, data, n_epochs, mini_batch_size):
        data = self._preprocess_data(data)
        sgd_optimization(data, self.classifier, n_epochs, mini_batch_size)

    def _preprocess_data(self, data):
        # center data and scale to unit std
        if self.scaler is None:
            self.scaler = StandardScaler()
            data = self.scaler.fit_transform(data)
        else:
            data = self.scaler.transform(data)

        if self.pca is None:
            # use Minka's MLE to guess an appropriate dimension
            self.pca = PCA(n_components='mle')
            data = self.pca.fit_transform(data)
        else:
            data = self.pca.transform(data)

        return data
Developer: joshloyal, Project: statlearn, Lines: 26, Source: logreg.py
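A side note on the 'mle' setting used above: it applies Minka's MLE to choose the dimensionality automatically, and it requires at least as many samples as features. A minimal sketch in isolation:

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.rand(200, 10)          # n_samples >= n_features, as 'mle' requires

pca = PCA(n_components='mle')  # Minka's MLE picks the dimension from the data
pca.fit(X)
print(pca.n_components_)       # the inferred number of components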


Example 3: pca_plot

def pca_plot(fp_list, clusters):
    np_fps = []
    for fp in fp_list:
        arr = numpy.zeros((1,))
        DataStructs.ConvertToNumpyArray(fp, arr)
        np_fps.append(arr)
    pca = PCA(n_components=3)
    pca.fit(np_fps)
    np_fps_r = pca.transform(np_fps)
    p1 = figure(x_axis_label="PC1",
                y_axis_label="PC2",
                title="PCA clustering of PAINS")
    p2 = figure(x_axis_label="PC2",
                y_axis_label="PC3",
                title="PCA clustering of PAINS")
    color_vector = ["blue", "red", "green", "orange", "pink", "cyan", "magenta",
                    "brown", "purple"]
    print(len(set(clusters)))
    for clust_num in set(clusters):
        print(clust_num)
        # collect only the fingerprints assigned to this cluster
        local_cluster = numpy.array([np_fps_r[i]
                                     for i in range(len(clusters))
                                     if clusters[i] == clust_num])
        print(len(local_cluster))
        # plot this cluster's points in its own color
        p1.scatter(local_cluster[:, 0], local_cluster[:, 1],
                   color=color_vector[clust_num])
        p2.scatter(local_cluster[:, 1], local_cluster[:, 2],
                   color=color_vector[clust_num])
    return HBox(p1, p2)
Developer: dkdeconti, Project: PAINS-train, Lines: 30, Source: hclust_PAINS.py


Example 4: calc_pca

def calc_pca(bnd, npc=None, preaverage=False, use_unbiased=False, \
    method='mdp'):
    '''
    Parameters
    ----------
    bnd : BinnedData
      binned data
    npc : int or None, optional
      number of PCs to calculate, defaults to None
    preaverage : bool
      average across repeats?
      
    Returns
    -------
    score : ndarray
      (npc, nobs)
    weight : ndarray
      (npc, nvar)
    '''
    assert method in ['mdp', 'skl']
    data = format_for_fa(bnd, preaverage=preaverage,
                         use_unbiased=use_unbiased)
    if method == 'mdp':    
        pca_node = mdp.nodes.PCANode(output_dim=npc)
        score = pca_node.execute(data)
        weight = pca_node.get_projmatrix()
    elif method == 'skl':
        pca_obj = PCA(n_components=npc)
        score = pca_obj.fit(data).transform(data)
        weight = pca_obj.components_.T
    return score.T, weight.T
Developer: amcmorl, Project: motorlab, Lines: 31, Source: factors.py
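The transposes on the last line follow from scikit-learn's shape conventions: transform returns (n_samples, n_components) and components_ has shape (n_components, n_features), so calc_pca flips both into its documented (npc, nobs) and (npc, nvar) layout. A quick shape check as a sketch:

import numpy as np
from sklearn.decomposition import PCA

data = np.random.rand(50, 8)        # (nobs, nvar)
pca_obj = PCA(n_components=3)
score = pca_obj.fit(data).transform(data)

print(score.shape)                  # (50, 3): (nobs, npc)
print(pca_obj.components_.shape)    # (3, 8):  (npc, nvar)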


Example 5: pca

def pca(target, control, title, name_one, name_two):
    np_fps = []
    for fp in target + control:
        arr = numpy.zeros((1,))
        DataStructs.ConvertToNumpyArray(fp, arr)
        np_fps.append(arr)
    ys_fit = [1] * len(target) + [0] * len(control)
    names = ["PAINS", "Control"]
    pca = PCA(n_components=3)
    pca.fit(np_fps)
    np_fps_r = pca.transform(np_fps)
    p1 = figure(x_axis_label="PC1",
                y_axis_label="PC2",
                title=title)
    p1.scatter(np_fps_r[:len(target), 0], np_fps_r[:len(target), 1],
               color="blue", legend=name_one)
    p1.scatter(np_fps_r[len(target):, 0], np_fps_r[len(target):, 1],
               color="red", legend=name_two)
    p2 = figure(x_axis_label="PC2",
                y_axis_label="PC3",
                title=title)
    p2.scatter(np_fps_r[:len(target), 1], np_fps_r[:len(target), 2],
               color="blue", legend=name_one)
    p2.scatter(np_fps_r[len(target):, 1], np_fps_r[len(target):, 2],
               color="red", legend=name_two)
    return HBox(p1, p2)
Developer: dkdeconti, Project: PAINS-train, Lines: 26, Source: pca_plots_on_fp.py


Example 6: pca

def pca(tx, ty, rx, ry):
    compressor = PCA(n_components=tx[1].size // 2)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    em(newtx, ty, newrx, ry, add="wPCAtr", times=10)
    km(newtx, ty, newrx, ry, add="wPCAtr", times=10)
    nn(newtx, ty, newrx, ry, add="wPCAr")
Developer: iRapha, Project: Machine-Learning, Lines: 8, Source: analysis.py


Example 7: pca

def pca(tx, ty, rx, ry):
    print "pca"
    compressor = PCA(n_components = tx[1].size/2)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    em(newtx, ty, newrx, ry, add="wPCAtr")  
    km(newtx, ty, newrx, ry, add="wPCAtr")
    nn(newtx, ty, newrx, ry, add="wPCAtr")
    print "pca done"
Developer: jessrosenfield, Project: unsupervised-learning, Lines: 10, Source: old.py


Example 8: PCA

	def PCA佮SVM模型(self, 問題, 答案):
		sample_weight_constant = np.ones(len(問題))
		clf = svm.SVC(C=1)
		pca = PCA(n_components=100)
# 		clf = svm.NuSVC()
		print('訓練PCA')
		pca.fit(問題)
		print('訓練SVM')
		clf.fit(pca.transform(問題), 答案, sample_weight=sample_weight_constant)
		print('訓練了')
		return lambda 問:clf.predict(pca.transform(問))
Developer: sih4sing5hong5, Project: huan1-ik8_gian2-kiu3, Lines: 11, Source: 訓練模型.py
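The same PCA-then-SVM chain can also be written as a scikit-learn Pipeline, which keeps the two fits in sync and exposes a single predict call; a sketch of the equivalent structure, reusing the hyperparameters above:

from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline

model = Pipeline([
    ('pca', PCA(n_components=100)),  # reduce to 100 components first
    ('svc', svm.SVC(C=1)),           # then classify in the reduced space
])
# model.fit(X, y) fits the PCA, transforms X, and fits the SVC on the result;
# model.predict(X_new) applies the fitted PCA before predicting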


Example 9: train_pca

def train_pca(pains_fps, num_components=3):
    '''
    Dimensionality reduction of fingerprint bit vectors to principal components
    :param pains_fps: RDKit fingerprint bit vectors
    :param num_components: number of principal components to keep
    :return: PCA-reduced fingerprint bit vectors
    '''
    np_fps = []
    for fp in pains_fps:
        arr = numpy.zeros((1,))
        DataStructs.ConvertToNumpyArray(fp, arr)
        np_fps.append(arr)
    pca = PCA(n_components=num_components)
    pca.fit(np_fps)
    fps_reduced = pca.transform(np_fps)
    return fps_reduced
Developer: dkdeconti, Project: PAINS-train, Lines: 15, Source: kmeans_clustering_of_pca_reduction.py


Example 10: classify_for_benchmark

def classify_for_benchmark(data_set_df, user_info_df, features, label='gender', classifier=None, num=None):
    instance_num = len(data_set_df.columns)
    x = data_set_df.loc[features]
    x = x.dropna(how='all', axis=0)
    x = x.dropna(how='all', axis=1)

    imp = Imputer(missing_values='NaN', strategy='most_frequent', axis=1)
    x_replaced = x.replace([np.inf, -np.inf], np.nan)
    x_imp = imp.fit_transform(x_replaced)  # the imputer must be fitted before transforming

    y = user_info_df.get(label)
    y_filtered = y[list(map(int, x.columns.values))]

    clf = nb.BernoulliNB() if classifier is None else classifier
    cv_num = min(len(y_filtered), 10)
    if cv_num <= 1 or len(y_filtered.unique()) <= 1:
        return 0.0, 100.0
    else:
        final_score = 0.0
        for i in range(100):
            score = 0.0
            cnt = 0
            skf = StratifiedKFold(y_filtered, n_folds=cv_num, shuffle=True)
            for tr_index, te_index in skf:
                x_train, x_test = x_imp.T[tr_index], x_imp.T[te_index]
                y_train, y_test = y_filtered.iloc[tr_index], y_filtered.iloc[te_index]
                dims = [len(x_train), len(x_train.T), len(x_test), len(x_test.T)]
                min_num = min(dims + [num]) if num is not None else min(dims)
                pca = PCA(min_num)
                x_train = pca.fit_transform(x_train)
                # transform the test fold with the PCA fitted on the training fold
                x_test = pca.transform(x_test)

                try:
                    clf.fit(x_train, y_train)
                    score += clf.score(x_test, y_test)
                    cnt += 1
                    # cv_score = cross_validation.cross_val_score(clf, x_imp.T, y_filtered, cv=cv_num)
                except ValueError:
                    traceback.print_exc()
                    print i, "why error? skip!"
            if cnt > 0:
                score /= cnt
                print(i, score)
            else:
                return 0.0, (float(instance_num - len(y_filtered)) / instance_num)
            final_score += score
        final_score /= 100
        miss_clf_rate = (float(instance_num - len(y_filtered)) / instance_num)
        return final_score, miss_clf_rate
Developer: heevery, Project: ohp, Lines: 48, Source: classifier.py


Example 11: reduction

def reduction(data, params):

    # read the number of components from the parameter dict
    n_components = params['n_components']

    # apply PCA

    pca = PCA(n_components=n_components)
    pca.fit(data)
    X = pca.transform(data)

    return X
Developer: emilleishida, Project: MLSNeSpectra, Lines: 17, Source: pca.py


Example 12: pca_no_labels

def pca_no_labels(target, title="PCA clustering of PAINS", color="blue"):
    np_fps = []
    for fp in target:
        arr = numpy.zeros((1,))
        DataStructs.ConvertToNumpyArray(fp, arr)
        np_fps.append(arr)
    pca = PCA(n_components=3)
    pca.fit(np_fps)
    np_fps_r = pca.transform(np_fps)
    p3 = figure(x_axis_label="PC1",
                y_axis_label="PC2",
                title=title)
    p3.scatter(np_fps_r[:, 0], np_fps_r[:, 1], color=color)
    p4 = figure(x_axis_label="PC2",
                y_axis_label="PC3",
                title=title)
    p4.scatter(np_fps_r[:, 1], np_fps_r[:, 2], color=color)
    return HBox(p3, p4)
Developer: dkdeconti, Project: PAINS-train, Lines: 18, Source: pca_plots_on_fp.py


Example 13: airline_pca

def airline_pca():
    X = np.array(pca_data)
    pca = PCA(n_components=3)
    # fit and transform on the same (normalized) data
    X_norm = normalize(X)
    pca.fit(X_norm)
    Y = pca.transform(X_norm)
    
    fig = plt.figure(1, figsize=(8, 6))
    ax = Axes3D(fig, elev=-150, azim=110)
    colordict = {carrier:i for i,carrier in enumerate(major_carriers)}
    pointcolors  = [colordict[carrier] for carrier in target_carrier]
    ax.scatter(Y[:, 0], Y[:, 1], Y[:, 2], c=pointcolors)
    ax.set_title("First three PCA directions")
    ax.set_xlabel("1st eigenvector")
    ax.w_xaxis.set_ticklabels([])
    ax.set_ylabel("2nd eigenvector")
    ax.w_yaxis.set_ticklabels([])
    ax.set_zlabel("3rd eigenvector")
    ax.w_zaxis.set_ticklabels([])
Developer: reedharder, Project: airline_network_games, Lines: 18, Source: market_carrier_analysis.py


Example 14: test_pipeline_transform

def test_pipeline_transform():
    # Test whether pipeline works with a transformer at the end.
    # Also test pipeline.transform and pipeline.inverse_transform
    iris = load_iris()
    X = iris.data
    pca = PCA(n_components=2)
    pipeline = Pipeline([('pca', pca)])

    # test transform and fit_transform:
    X_trans = pipeline.fit(X).transform(X)
    X_trans2 = pipeline.fit_transform(X)
    X_trans3 = pca.fit_transform(X)
    assert_array_almost_equal(X_trans, X_trans2)
    assert_array_almost_equal(X_trans, X_trans3)

    X_back = pipeline.inverse_transform(X_trans)
    X_back2 = pca.inverse_transform(X_trans)
    assert_array_almost_equal(X_back, X_back2)
Developer: PepGardiola, Project: scikit-learn, Lines: 18, Source: test_pipeline.py
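Because the pipeline keeps only two components, inverse_transform returns an approximation of the original iris data rather than an exact copy; a sketch of measuring that reconstruction error:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

X = load_iris().data
pca = PCA(n_components=2)
X_back = pca.inverse_transform(pca.fit_transform(X))

# the residual lies entirely in the two discarded directions
print(np.mean((X - X_back) ** 2))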


Example 15: do_train_with_freq

def do_train_with_freq():
    tf_mix = TrainFiles(train_path = train_path_mix, labels_file = labels_file, test_size = 0.)
    tf_freq = TrainFiles(train_path = train_path_freq, labels_file = labels_file, test_size = 0.)

    X_m, Y_m, _, _ = tf_mix.prepare_inputs()
    X_f, Y_f, _, _ = tf_freq.prepare_inputs()

    X = np.c_[X_m, X_f]
    Y = Y_f

    X, Xt, Y, Yt = train_test_split(X, Y, test_size = 0.1)
    sl = SKSupervisedLearning(SVC, X, Y, Xt, Yt)
    sl.fit_standard_scaler()

    pca = PCA(250)
    pca.fit(np.r_[sl.X_train_scaled, sl.X_test_scaled])
    X_pca = pca.transform(sl.X_train_scaled)
    X_pca_test = pca.transform(sl.X_test_scaled)

    #sl.train_params = {'C': 100, 'gamma': 0.0001, 'probability' : True}
    #print "Start SVM: ", time_now_str()
    #sl_ll_trn, sl_ll_tst = sl.fit_and_validate()
    #print "Finish Svm: ", time_now_str()

    ##construct a dataset for RBM
    #X_rbm = X[:, 257:]
    #Xt_rbm = X[:, 257:]

    #rng = np.random.RandomState(123)
    #rbm = RBM(X_rbm, n_visible=X_rbm.shape[1], n_hidden=X_rbm.shape[1]/4, numpy_rng=rng)

    #pretrain_lr = 0.1
    #k = 2
    #pretraining_epochs = 200
    #for epoch in xrange(pretraining_epochs):
    #    rbm.contrastive_divergence(lr=pretrain_lr, k=k)
    #    cost = rbm.get_reconstruction_cross_entropy()
    #    print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost


    trndata, tstdata = createDataSets(X_pca, Y, X_pca_test, Yt)
    fnn = train(trndata, tstdata, epochs = 1000, test_error = 0.025, momentum = 0.2, weight_decay = 0.0001)
Developer: CyberIntelMafia, Project: KaggleMalware, Lines: 42, Source: train_nn.py


Example 16: showDataTable

def showDataTable():
    title = "Descriptive statistics"
    df = frame[cols]
    data_dsc = df.describe().transpose()
    # dsc = df.describe()

    pca = PCA(n_components=5)
    pca.fit(df)
    pc = pca.explained_variance_ratio_

    data_corr = df.corr()
    eigenValues, eigenVectors = LA.eig(data_corr)
    idx = eigenValues.argsort()[::-1]
    # print(sorted(eigenValues, key=int, reverse=True))
    print(eigenValues.argsort()[::-1])
    print(eigenValues.argsort())
    eigenValues = pd.DataFrame(eigenValues[idx]).transpose()
    eigenVectors = pd.DataFrame(eigenVectors[:, idx])

    return render_template("showDataTable.html", title=title, data=df, data_dsc=data_dsc, pca=pd.DataFrame(pc).transpose(),data_corr=data_corr, w=eigenValues, v=eigenVectors)
Developer: ashutosh0889, Project: FlaskApp, Lines: 20, Source: __init__.py


Example 17: pca_prefit

def pca_prefit(weights, xs):
    """
    SOMの初期値を計算するための前処理.
    線形変換によって重みベクトル列の主成分とその固有値を入力ベクトル列のものと一致させる.
    :param weights: 初期重みベクトル列
    :param xs: 入力ベクトル列
    :return: 前処理した重みベクトル列
    """
    n = np.shape(xs)[1]
    pca_w = PCA(n_components=n)
    pca_w.fit(weights)
    pca_x = PCA(n_components=n)
    pca_x.fit(xs)

    mean_w = np.mean(weights, axis=0)
    mean_x = np.mean(xs, axis=0)
    com_w = pca_w.components_
    com_x = pca_x.components_
    var_w = pca_w.explained_variance_
    var_x = pca_x.explained_variance_

    var_w[var_w == 0] = np.max(var_w) * 1e-6
    new_w = (weights - mean_w).dot(com_w.T) / np.sqrt(var_w)
    new_w = (new_w * np.sqrt(var_x)).dot(com_x) + mean_x

    return new_w
Developer: kzm4269, Project: self-organizing-map, Lines: 26, Source: prefit.py
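A minimal usage sketch for pca_prefit, under assumed shapes (a set of SOM unit weights and an input batch sharing the same feature dimension):

import numpy as np

rng = np.random.RandomState(0)
weights = rng.rand(64, 3)    # 64 SOM units, 3 features
xs = rng.rand(500, 3)        # 500 input vectors

new_w = pca_prefit(weights, xs)
# the prefit weights now share the inputs' principal axes and variances
print(new_w.shape)           # (64, 3)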


Example 18: plot_similarity_clusters

def plot_similarity_clusters(desc1, desc2, files, plot = None):
	"""
	find similar sounds using Affinity Propagation clusters

	:param desc1: first descriptor values
	:param desc2: second descriptor values
	:returns:
	  - euclidean_labels: labels of clusters
	""" 

	if plot == True:
		print((Fore.MAGENTA + "Clustering"))
	else:
		pass
         
	min_max = preprocessing.scale(np.vstack((desc1,desc2)).T, with_mean=False, with_std=False)          
	pca = PCA(n_components=2, whiten=True)
	y = pca.fit(min_max).transform(min_max)
	    
	euclidean = AffinityPropagation(convergence_iter=1800, affinity='euclidean')                           
	euclidean_labels= euclidean.fit_predict(y)

	if plot == True:

		time.sleep(5)  

		print((Fore.WHITE + "Each number shows the group to which the sound belongs as an exemplar of the others. Group '0' is colored blue, group '1' red, and group '2' yellow. Check the plot to see which sounds are exemplars of the others."))
		print(np.vstack((euclidean_labels,files)).T)

		time.sleep(6)

		plt.scatter(y[euclidean_labels==0,0], y[euclidean_labels==0,1], c='b')
		plt.scatter(y[euclidean_labels==1,0], y[euclidean_labels==1,1], c='r')
		plt.scatter(y[euclidean_labels==2,0], y[euclidean_labels==2,1], c='y')
		plt.scatter(y[euclidean_labels==3,0], y[euclidean_labels==3,1], c='g')
		plt.show()
	else:
		pass

	return euclidean_labels
Developer: MarsCrop, Project: apicultor, Lines: 40, Source: SoundSimilarity.py
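A note on whiten=True in the snippet above: whitening rescales each projected component to unit variance, so the Euclidean affinities used by AffinityPropagation are not dominated by the first principal component. A small sketch of the effect on made-up data:

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.rand(300, 6) * [10, 5, 1, 1, 1, 1]   # unequal feature scales

y = PCA(n_components=2, whiten=True).fit_transform(X)
print(y.std(axis=0))   # approximately [1., 1.]: unit variance per component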


Example 19: calc_pcs_variance_explained

def calc_pcs_variance_explained(bnd, preaverage=False, 
    use_unbiased=False, method='skl'):
    '''
    Parameters
    ----------
    bnd : BinnedData
      binned data
    preaverage : bool
      average across repeats?
    use_unbiased : bool
      use the unbiased spike rates calculated using Rob Kass's
      spike rate method
    '''
    assert type(method) == str
    
    data = format_for_fa(bnd, preaverage=preaverage,
                     use_unbiased=use_unbiased)
    
    if method == 'skl':
        pca_obj = PCA()
        pca_obj.fit(data)
        return pca_obj.explained_variance_ratio_
    else:
        raise ValueError('method %s not implemented' % method)
Developer: amcmorl, Project: motorlab, Lines: 24, Source: factors.py
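The returned ratios sum to 1 over all components, so their cumulative sum is the usual way to decide how many PCs are worth keeping; a sketch assuming a 90% variance target:

import numpy as np
from sklearn.decomposition import PCA

data = np.random.rand(100, 20)
ratios = PCA().fit(data).explained_variance_ratio_

cumulative = np.cumsum(ratios)
npc = int(np.searchsorted(cumulative, 0.90)) + 1   # smallest count reaching 90%
print(npc, cumulative[npc - 1])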


Example 20: _preprocess_data

    def _preprocess_data(self, data):
        # center data and scale to unit std
        if self.scaler is None:
            self.scaler = StandardScaler()
            data = self.scaler.fit_transform(data)
        else:
            data = self.scaler.transform(data)

        if self.pca is None:
            # use Minka's MLE to guess an appropriate dimension
            self.pca = PCA(n_components='mle')
            data = self.pca.fit_transform(data)
        else:
            data = self.pca.transform(data)

        return data
Developer: joshloyal, Project: statlearn, Lines: 16, Source: logreg.py



Note: the sklearn.decomposition.pca.PCA class examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects by their respective contributors; copyright remains with the original authors, and distribution or use must follow each project's license. Do not reproduce without permission.

