本文整理汇总了Python中sklearn.decomposition.pca.PCA类的典型用法代码示例。如果您正苦于以下问题：Python PCA类的具体用法？Python PCA怎么用？Python PCA使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了PCA类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: main
def main():
    """Train and persist every model artifact derived from the tweet dataset.

    Reads the sentiment CSV located under the module-level ``path`` prefix,
    fits a TF-IDF vectorizer plus a ``MultinomialNB`` classifier on the tweet
    text, trains a word2vec embedding on the tokenised tweets, fits a
    2-component PCA on the resulting word vectors, and finally writes all
    four artifacts back under ``path``.
    """
    print('Reading in data file...')
    csv_file = path + 'Sentiment Analysis Dataset.csv'
    wanted_columns = ['Sentiment', 'SentimentText']
    data = pd.read_csv(csv_file, usecols=wanted_columns,
                       error_bad_lines=False)

    print('Pre-processing tweet text...')
    tweets = data['SentimentText']
    vectorizer = TfidfVectorizer(decode_error='replace',
                                 strip_accents='unicode',
                                 stop_words='english',
                                 tokenizer=tokenize)
    X = vectorizer.fit_transform(tweets.values)
    y = data['Sentiment'].values

    print('Training sentiment classification model...')
    classifier = MultinomialNB()
    classifier.fit(X, y)

    print('Training word2vec model...')
    # word2vec consumes lists of tokens, so every tweet is tokenised with the
    # same tokenizer that the TF-IDF vectorizer uses.
    sentences = tweets.map(tokenize).tolist()
    word2vec = Word2Vec(sentences, size=100, window=4, min_count=10,
                        workers=4)
    word2vec.init_sims(replace=True)

    print('Fitting PCA transform...')
    # One embedding vector per vocabulary entry is the PCA training set.
    vocabulary_vectors = [word2vec[token] for token in word2vec.vocab]
    pca = PCA(n_components=2)
    pca.fit(vocabulary_vectors)

    print('Saving artifacts to disk...')
    pickled_artifacts = (
        (vectorizer, 'vectorizer.pkl'),
        (classifier, 'classifier.pkl'),
        (pca, 'pca.pkl'),
    )
    for artifact, file_name in pickled_artifacts:
        joblib.dump(artifact, path + file_name)
    word2vec.save(path + 'word2vec.pkl')

    print('Process complete.')
开发者ID:jdwittenauer,项目名称:twitter-viz-demo,代码行数:33,代码来源:build_models.py
示例2: LogisticClassifier
class LogisticClassifier(object):
    """Logistic regression trained on standardised, PCA-reduced inputs.

    The scaler and the PCA are created and fitted lazily on the first batch
    that goes through ``_preprocess_data``; later batches reuse the fitted
    transformers.
    """

    def __init__(self, learning_rate=0.01, reg=0., momentum=0.5):
        self.classifier = LogisticRegression(learning_rate, reg, momentum)
        # Both transformers stay None until the first batch has been seen.
        self.scaler = None
        self.pca = None

    def sgd_optimize(self, data, n_epochs, mini_batch_size):
        """Preprocess ``data`` and run ``sgd_optimization`` on the classifier."""
        prepared = self._preprocess_data(data)
        sgd_optimization(prepared, self.classifier, n_epochs, mini_batch_size)

    def _preprocess_data(self, data):
        """Return ``data`` after scaling and PCA, fitting each step on first use."""
        # Step 1: center the data and scale it to unit standard deviation.
        if self.scaler is not None:
            scaled = self.scaler.transform(data)
        else:
            self.scaler = StandardScaler()
            scaled = self.scaler.fit_transform(data)

        # Step 2: project onto principal components.
        if self.pca is not None:
            return self.pca.transform(scaled)

        # First batch: let Minka's MLE pick the number of components.
        self.pca = PCA(n_components='mle')
        return self.pca.fit_transform(scaled)
开发者ID:joshloyal,项目名称:statlearn,代码行数:26,代码来源:logreg.py
示例3: pca_plot
def pca_plot(fp_list, clusters):
    """Plot PC1/PC2 and PC2/PC3 of the fingerprints, coloured per cluster.

    :param fp_list: fingerprints accepted by
        ``DataStructs.ConvertToNumpyArray``
    :param clusters: one integer cluster label per fingerprint, in the same
        order as ``fp_list``
    :return: ``HBox`` holding the two scatter figures
    """
    np_fps = []
    for fp in fp_list:
        arr = numpy.zeros((1,))
        DataStructs.ConvertToNumpyArray(fp, arr)
        np_fps.append(arr)
    pca = PCA(n_components=3)
    pca.fit(np_fps)
    np_fps_r = pca.transform(np_fps)
    p1 = figure(x_axis_label="PC1",
                y_axis_label="PC2",
                title="PCA clustering of PAINS")
    p2 = figure(x_axis_label="PC2",
                y_axis_label="PC3",
                title="PCA clustering of PAINS")
    color_vector = ["blue", "red", "green", "orange", "pink", "cyan",
                    "magenta", "brown", "purple"]
    labels = numpy.asarray(clusters)
    cluster_ids = sorted(set(clusters))
    print(len(cluster_ids))
    for clust_num in cluster_ids:
        print(clust_num)
        # Only the rows that belong to this cluster are drawn in its colour.
        # Scattering every point on each pass would leave the whole plot in
        # the colour of the last cluster.
        local_cluster = np_fps_r[labels == clust_num]
        print(len(local_cluster))
        # Wrap around the palette so more than nine clusters cannot raise
        # an IndexError.
        color = color_vector[clust_num % len(color_vector)]
        p1.scatter(local_cluster[:, 0], local_cluster[:, 1], color=color)
        p2.scatter(local_cluster[:, 1], local_cluster[:, 2], color=color)
    return HBox(p1, p2)
开发者ID:dkdeconti,项目名称:PAINS-train,代码行数:30,代码来源:hclust_PAINS.py
示例4: calc_pca
def calc_pca(bnd, npc=None, preaverage=False, use_unbiased=False,
             method='mdp'):
    '''
    Project binned data onto its principal components.

    Parameters
    ----------
    bnd : BinnedData
      binned data
    npc : int or None, optional
      number of PCs to calculate, defaults to None
    preaverage : bool
      average across repeats?
    use_unbiased : bool
      forwarded unchanged to `format_for_fa`
    method : {'mdp', 'skl'}
      backend used for the decomposition: 'mdp' runs `mdp.nodes.PCANode`,
      'skl' runs scikit-learn's `PCA`

    Returns
    -------
    score : ndarray
      (npc, nobs)
    weight : ndarray
      (npc, nvar)

    Raises
    ------
    ValueError
      if `method` is not one of the supported backends
    '''
    # Validate explicitly instead of asserting: asserts vanish under -O and a
    # bad method would then surface later as an unbound `score`.
    if method not in ('mdp', 'skl'):
        raise ValueError('method %s not implemented' % (method,))
    data = format_for_fa(bnd, preaverage=preaverage,
                         use_unbiased=use_unbiased)
    if method == 'mdp':
        pca_node = mdp.nodes.PCANode(output_dim=npc)
        score = pca_node.execute(data)
        weight = pca_node.get_projmatrix()
    else:
        pca_obj = PCA(n_components=npc)
        score = pca_obj.fit(data).transform(data)
        # Transposed here so both backends hand the same orientation to the
        # final transpose below.
        weight = pca_obj.components_.T
    return score.T, weight.T
开发者ID:amcmorl,项目名称:motorlab,代码行数:31,代码来源:factors.py
示例5: pca
def pca(target, control, title, name_one, name_two):
    """Plot PC1/PC2 and PC2/PC3 of two fingerprint sets in contrasting colours.

    :param target: list of fingerprints drawn in blue
    :param control: list of fingerprints drawn in red
    :param title: title used for both figures
    :param name_one: legend entry for ``target``
    :param name_two: legend entry for ``control``
    :return: ``HBox`` holding the two scatter figures
    """
    np_fps = []
    for fp in target + control:
        arr = numpy.zeros((1,))
        DataStructs.ConvertToNumpyArray(fp, arr)
        np_fps.append(arr)
    # Named `reducer` so the local does not shadow this function's own name.
    reducer = PCA(n_components=3)
    reducer.fit(np_fps)
    np_fps_r = reducer.transform(np_fps)

    # Rows keep the order of `target + control`, so the first len(target)
    # rows are the target set and the remainder is the control set.
    n_target = len(target)
    target_pcs = np_fps_r[:n_target]
    control_pcs = np_fps_r[n_target:]

    p1 = figure(x_axis_label="PC1",
                y_axis_label="PC2",
                title=title)
    p1.scatter(target_pcs[:, 0], target_pcs[:, 1],
               color="blue", legend=name_one)
    p1.scatter(control_pcs[:, 0], control_pcs[:, 1],
               color="red", legend=name_two)
    p2 = figure(x_axis_label="PC2",
                y_axis_label="PC3",
                title=title)
    p2.scatter(target_pcs[:, 1], target_pcs[:, 2],
               color="blue", legend=name_one)
    p2.scatter(control_pcs[:, 1], control_pcs[:, 2],
               color="red", legend=name_two)
    return HBox(p1, p2)
开发者ID:dkdeconti,项目名称:PAINS-train,代码行数:26,代码来源:pca_plots_on_fp.py
示例6: pca
def pca(tx, ty, rx, ry):
    """Halve the feature count with PCA, then rerun em, km and nn on the result.

    :param tx: training samples; each row must expose ``.size``
    :param ty: training labels
    :param rx: test samples, projected with the PCA fitted on ``tx``
    :param ry: test labels
    """
    # Floor division keeps n_components an integer on Python 3 as well;
    # true division would hand PCA a float component count.
    compressor = PCA(n_components=tx[1].size // 2)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    em(newtx, ty, newrx, ry, add="wPCAtr", times=10)
    km(newtx, ty, newrx, ry, add="wPCAtr", times=10)
    # NOTE(review): this tag is "wPCAr" while em/km use "wPCAtr" - confirm
    # the difference is intentional.
    nn(newtx, ty, newrx, ry, add="wPCAr")
开发者ID:iRapha,项目名称:Machine-Learning,代码行数:8,代码来源:analysis.py
示例7: pca
def pca(tx, ty, rx, ry):
    """Halve the feature count with PCA, then rerun em, km and nn on the result.

    :param tx: training samples; each row must expose ``.size``
    :param ty: training labels
    :param rx: test samples, projected with the PCA fitted on ``tx``
    :param ry: test labels
    """
    # Single-argument print() calls produce the same output on Python 2 and 3.
    print("pca")
    # Floor division keeps n_components an integer on Python 3 as well;
    # true division would hand PCA a float component count.
    compressor = PCA(n_components=tx[1].size // 2)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    em(newtx, ty, newrx, ry, add="wPCAtr")
    km(newtx, ty, newrx, ry, add="wPCAtr")
    nn(newtx, ty, newrx, ry, add="wPCAtr")
    print("pca done")
开发者ID:jessrosenfield,项目名称:unsupervised-learning,代码行数:10,代码来源:old.py
示例8: PCA佮SVM模型
def PCA佮SVM模型(self, 問題, 答案):
    """Fit a 100-component PCA followed by an SVC and return a predictor.

    :param 問題: training samples
    :param 答案: training labels, one per sample
    :return: callable that projects new samples with the fitted PCA and
        returns the SVC's predictions for them
    """
    reducer = PCA(n_components=100)
    classifier = svm.SVC(C=1)

    print('訓練PCA')
    reducer.fit(問題)

    print('訓練SVM')
    # Every training sample gets the same weight of 1.
    classifier.fit(
        reducer.transform(問題),
        答案,
        sample_weight=np.ones(len(問題)),
    )

    print('訓練了')
    return lambda 問: classifier.predict(reducer.transform(問))
开发者ID:sih4sing5hong5,项目名称:huan1-ik8_gian2-kiu3,代码行数:11,代码来源:訓練模型.py
示例9: train_pca
def train_pca(pains_fps, num_components=3):
    '''
    Reduce fingerprint bit vectors to their leading principal components.

    :param pains_fps: fingerprints accepted by
        ``DataStructs.ConvertToNumpyArray``
    :param num_components: number of principal components to keep
    :return: the fingerprints projected by the fitted PCA
    '''
    def as_array(fingerprint):
        # ConvertToNumpyArray writes into the array it is handed.
        buf = numpy.zeros((1,))
        DataStructs.ConvertToNumpyArray(fingerprint, buf)
        return buf

    vectors = [as_array(fp) for fp in pains_fps]
    reducer = PCA(n_components=num_components)
    reducer.fit(vectors)
    return reducer.transform(vectors)
开发者ID:dkdeconti,项目名称:PAINS-train,代码行数:15,代码来源:kmeans_clustering_of_pca_reduction.py
示例10: classify_for_benchmark
def classify_for_benchmark(data_set_df, user_info_df, features, label='gender', classifier=None, num=None):
    """Estimate classification accuracy of PCA-reduced features by repeated CV.

    The selected feature rows are cleaned (all-NaN rows/columns dropped,
    infinities and NaNs imputed), then 100 rounds of shuffled stratified
    k-fold cross-validation are run. In each fold a PCA is fitted on the
    training part only and applied to both parts before the classifier is
    trained and scored.

    :param data_set_df: frame with one row per feature and one column per
        instance; column labels must be convertible to ``int``
    :param user_info_df: object whose ``get(label)`` yields the target
        values, indexed by the integer instance ids
    :param features: row labels of ``data_set_df`` to use
    :param label: name of the target looked up in ``user_info_df``
    :param classifier: estimator with ``fit``/``score``; defaults to
        ``nb.BernoulliNB()``
    :param num: optional upper bound on the number of principal components;
        ``None`` keeps as many as the training fold allows
    :return: ``(mean accuracy, fraction of instances dropped)``, or
        ``(0.0, 100.0)`` when fewer than two samples or classes remain, or
        ``(0.0, fraction dropped)`` as soon as a round has no usable fold
    """
    instance_num = len(data_set_df.columns)
    x = data_set_df.loc[features]
    x = x.dropna(how='all', axis=0)
    x = x.dropna(how='all', axis=1)
    imp = Imputer(missing_values='NaN', strategy='most_frequent', axis=1)
    x_replaced = x.replace([np.inf, -np.inf], np.nan)
    x_imp = imp.transform(x_replaced)
    y = user_info_df.get(label)
    # An explicit list keeps the lookup working where map() is lazy.
    y_filtered = y[[int(col) for col in x.columns.values]]
    clf = nb.BernoulliNB() if classifier is None else classifier
    cv_num = min(len(y_filtered), 10)
    if cv_num <= 1 or len(y_filtered.unique()) <= 1:
        return 0.0, 100.0

    miss_clf_rate = float(instance_num - len(y_filtered)) / instance_num
    samples = x_imp.T  # one row per instance
    n_rounds = 100
    final_score = 0.0
    for i in range(n_rounds):
        score = 0.0
        cnt = 0
        skf = StratifiedKFold(y_filtered, n_folds=cv_num, shuffle=True)
        for tr_index, te_index in skf:
            x_train, x_test = samples[tr_index], samples[te_index]
            y_train, y_test = y_filtered.iloc[tr_index], y_filtered.iloc[te_index]
            # The component count is bounded by the training fold only,
            # because the PCA is no longer re-fitted on the test fold.
            n_components = min(len(x_train), len(x_train.T))
            if num is not None:
                n_components = min(n_components, num)
            pca = PCA(n_components)
            x_train = pca.fit_transform(x_train)
            # Project the test fold with the PCA fitted on the training
            # fold; re-fitting here would put train and test in different
            # bases and make the score meaningless.
            x_test = pca.transform(x_test)
            try:
                clf.fit(x_train, y_train)
                score += clf.score(x_test, y_test)
                cnt += 1
            except ValueError:
                traceback.print_exc()
                print('%s why error? skip!' % (i,))
        if cnt == 0:
            return 0.0, miss_clf_rate
        score /= cnt
        print('%s %s' % (i, score))
        final_score += score
    final_score /= n_rounds
    return final_score, miss_clf_rate
开发者ID:heevery,项目名称:ohp,代码行数:48,代码来源:classifier.py
示例11: reduction
def reduction(data, params):
    """Project ``data`` onto its principal components.

    :param data: samples to fit the PCA on and to transform
    :param params: mapping of options; only ``'n_components'`` is used and
        it is passed to ``PCA`` unchanged
    :return: ``data`` transformed by the fitted PCA
    :raises KeyError: if ``params`` has no ``'n_components'`` entry
    """
    # Read the option directly instead of exec()-ing "name=value" strings.
    # exec cannot create function locals on Python 3, and it would execute
    # arbitrary code embedded in parameter names or values.
    n_components = params['n_components']
    # apply PCA
    pca = PCA(n_components=n_components)
    pca.fit(data)
    return pca.transform(data)
开发者ID:emilleishida,项目名称:MLSNeSpectra,代码行数:17,代码来源:pca.py
示例12: pca_no_labels
def pca_no_labels(target, title="PCA clustering of PAINS", color="blue"):
    """Plot PC1/PC2 and PC2/PC3 of one fingerprint set in a single colour.

    :param target: fingerprints accepted by
        ``DataStructs.ConvertToNumpyArray``
    :param title: title used for both figures
    :param color: marker colour used for every point
    :return: ``HBox`` holding the two scatter figures
    """
    def as_array(fingerprint):
        # ConvertToNumpyArray writes into the array it is handed.
        buf = numpy.zeros((1,))
        DataStructs.ConvertToNumpyArray(fingerprint, buf)
        return buf

    vectors = [as_array(fp) for fp in target]
    reducer = PCA(n_components=3)
    reducer.fit(vectors)
    projected = reducer.transform(vectors)
    pc1, pc2, pc3 = projected[:, 0], projected[:, 1], projected[:, 2]

    first_vs_second = figure(x_axis_label="PC1",
                             y_axis_label="PC2",
                             title=title)
    first_vs_second.scatter(pc1, pc2, color=color)

    second_vs_third = figure(x_axis_label="PC2",
                             y_axis_label="PC3",
                             title=title)
    second_vs_third.scatter(pc2, pc3, color=color)

    return HBox(first_vs_second, second_vs_third)
开发者ID:dkdeconti,项目名称:PAINS-train,代码行数:18,代码来源:pca_plots_on_fp.py
示例13: airline_pca
def airline_pca():
    """Draw a 3-D scatter of the first three principal components of ``pca_data``.

    Reads the module-level ``pca_data``, ``major_carriers`` and
    ``target_carrier``; points are coloured by the position of their carrier
    in ``major_carriers``. Nothing is returned; the plot is drawn on
    matplotlib figure 1.
    """
    # Fit and project the SAME normalised matrix. Fitting on the raw data
    # and projecting the normalised data centres with the wrong mean and
    # uses axes from a different scale.
    X = normalize(np.array(pca_data))
    pca = PCA(n_components=3)
    Y = pca.fit(X).transform(X)
    fig = plt.figure(1, figsize=(8, 6))
    ax = Axes3D(fig, elev=-150, azim=110)
    colordict = {carrier: i for i, carrier in enumerate(major_carriers)}
    pointcolors = [colordict[carrier] for carrier in target_carrier]
    ax.scatter(Y[:, 0], Y[:, 1], Y[:, 2], c=pointcolors)
    ax.set_title("First three PCA directions")
    # xaxis/yaxis/zaxis are the same objects as the older w_*axis aliases
    # and remain available in current matplotlib releases.
    ax.set_xlabel("1st eigenvector")
    ax.xaxis.set_ticklabels([])
    ax.set_ylabel("2nd eigenvector")
    ax.yaxis.set_ticklabels([])
    ax.set_zlabel("3rd eigenvector")
    ax.zaxis.set_ticklabels([])
开发者ID:reedharder,项目名称:airline_network_games,代码行数:18,代码来源:market_carrier_analysis.py
示例14: test_pipeline_transform
def test_pipeline_transform():
    """A pipeline that ends in a transformer must behave like that transformer.

    Covers ``pipeline.transform``, ``pipeline.fit_transform`` and
    ``pipeline.inverse_transform``.
    """
    X = load_iris().data
    pca = PCA(n_components=2)
    pipeline = Pipeline([('pca', pca)])

    # fit().transform(), fit_transform() and the bare estimator must agree.
    via_fit_then_transform = pipeline.fit(X).transform(X)
    via_fit_transform = pipeline.fit_transform(X)
    via_bare_estimator = pca.fit_transform(X)
    assert_array_almost_equal(via_fit_then_transform, via_fit_transform)
    assert_array_almost_equal(via_fit_then_transform, via_bare_estimator)

    # Inverting through the pipeline must match inverting through the step.
    assert_array_almost_equal(
        pipeline.inverse_transform(via_fit_then_transform),
        pca.inverse_transform(via_fit_then_transform),
    )
开发者ID:PepGardiola,项目名称:scikit-learn,代码行数:18,代码来源:test_pipeline.py
示例15: do_train_with_freq
def do_train_with_freq():
    """Train a network on PCA-reduced "mix" and "freq" features combined.

    Loads both feature sets through ``TrainFiles``, concatenates them
    column-wise, holds out 10% of the rows, standardises the data via
    ``SKSupervisedLearning``, reduces it to 250 principal components and
    hands the resulting data sets to ``train``.
    """
    mix_files = TrainFiles(train_path = train_path_mix, labels_file = labels_file, test_size = 0.)
    freq_files = TrainFiles(train_path = train_path_freq, labels_file = labels_file, test_size = 0.)

    # Only the labels returned with the "freq" features are used below.
    X_m, _, _, _ = mix_files.prepare_inputs()
    X_f, Y_f, _, _ = freq_files.prepare_inputs()

    X, Xt, Y, Yt = train_test_split(np.c_[X_m, X_f], Y_f, test_size = 0.1)

    sl = SKSupervisedLearning(SVC, X, Y, Xt, Yt)
    sl.fit_standard_scaler()

    # The projection is fitted on the scaled training and held-out rows
    # together, then applied to each part separately.
    pca = PCA(250)
    pca.fit(np.r_[sl.X_train_scaled, sl.X_test_scaled])
    X_pca = pca.transform(sl.X_train_scaled)
    X_pca_test = pca.transform(sl.X_test_scaled)

    trndata, tstdata = createDataSets(X_pca, Y, X_pca_test, Yt)
    train(trndata, tstdata, epochs = 1000, test_error = 0.025, momentum = 0.2, weight_decay = 0.0001)
开发者ID:CyberIntelMafia,项目名称:KaggleMalware,代码行数:42,代码来源:train_nn.py
示例16: showDataTable
def showDataTable():
    """Render summary statistics, PCA variance ratios and correlation eigenpairs.

    Works on the module-level ``frame`` restricted to ``cols`` and passes the
    results to the ``showDataTable.html`` template.
    """
    title = "Descriptive statistics"
    df = frame[cols]
    data_dsc = df.describe().transpose()

    pca = PCA(n_components=5)
    pca.fit(df)
    variance_ratio = pd.DataFrame(pca.explained_variance_ratio_).transpose()

    data_corr = df.corr()
    eigenValues, eigenVectors = LA.eig(data_corr)
    ascending = eigenValues.argsort()
    idx = ascending[::-1]  # largest eigenvalue first
    print(idx)
    print(ascending)

    # Reorder eigenvalues and the matching eigenvector columns together.
    w = pd.DataFrame(eigenValues[idx]).transpose()
    v = pd.DataFrame(eigenVectors[:, idx])
    return render_template("showDataTable.html", title=title, data=df,
                           data_dsc=data_dsc, pca=variance_ratio,
                           data_corr=data_corr, w=w, v=v)
开发者ID:ashutosh0889,项目名称:FlaskApp,代码行数:20,代码来源:__init__.py
示例17: pca_prefit
def pca_prefit(weights, xs):
    """
    Pre-process weight vectors for computing the initial values of a SOM.

    A linear transformation makes the principal components of the weight
    vectors, and their eigenvalues, match those of the input vectors.

    :param weights: initial weight vectors
    :param xs: input vectors
    :return: pre-processed weight vectors
    """
    n_features = np.shape(xs)[1]

    weight_pca = PCA(n_components=n_features)
    weight_pca.fit(weights)
    input_pca = PCA(n_components=n_features)
    input_pca.fit(xs)

    # Replace zero variances with a tiny positive value so the division
    # below stays finite.
    weight_var = weight_pca.explained_variance_
    weight_var[weight_var == 0] = np.max(weight_var) * 1e-6

    # Express the centred weights in their own PCA basis at unit variance ...
    centred = weights - np.mean(weights, axis=0)
    whitened = centred.dot(weight_pca.components_.T) / np.sqrt(weight_var)

    # ... then rescale to the input variances and map back through the
    # input's components and mean.
    rescaled = whitened * np.sqrt(input_pca.explained_variance_)
    return rescaled.dot(input_pca.components_) + np.mean(xs, axis=0)
开发者ID:kzm4269,项目名称:self-organizing-map,代码行数:26,代码来源:prefit.py
示例18: plot_similarity_clusters
def plot_similarity_clusters(desc1, desc2, files, plot = None):
    """
    find similar sounds using Affinity Propagation clusters

    The two descriptors are stacked into two-column samples, whitened with a
    2-component PCA and clustered with Affinity Propagation.

    :param desc1: first descriptor values
    :param desc2: second descriptor values
    :param files: one name per sound, printed next to its cluster label when
        plotting is enabled
    :param plot: when truthy, print progress and the label table, then show
        a scatter plot with one colour per cluster
    :returns:
      - euclidean_labels: labels of clusters
    """
    if plot:
        print((Fore.MAGENTA + "Clustering"))
    min_max = preprocessing.scale(np.vstack((desc1,desc2)).T, with_mean=False, with_std=False)
    pca = PCA(n_components=2, whiten=True)
    y = pca.fit(min_max).transform(min_max)
    euclidean = AffinityPropagation(convergence_iter=1800, affinity='euclidean')
    euclidean_labels = euclidean.fit_predict(y)
    if plot:
        time.sleep(5)
        print((Fore.WHITE + "Cada número representa el grupo al que pertence el sonido como ejemplar de otro/s. El grupo '0' esta coloreado en azul, el grupo '1' esta coloreado en rojo, el grupo '2' esta coloreado en amarillo. Observa el ploteo para ver qué sonidos son ejemplares de otros"))
        print(np.vstack((euclidean_labels,files)).T)
        time.sleep(6)
        # Affinity Propagation chooses the number of clusters itself, so
        # draw every label found instead of only 0-3. The first four
        # colours match the original fixed assignment; the palette then
        # wraps around.
        palette = ('b', 'r', 'y', 'g', 'c', 'm', 'k')
        for cluster_id in np.unique(euclidean_labels):
            members = y[euclidean_labels == cluster_id]
            plt.scatter(members[:, 0], members[:, 1],
                        c=palette[cluster_id % len(palette)])
        plt.show()
    return euclidean_labels
开发者ID:MarsCrop,项目名称:apicultor,代码行数:40,代码来源:SoundSimilarity.py
示例19: calc_pcs_variance_explained
def calc_pcs_variance_explained(bnd, preaverage=False,
                                use_unbiased=False, method='skl'):
    '''
    Return the fraction of variance explained by each principal component.

    Parameters
    ----------
    bnd : BinnedData
      binned data
    preaverage : bool
      average across repeats?
    use_unbiased : False
      use the unbiased spike rates calculated using Rob Kass's
      spike rate method
    method : {'skl'}
      backend used for the decomposition; only scikit-learn is implemented

    Returns
    -------
    explained_variance_ratio : ndarray
      `explained_variance_ratio_` of a full PCA fitted on the formatted data

    Raises
    ------
    ValueError
      if `method` is anything other than 'skl'
    '''
    # Reject unsupported backends before formatting the data. This replaces
    # an assert-based type check, which is stripped under -O.
    if method != 'skl':
        raise ValueError('method %s not implemented' % (method,))
    data = format_for_fa(bnd, preaverage=preaverage,
                         use_unbiased=use_unbiased)
    pca_obj = PCA()
    pca_obj.fit(data)
    return pca_obj.explained_variance_ratio_
开发者ID:amcmorl,项目名称:motorlab,代码行数:24,代码来源:factors.py
示例20: _preprocess_data
def _preprocess_data(self, data):
    """Return ``data`` after scaling and PCA, fitting each step on first use.

    ``self.scaler`` and ``self.pca`` are created and fitted the first time
    they are found to be ``None``; afterwards they are only applied.
    """
    # Step 1: center the data and scale it to unit standard deviation.
    if self.scaler is not None:
        scaled = self.scaler.transform(data)
    else:
        self.scaler = StandardScaler()
        scaled = self.scaler.fit_transform(data)

    # Step 2: project onto principal components.
    if self.pca is not None:
        return self.pca.transform(scaled)

    # First batch: let Minka's MLE pick the number of components.
    self.pca = PCA(n_components='mle')
    return self.pca.fit_transform(scaled)
开发者ID:joshloyal,项目名称:statlearn,代码行数:16,代码来源:logreg.py
注:本文中的sklearn.decomposition.pca.PCA类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论