This article collects typical usage examples of the Python sklearn.decomposition.PCA class: what the PCA class does, how to use it, and what real code built on it looks like. If those are your questions, the curated class examples here may help.
Below, 20 code examples of the PCA class are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
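
Before the collected examples, here is a minimal, self-contained sketch of the fit/transform pattern that most of the examples below build on. The random data and the choice of n_components=2 are illustrative assumptions, not taken from any of the projects:

import numpy as np
from sklearn.decomposition import PCA

# Illustrative data: 100 samples with 5 features, two of them correlated
rng = np.random.RandomState(0)
X = rng.randn(100, 5)
X[:, 1] += 2 * X[:, 0]          # introduce correlation so PCA has structure to find

pca = PCA(n_components=2)       # keep the two strongest directions of variance
X_2d = pca.fit_transform(X)     # equivalent to pca.fit(X).transform(X)

print(X_2d.shape)                     # (100, 2)
print(pca.explained_variance_ratio_)  # fraction of variance captured per component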
Example 1: pcafunction
def pcafunction(dataList, countList, nameList):
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.decomposition import PCA

    pcadataArray = np.array(dataList)
    pcaCountArray = np.array(countList)
    pca = PCA(n_components=2)
    X = pca.fit(pcadataArray).transform(pcadataArray)
    # Collect the unique names in order of first appearance (used for the legend)
    pcaNameList = []
    for name in nameList:
        if name not in pcaNameList:
            pcaNameList.append(name)
    print('explained variance ratio (first two components): %s'
          % str(pca.explained_variance_ratio_))
    # One marker/colour per class label in countList (0, 1, 2)
    plt.plot(X[pcaCountArray == 0, 0], X[pcaCountArray == 0, 1], 'or',
             X[pcaCountArray == 1, 0], X[pcaCountArray == 1, 1], '^b',
             X[pcaCountArray == 2, 0], X[pcaCountArray == 2, 1], 'sg')
    plt.xlabel('PC1 (explained variance ratio: ' + str(pca.explained_variance_ratio_[0]) + ')', fontsize=14)
    plt.ylabel('PC2 (explained variance ratio: ' + str(pca.explained_variance_ratio_[1]) + ')', fontsize=14)
    plt.legend((str(pcaNameList[0]), str(pcaNameList[1])), loc='best', fontsize=14)
    plt.title('PCA', fontsize=16)
Author: shiorisio, Project: UA, Lines: 26, Source: uapca.py
Example 2: add_tsne_features
import numpy as np
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

def add_tsne_features(x_train, x_test):
    print('add_tsne_features <<')
    x_train_data = x_train.data_
    x_test_data = x_test.data_
    # Stack train and test so both are embedded in the same t-SNE space
    x = np.vstack((x_train_data, x_test_data))
    print('applying pca...')
    pca = PCA(n_components=25)
    x_pca = pca.fit_transform(x)
    print('applying t-SNE...')
    tsne_model = TSNE(n_components=2, random_state=0)
    x_tsne = tsne_model.fit_transform(x_pca)
    x_train_data = np.hstack((x_train_data, x_tsne[:x_train_data.shape[0], :]))
    x_test_data = np.hstack((x_test_data, x_tsne[-x_test_data.shape[0]:, :]))
    assert x_train.columns_ == x_test.columns_
    columns = x_train.columns_ + ['tsne_1', 'tsne_2']
    # DataSet is a container class defined elsewhere in this project
    x_train = DataSet(x_train.ids_, columns, x_train_data)
    x_test = DataSet(x_test.ids_, columns, x_test_data)
    print('add_tsne_features >>')
    return x_train, x_test
Author: kaluzhny, Project: airbnb, Lines: 25, Source: features.py
Example 3: test_feature_union_weights
# From scikit-learn's own test suite; load_iris, FeatureUnion, SelectKBest, PCA,
# the Transf mock transformer and the assert_* helpers are imported by the test module.
def test_feature_union_weights():
    # test feature union with transformer weights
    iris = load_iris()
    X = iris.data
    y = iris.target
    pca = PCA(n_components=2, svd_solver='randomized', random_state=0)
    select = SelectKBest(k=1)
    # test using fit followed by transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    fs.fit(X, y)
    X_transformed = fs.transform(X)
    # test using fit_transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    X_fit_transformed = fs.fit_transform(X, y)
    # test it works with transformers missing fit_transform
    fs = FeatureUnion([("mock", Transf()), ("pca", pca), ("select", select)],
                      transformer_weights={"mock": 10})
    X_fit_transformed_wo_method = fs.fit_transform(X, y)
    # check against expected result
    # We use a different pca object to control the random_state stream
    assert_array_almost_equal(X_transformed[:, :-1], 10 * pca.fit_transform(X))
    assert_array_equal(X_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_array_almost_equal(X_fit_transformed[:, :-1],
                              10 * pca.fit_transform(X))
    assert_array_equal(X_fit_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_equal(X_fit_transformed_wo_method.shape, (X.shape[0], 7))
Author: dsquareindia, Project: scikit-learn, Lines: 31, Source: test_pipeline.py
Example 4: scikit_pca
from sklearn.cluster import KMeans, SpectralClustering
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

def scikit_pca(model, rel_wds, plot_lims, title, cluster="kmeans"):
    """
    Given a word2vec model and a cluster method (choice of "kmeans" or "spectral"),
    make a plot of all word-vectors in the model.
    """
    # make_data_matrix and scatter_plot are helpers defined elsewhere in this project
    X, keys = make_data_matrix(model)
    for i, key in enumerate(keys):
        X[i, :] = model[key]
    if cluster == "kmeans":
        k_means = KMeans(n_clusters=8)
        labels = k_means.fit_predict(X)
    elif cluster == "spectral":
        sp_clust = SpectralClustering()
        labels = sp_clust.fit_predict(X)
    # Standardize the features, then project onto the first two principal components
    X_std = StandardScaler().fit_transform(X)
    sklearn_pca = PCA(n_components=2)
    X_transf = sklearn_pca.fit_transform(X_std)
    scatter_plot(X_transf[:, 0], X_transf[:, 1], rel_wds, labels, title, keys, plot_lims)
    return sklearn_pca.explained_variance_ratio_
Author: quinngroup, Project: sm_w2v, Lines: 26, Source: plot_utils.py
Example 5: __init__
# Method of a project-specific RegressionDriver class; REGRESSOR, N_ESTIMATORS,
# N_JOBS and the sklearn estimators are imported at module level in the source.
def __init__(self):
    super(RegressionDriver, self).__init__()
    if REGRESSOR == "LOG":
        self.driver = LogisticRegression()
    elif REGRESSOR == "RFR":
        self.driver = RandomForestRegressor(n_estimators=N_ESTIMATORS, n_jobs=N_JOBS)
    elif REGRESSOR == "GBR":
        self.driver = GradientBoostingClassifier(n_estimators=300, max_depth=5, learning_rate=0.05)
    elif REGRESSOR == "PCA":
        self.driver = PCA(n_components=1)
    else:
        raise Exception("Regressor: %s not supported." % REGRESSOR)
    genuineX = []
    forgeryX = []
    # Training process: PersonTraining is a project-specific helper class
    for sigs in self.train_set:
        personTrain = PersonTraining(sigs)
        genuine, forgery = personTrain.calc_train_set()
        genuineX.extend(genuine)
        forgeryX.extend(forgery)
    # To match the PCA convention, 0 means genuine and 1 means forgery
    genuineY = [0.0] * len(genuineX)
    forgeryY = [1.0] * len(forgeryX)
    trainX = genuineX + forgeryX
    trainY = genuineY + forgeryY
    # Note: PCA.fit ignores trainY; the labels are only used by the real regressors
    self.driver.fit(trainX, trainY)
Author: zixuan-zhang, Project: OpenSV, Lines: 35, Source: driver_for_susig.py
Example 6: main
import sys
import numpy as np
import statsmodels.api as sm
from numpy.random import normal as rn  # assumed alias: the original calls rn(mean, sigma) as a Gaussian draw
from sklearn.decomposition import PCA

def main():
    inp = np.loadtxt('../../out_files/bivar_regress.txt', usecols=(1, 2, 3))
    X = inp[:, [1, 2]]
    ncomp = int(sys.argv[3])
    pca = PCA(n_components=ncomp)
    pca.fit(X)
    l = pca.transform(X)
    print("Doing a\t" + str(ncomp) + "\tcomponent PCA\n\n----------------")
    # linear regression fit
    res = sm.OLS(inp[:, 0], l).fit()
    t2_new = float(sys.argv[1])
    err_t2_new = float(sys.argv[2])
    # array of 1000 Monte Carlo realisations with slope -0.0264 and slope error 0.004
    ar = np.array([(rn(-0.0264, 0.004) * rn(pca.transform([rn(t2_new, err_t2_new)]), 0.85)
                    + rn(np.mean(inp[:, 0]), 0.07)) / rn(2.0, 0.3) for k in range(1000)])
    print("The estimated L_max is\t" + str(np.mean(ar)))
    print("The error from the PCA is\t" + str(np.std(ar)))
    print("Standard error on y mean is\t" + str(np.std(inp[:, 0]) / np.sqrt(len(inp[:, 0]))))
    # boots is a bootstrap helper defined elsewhere in this project
    print("Error by bootstrapping is\t" + str(np.std(boots(inp[:, 0]))))
Author: sdhawan21, Project: bolometric_Ia, Lines: 28, Source: sklearn_pc.py
Example 7: classification_level_SGDReg_pipeline
import pandas as pd
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline

def classification_level_SGDReg_pipeline(classifications_DF):
    X = classifications_DF.iloc[:, 3:89]
    # assign the target (session length) to y and convert to float
    y_actual = classifications_DF.iloc[:, 2:3].astype(float)
    # scaling the data for feature selection
    X_scaled = preprocessing.scale(X)
    X_scaled_train, X_scaled_test, y_actual_train, y_actual_test = train_test_split(
        X_scaled, y_actual, test_size=0.5, random_state=0)
    pca_selection = PCA(n_components=2)
    SGDReg = SGDRegressor(alpha=0.0001)
    # Do grid search over n_components and the SGDRegressor parameters
    # (GridSearchCV refits the PCA inside the pipeline on each fold):
    pipeline = Pipeline([('pca', pca_selection), ('SGDReg', SGDReg)])
    tuned_params = dict(pca__n_components=[5, 30, 40, 50],
                        SGDReg__alpha=[0.1, 0.01, 0.001, 0.0001, 0.00001],
                        SGDReg__l1_ratio=[.05, .15, .5, .7, .9, .95, .99, 1],
                        SGDReg__penalty=['l2', 'l1', 'elasticnet'])
    grid_search = GridSearchCV(pipeline, param_grid=tuned_params,
                               scoring='neg_mean_squared_error',  # scoring name used by current scikit-learn
                               cv=3, verbose=10)
    grid_search.fit(X_scaled_train, y_actual_train['session_length'].values)
    print(grid_search.best_estimator_)
    y_true, y_pred = (y_actual_test['session_length'].values,
                      grid_search.best_estimator_.predict(X_scaled_test))
    print("Mean squared error:" + str(mean_squared_error(y_true, y_pred)))
    pd.DataFrame({'y_true': y_true, 'y_pred': y_pred}).to_csv("SGDReg_pred_true.csv")
Author: vmaidel, Project: data-science, Lines: 30, Source: GZBarLength-ML.py
Example 8: cluster_kmeans
def cluster_kmeans():
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.cluster import KMeans
    from sklearn.decomposition import PCA

    limit = 10000
    # data_parser is a project-specific module for the UCI adult-income dataset
    filepath = '/home/wenjusun/bigdata/data/adult-income/adult.data'
    record_list = data_parser.parse_file_fetch_records(filepath, limit)
    X = np.array(data_parser.records_to_vector(record_list, enable_label=False))
    # Reduce to a single principal component, then cluster the 1-D projection
    pca_estimator = PCA(n_components=1)
    X = pca_estimator.fit_transform(X)
    kmeans_model = KMeans(n_clusters=4).fit(X)
    labels = kmeans_model.labels_
    print(len(X), len(labels))
    print(labels[:40])
    plt.figure(1)
    plt.scatter(X, labels)
    plt.show()
Author: swenker, Project: bigdata, Lines: 33, Source: study_adult_income_cluster.py
Example 9: reduced_dimension
import os
import numpy as np
from sklearn import preprocessing
from sklearn.decomposition import PCA

def reduced_dimension(posture):
    i_user = 1
    session = 1
    while i_user <= 31:
        currentdirectory = os.getcwd()  # get the current directory
        parentdirectory = os.path.abspath(currentdirectory + "/../..")  # get the directory two levels up
        path = parentdirectory + '/Output Files/Reduced Dimensional Dataset/Posture-' + str(posture) + '/GenuineUser' + str(i_user)
        if not os.path.exists(path):
            os.makedirs(path)
        while session <= 8:
            data = np.genfromtxt("../../Output Files/E2-Genuine User-Session Split/Posture-" + str(posture) + "/GenuineUser-" + str(i_user) + "/1-" + str(i_user) + "-" + str(posture) + "-" + str(session) + ".csv", dtype=float, delimiter=",")
            userinformation = data[:, [0, 1, 2, 3, 4]]
            sample_train = data[:, [5, 6, 7, 8, 9, 10, 11, 13, 15, 16, 17, 18, 19, 20, 21]]
            # Scale each feature to [0, 1] before PCA
            scaler = preprocessing.MinMaxScaler().fit(sample_train)
            sample_train_scaled = scaler.transform(sample_train)
            pca = PCA(n_components=7)
            sample_train_pca = pca.fit(sample_train_scaled).transform(sample_train_scaled)
            completedata = np.column_stack((userinformation, sample_train_pca))
            np.savetxt("../../Output Files/Reduced Dimensional Dataset/Posture-" + str(posture) + "/GenuineUser" + str(i_user) + "/1-" + str(i_user) + "-" + str(posture) + "-" + str(session) + ".csv", completedata, delimiter=',')
            session += 1
        session = 1
        i_user += 1
Author: npalaska, Project: Leveraging_the_effect_of_posture_orientation_of_mobile_device_in_Touch-Dynamics, Lines: 29, Source: Reduced_dimension.py
Example 10: sentence_to_vec
from typing import List
import numpy as np
from sklearn.decomposition import PCA
# Sentence and get_word_frequency are defined elsewhere in this project

def sentence_to_vec(sentence_list: List[Sentence], embedding_size: int, a: float = 1e-3):
    sentence_set = []
    for sentence in sentence_list:
        vs = np.zeros(embedding_size)  # accumulate all word2vec values into one vector for the sentence
        sentence_length = sentence.len()
        for word in sentence.word_list:
            a_value = a / (a + get_word_frequency(word.text))  # smooth inverse frequency, SIF
            vs = np.add(vs, np.multiply(a_value, word.vector))  # vs += sif * word_vector
        vs = np.divide(vs, sentence_length)  # weighted average
        sentence_set.append(vs)  # add to our existing re-calculated set of sentences
    # calculate PCA of this sentence set
    pca = PCA(n_components=embedding_size)
    pca.fit(np.array(sentence_set))
    u = pca.components_[0]  # the first principal component
    u = np.multiply(u, np.transpose(u))  # u x uT (elementwise here, since u is 1-D)
    # pad the vector? (occurs if we have fewer sentences than embedding_size)
    if len(u) < embedding_size:
        for i in range(embedding_size - len(u)):
            u = np.append(u, 0)  # add needed extension for the multiplication below
    # resulting sentence vectors, vs = vs - u x uT x vs
    sentence_vecs = []
    for vs in sentence_set:
        sub = np.multiply(u, vs)
        sentence_vecs.append(np.subtract(vs, sub))
    return sentence_vecs
Author: hjpwhu, Project: Python, Lines: 30, Source: semeval.py
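
In the SIF scheme this snippet follows (Arora et al., 2017), the common-component removal is the projection onto the first principal component, vs - u uT vs, with u uT an outer product; because u is 1-D, np.multiply(u, np.transpose(u)) above computes an elementwise square instead. A minimal sketch of the outer-product formulation, assuming the weighted-average vectors are stacked into one matrix (the helper name is illustrative, not from the project):

import numpy as np
from sklearn.decomposition import PCA

def remove_first_component(sentence_vecs: np.ndarray) -> np.ndarray:
    """Subtract each row's projection onto the first principal component.

    sentence_vecs: array of shape (n_sentences, embedding_size).
    """
    pca = PCA(n_components=1)
    pca.fit(sentence_vecs)
    u = pca.components_[0]                        # unit-norm direction, shape (embedding_size,)
    projections = sentence_vecs @ np.outer(u, u)  # (u uT) applied to every row
    return sentence_vecs - projections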
Example 11: pca
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

def pca(inF, MIN):
    df = pd.read_table(inF, header=0)
    dc = list(df.columns)
    dc[0] = 'GeneID'
    df.columns = dc
    print(df.shape)
    # Keep genes where at least one of the eight sample columns (2..9) reaches MIN
    sel = ~(df.iloc[:, 2:10] < MIN).all(axis=1)
    df = df.loc[sel, :]
    print(df.shape)
    X = df.iloc[:, 2:df.shape[1]].values.T
    y = df.columns[2:df.shape[1]].values
    X_std = StandardScaler().fit_transform(X)
    pca = PCA()
    Y_sklearn = pca.fit_transform(X_std)
    fig = plt.figure()
    plt.style.use('ggplot')
    ax = fig.add_subplot(111)
    for lab, col in zip(y, ('red', 'red', 'green', 'green', 'blue', 'blue', 'm', 'm')):
        ax.scatter(Y_sklearn[y == lab, 0], Y_sklearn[y == lab, 1], label=lab, c=col, s=80)
    ax.set_xlabel('Principal Component 1 : %.2f' % (pca.explained_variance_ratio_[0] * 100) + '%')
    ax.set_ylabel('Principal Component 2 : %.2f' % (pca.explained_variance_ratio_[1] * 100) + '%')
    ax.legend(loc='lower right', prop={'size': 8})
    plt.tight_layout()
    plt.savefig(inF + '-RNASeq-MIN' + str(MIN) + '.pdf')
Author: jiamaozheng, Project: StanfordSGTC, Lines: 33, Source: 09-pca.py
Example 12: feature_scaled_nn_acc
import time
from keras.utils import to_categorical
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
# validation_split and create_model are helpers defined elsewhere in this project

def feature_scaled_nn_acc(mds, type):
    train, validation = validation_split(mds)
    # Multiply by 1 to convert bool to int
    y_train = train['Up'] * 1
    X_train = train.drop('Up', axis=1)
    y_validation = validation['Up'] * 1
    X_validation = validation.drop('Up', axis=1)
    # Fit the whitened PCA on the training data only, then reuse it for validation
    pre = PCA(n_components=19, whiten=True)
    X_train_pca = pre.fit_transform(X_train)
    X_validation_pca = pre.transform(X_validation)
    model = create_model(X_train_pca.shape[1], type)
    # Convert to Keras format
    y_train = to_categorical(y_train.values)
    y_validation = to_categorical(y_validation.values)
    model.fit(X_train_pca, y_train, epochs=5, batch_size=16)
    time.sleep(0.1)
    # Fit and guess
    guess_train = model.predict_classes(X_train_pca)
    guess_train = to_categorical(guess_train)
    guess_validation = model.predict_classes(X_validation_pca)
    guess_validation = to_categorical(guess_validation)
    train_acc = accuracy_score(y_train, guess_train)
    validation_acc = accuracy_score(y_validation, guess_validation)
    print("\n neural net train accuracy is {}".format(train_acc))
    print("\n neural net validation accuracy is {}".format(validation_acc))
    return guess_validation
Author: Dsinghbailey, Project: futures_predictor, Lines: 28, Source: nn.py
Example 13: pca_project
import numpy as np
from sklearn.decomposition import PCA

def pca_project(vecs, n_components=2, whiten=False):
    pca = PCA(n_components=n_components, whiten=whiten)
    vecs_projected = pca.fit_transform(vecs)
    print("=== PCA projection ===")
    print(pca.explained_variance_ratio_)
    print("chosen explained: %.2f" % np.sum(pca.explained_variance_ratio_[:n_components]))
    return vecs_projected
Author: pombredanne, Project: ChordRipple, Lines: 7, Source: postprocessing_tools.py
Example 14: Ploting3D
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (enables the '3d' projection)
from sklearn.decomposition import PCA

def Ploting3D(data, n_dimension=3):
    pca = PCA(n_components=n_dimension)
    colors = ['r', 'g', 'b', 'm']
    labels = ['label_1', 'label_2', 'label_3', 'label_4']
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    # idx holds the start/end offsets of each class inside the combined matrix
    idx = [0, len(data[0])]
    combined = np.array(data[0])
    # Combine all classes into one matrix so they share a single PCA projection
    for i in range(1, len(data)):
        combined = np.insert(combined, len(combined), data[i], axis=0)
        idx.append(idx[i] + len(data[i]))
    combined = pca.fit_transform(combined)
    for i in range(len(data)):
        ax.scatter(combined[idx[i]:idx[i+1], 0], combined[idx[i]:idx[i+1], 1],
                   combined[idx[i]:idx[i+1], 2], c=colors[i], marker='o', s=70)
    ax.set_xlabel('1st_component')
    ax.set_ylabel('2nd_component')
    ax.set_zlabel('3rd_component')
    ax.set_xlim3d(-100, 100)
    ax.set_ylim3d(-60, 50)
    ax.set_zlim3d(-60, 50)
    plt.show()
Author: jhowliu, Project: foot, Lines: 30, Source: train.py
Example 15: plot_2d_results
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

def plot_2d_results(X, y, preds):
    pca = PCA(n_components=2)
    X_r = pca.fit(X).transform(X)
    # Scatter plot of the two classes in PCA space
    plt.figure()
    cs = "cm"
    cats = [1, -1]
    target_names = ["positive", "negative"]
    for c, i, target_name in zip(cs, cats, target_names):
        plt.scatter(X_r[y == i, 0], X_r[y == i, 1], c=c, label=target_name)
    plt.legend()
    plt.title("PCA of 2d data")
    plt.savefig("figures/data-scatter.png")
    # Plot mispredictions: 1 where the prediction was correct, 0 otherwise
    plt.figure()
    diff = np.array([1 if y[i] == preds[i] else 0 for i in range(len(y))])
    cs = "rg"
    cats = [0, 1]
    target_names = ["incorrect", "correct"]
    for c, i, target_name in zip(cs, cats, target_names):
        plt.scatter(X_r[diff == i, 0], X_r[diff == i, 1], c=c, label=target_name)
    plt.legend()
    plt.title("PCA of correct/incorrect predictions")
    plt.savefig("figures/residual-scatter.png")
Author: ataki, Project: epigensML, Lines: 27, Source: cv-train.py
Example 16: __init__
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

class Transformer:
    def __init__(self, use_PCA=True):
        self._clf = DecisionTreeClassifier(min_samples_leaf=10)
        self._idx = None
        self._scaler = StandardScaler()
        self._trans = PCA(n_components='mle')  # let Minka's MLE pick the dimensionality
        self._use_PCA = use_PCA

    def fit(self, X, y):
        X = np.array(X)
        self._clf.fit(X, y)
        # Keep only the features the decision tree found informative
        self._idx = [i for i in range(len(self._clf.feature_importances_))
                     if self._clf.feature_importances_[i] > 0]
        new_set = [X[i][self._idx] for i in range(len(X))]
        # new_set = self._scaler.fit_transform(new_set)
        if self._use_PCA:
            new_set = self._trans.fit_transform(new_set)
        return new_set

    def transform(self, features):
        features = features[self._idx]
        # features = self._scaler.transform(features.astype(float))
        if self._use_PCA:
            features = self._trans.transform(features)
        return features
Author: ItsLastDay, Project: Opinion-mining-from-reviews, Lines: 29, Source: solution.py
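
The PCA('mle') above passes n_components='mle', which asks scikit-learn to choose the number of components automatically via Minka's MLE; it requires n_samples >= n_features. A small illustrative sketch with synthetic data (the shapes and noise level are assumptions for demonstration only):

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
# 200 samples in 10 dimensions, but only ~3 directions carry real variance
latent = rng.randn(200, 3)
mixing = rng.randn(3, 10)
X = latent @ mixing + 0.05 * rng.randn(200, 10)

pca = PCA(n_components='mle')   # requires n_samples >= n_features
pca.fit(X)
print(pca.n_components_)        # dimensionality chosen by Minka's MLE (likely ~3 here)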
Example 17: pca_variance
import pandas as pd
from sklearn.decomposition import PCA

def pca_variance(df):  # input is the original data frame
    df_pca = PCA()
    df_pca.fit(df)
    ratio = df_pca.explained_variance_ratio_
    # One label per fitted component (min(n_samples, n_features) of them)
    components = ['component' + str(x) for x in range(1, len(ratio) + 1)]
    df2 = pd.Series(ratio, index=components)
    return df2
Author: wang1128, Project: ML_Hyperspectral_Data_Classication, Lines: 7, Source: pca.py
Example 18: pca
import numpy as np
import pandas as pd
# `sklearn` here is a module-level flag in the source indicating whether scikit-learn is available

def pca(df, n_components=2, mean_center=False, *args, **kwargs):
    if not sklearn:
        raise ImportError('This library depends on scikit-learn (sklearn) to perform PCA analysis')
    from sklearn.decomposition import PCA
    df = df.copy()
    # We have to zero fill; NaNs raise errors in PCA
    df[np.isnan(df)] = 0
    if mean_center:
        mean = np.mean(df.values, axis=0)
        df = df - mean
    pca = PCA(n_components=n_components, *args, **kwargs)
    # Samples are columns of df, so fit and transform on the transpose
    pca.fit(df.values.T)
    scores = pd.DataFrame(pca.transform(df.values.T)).T
    scores.index = ['Principal Component %d' % (n + 1) for n in range(0, scores.shape[0])]
    scores.columns = df.columns
    weights = pd.DataFrame(pca.components_).T
    weights.index = df.index
    weights.columns = ['Weights on Principal Component %d' % (n + 1) for n in range(0, weights.shape[1])]
    return scores, weights
Author: WMGoBuffs, Project: pymaxquant, Lines: 27, Source: analysis.py
Example 19: main
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
# getActive, getNewLocation and computeCorrelation are project-specific helpers

def main():
    x = 10
    y = 10
    steps = 10000
    history = []
    # A 25x25 toy world whose cells are numbered 0..624
    world = np.array([i for i in range(625)])
    world.resize((25, 25))
    # Random walk over the world, recording the active cells at each step
    for _ in range(steps):
        active = getActive(world, x, y)
        assert len(active) == 25, "{}, {}: {}".format(x, y, active)
        history.append(active)
        x, y = getNewLocation(x, y, 25, 2, False)
    correlation = computeCorrelation(history)
    pca = PCA(n_components=25)
    pca.fit(correlation)
    print('components')
    print(pca.components_)
    print('transform:')
    transform = pca.transform(correlation)
    print(transform.shape)
    # Show each principal component reshaped back onto the 25x25 world
    for i in range(25):
        plt.imshow(transform[:, i].reshape((25, 25)), cmap="hot", interpolation="nearest")
        plt.show()
Author: dubing12, Project: htmresearch, Lines: 32, Source: dordek.py
Example 20: fit
# Method of a project-specific regression wrapper; numpy (np), traceback and
# sklearn.decomposition's PCA and FastICA are imported at module level in the source.
def fit(self, x, y, i=0):
    # if Gaussian processes are being used, data dimensionality needs to be reduced before fitting
    if self.method[i] == 'GP':
        if self.reduce_dim == 'FastICA':
            print('Reducing dimensionality with ICA')
            do_ica = FastICA(n_components=self.n_components)
            self.do_reduce_dim = do_ica.fit(x)
        if self.reduce_dim == 'PCA':
            print('Reducing dimensionality with PCA')
            do_pca = PCA(n_components=self.n_components)
            self.do_reduce_dim = do_pca.fit(x)
        x = self.do_reduce_dim.transform(x)
    print('Training model...')
    try:
        self.model.fit(x, y)
        self.goodfit = True
        print(self.model)
    except Exception:
        self.goodfit = False
        if self.method[i] == 'GP':
            print('Model failed to train! (For GP this does not always indicate a problem, especially for low numbers of components.)')
        else:
            print('Model failed to train!')
            traceback.print_stack()
    if self.ransac:
        self.outliers = np.logical_not(self.model.inlier_mask_)
        print(str(np.sum(self.outliers)) + ' outliers removed with RANSAC')
Author: USGS-Astrogeology, Project: PySAT, Lines: 31, Source: regression.py
Note: The sklearn.decomposition.PCA class examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by many developers, and copyright remains with the original authors; for redistribution and use, please refer to the corresponding project's license. Do not reproduce without permission.