This article collects typical usage examples of the Python class sklearn.decomposition.RandomizedPCA. If you have been wondering what exactly the RandomizedPCA class does, how to use it, or where to find usage examples, the curated class code examples below should help.
The following presents 20 code examples of the RandomizedPCA class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps our system recommend better Python code examples.
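One caveat before the examples: RandomizedPCA was deprecated in scikit-learn 0.18 and removed in 0.20, so on a modern installation the same algorithm is reached through PCA with svd_solver='randomized'. A minimal sketch of the modern spelling, on synthetic data:

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.randn(100, 50)  # 100 samples, 50 features of synthetic data

# Modern equivalent of RandomizedPCA(n_components=10, whiten=True):
pca = PCA(n_components=10, svd_solver='randomized', whiten=True, random_state=0)
X_reduced = pca.fit_transform(X)
print(X_reduced.shape)  # (100, 10)

The examples below all use the old class name and, in several cases, other APIs of the same era (sklearn.lda, sklearn.grid_search, sklearn.cross_validation, Python 2 print statements); they are shown as found in their source projects.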
Example 1: tryLinearDiscriminantAnalysis

def tryLinearDiscriminantAnalysis(goFast):
    from sklearn.datasets import dump_svmlight_file, load_svmlight_file
    if goFast:
        training_data, training_labels = load_svmlight_file("dt1_1500.trn.svm", n_features=253659, zero_based=True)
        validation_data, validation_labels = load_svmlight_file("dt1_1500.vld.svm", n_features=253659, zero_based=True)
        testing_data, testing_labels = load_svmlight_file("dt1_1500.tst.svm", n_features=253659, zero_based=True)
    else:
        training_data, training_labels = load_svmlight_file("dt1.trn.svm", n_features=253659, zero_based=True)
        validation_data, validation_labels = load_svmlight_file("dt1.vld.svm", n_features=253659, zero_based=True)
        testing_data, testing_labels = load_svmlight_file("dt1.tst.svm", n_features=253659, zero_based=True)

    from sklearn.lda import LDA
    from sklearn.metrics import accuracy_score
    from sklearn.grid_search import ParameterGrid
    from sklearn.decomposition import RandomizedPCA

    rpcaDataGrid = [{"n_components": [10, 45, 70, 100],
                     "iterated_power": [2, 3, 4],
                     "whiten": [True]}]
    for rpca_parameter_set in ParameterGrid(rpcaDataGrid):
        rpcaOperator = RandomizedPCA(**rpca_parameter_set)
        # PCA is unsupervised; the labels are not needed for fit/transform
        rpcaOperator.fit(training_data)
        new_training_data = rpcaOperator.transform(training_data)
        new_validation_data = rpcaOperator.transform(validation_data)
        ldaOperator = LDA()
        ldaOperator.fit(new_training_data, training_labels)
        print "Score = " + str(accuracy_score(validation_labels, ldaOperator.predict(new_validation_data)))

Author: Ikram | Project: DUMLS14 | Source: dataset_one_learner.py
Example 2: scatter

def scatter(data, labels=None, title=None, name=None):
    """2d PCA scatter plot with optional class info

    Return the pca model to be able to introspect the components or transform
    new data with the same model.
    """
    data = atleast2d_or_csr(data)
    if data.shape[1] == 2:
        # No need for a PCA; nothing was fitted, so there is no model to return
        data_2d = data
        pca = None
    else:
        pca = RandomizedPCA(n_components=2)
        data_2d = pca.fit_transform(data)
    for i, c, m in zip(np.unique(labels), cycle(COLORS), cycle(MARKERS)):
        plt.scatter(data_2d[labels == i, 0], data_2d[labels == i, 1],
                    c=c, marker=m, label=i, alpha=0.5)
    plt.legend(loc='best')
    if title is None:
        title = "2D PCA scatter plot"
    if name is not None:
        title += " for " + name
    plt.xlabel('First Principal Component')
    plt.ylabel('Second Principal Component')
    plt.title(title)
    return pca

Author: chrinide | Project: oglearn | Source: visualization.py
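A hypothetical call might look like the sketch below. COLORS and MARKERS are module-level constants in visualization.py, so placeholder values are supplied here, and atleast2d_or_csr is assumed importable from an era-appropriate scikit-learn:

# Usage sketch under assumed module context; COLORS/MARKERS are placeholders.
import numpy as np
import matplotlib.pyplot as plt

COLORS = ['b', 'g', 'r', 'c', 'm']   # assumption: palette defined in the source module
MARKERS = ['o', 's', '^', 'v', 'D']  # assumption: marker set defined in the source module

X = np.random.randn(200, 10)                # 200 samples, 10 features
labels = np.random.randint(0, 3, size=200)  # 3 fake classes
pca = scatter(X, labels, name="toy data")   # returns the fitted PCA model (or None for 2-d input)
plt.show()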
Example 3: LogisticRegressionPCA

def LogisticRegressionPCA(X, y):
    # divide our data set into a training set and a test set
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=TRAIN_TEST_SPLIT_RATIO)

    # get randomized PCA model
    num_components = 147
    print("Extracting the top %d eigenfaces from %d faces"
          % (num_components, X_train.shape[0]))
    pca = RandomizedPCA(n_components=num_components, whiten=True).fit(X_train)

    # use the PCA model on our training set and test set.
    print("Projecting the input data on the eigenfaces orthonormal basis")
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    print("done ")

    logistic_regression = linear_model.LogisticRegression(C=1e5)
    # fit the classifier on the PCA-projected training data
    logistic_regression.fit(X_train_pca, y_train)

    # print the performance of logistic regression
    print("====== Logistic Regression with PCA ========")
    print('TRAIN SCORE', logistic_regression.score(X_train_pca, y_train))
    print('TEST SCORE', logistic_regression.score(X_test_pca, y_test))

Author: lionheartX | Project: Kaggle_uoft | Source: logistic_regression_with_PCA.py
Example 4: SVM

def SVM(X, y):
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=TRAIN_TEST_SPLIT_RATIO)
    print(len(X_train))

    # Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
    # dataset): unsupervised feature extraction / dimensionality reduction
    n_components = 150
    pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)
    print("Projecting the input data on the eigenfaces orthonormal basis")
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    print("done ")

    # scale() returns a new array, so the result must be assigned
    X_train_pca = equalize_hist(X_train_pca)
    X_train_pca = preprocessing.scale(X_train_pca * 1.0, axis=1)
    X_test_pca = equalize_hist(X_test_pca)
    X_test_pca = preprocessing.scale(X_test_pca * 1.0, axis=1)

    # classifier = svm.SVC(kernel='poly', degree=3)
    # classifier.fit(X_train, y_train)
    # print('TRAIN SCORE', classifier.score(X_train, y_train))
    # print('TEST SCORE', classifier.score(X_test, y_test))

    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
                  'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    classifier2 = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    classifier2.fit(X_train_pca, y_train)
    print('TRAIN SCORE', classifier2.score(X_train_pca, y_train))
    print('TEST SCORE', classifier2.score(X_test_pca, y_test))

Author: lionheartX | Project: Kaggle_uoft | Source: yue.py
Example 5: pca_estimator

def pca_estimator(data, targets, estimator, components_number=DEFAULT_COMPONENTS_NUMBER,
                  folds_number=DEFAULT_FOLDS_NUMBER):
    kf = KFold(len(targets), n_folds=folds_number)
    # 'scores' is a numpy array: the index is the fold number, the value is the
    # fraction of correctly predicted samples in that fold's test set.
    scores = np.zeros(folds_number)
    start = time()
    index = 0
    for train, test in kf:
        x_train, x_test, y_train, y_test = data[train], data[test], targets[train], targets[test]
        pca = RandomizedPCA(n_components=components_number, whiten=True).fit(x_train)
        x_train_pca = pca.transform(x_train)
        x_test_pca = pca.transform(x_test)
        clf = estimator.fit(x_train_pca, y_train)
        scores[index] = clf.score(x_test_pca, y_test)
        index += 1
        # print("Iteration %d of %d done! Score: %f" % (index, folds_number, scores[index - 1]))
    finish = time()
    return scores.mean(), scores.std() * 2, (finish - start)

Author: himl | Project: boson | Source: PCA.py
Example 6: test_explained_variance

def test_explained_variance():
    # Check that the explained variance reported by RandomizedPCA is
    # consistent with PCA and with the empirical variances
    rng = np.random.RandomState(0)
    n_samples = 100
    n_features = 80
    X = rng.randn(n_samples, n_features)

    pca = PCA(n_components=2).fit(X)
    rpca = RandomizedPCA(n_components=2, random_state=rng).fit(X)
    assert_array_almost_equal(pca.explained_variance_ratio_,
                              rpca.explained_variance_ratio_, 1)

    # compare to empirical variances
    X_pca = pca.transform(X)
    assert_array_almost_equal(pca.explained_variance_,
                              np.var(X_pca, axis=0))

    X_rpca = rpca.transform(X)
    assert_array_almost_equal(rpca.explained_variance_, np.var(X_rpca, axis=0),
                              decimal=1)

    # Same with correlated data
    X = datasets.make_classification(n_samples, n_features,
                                     n_informative=n_features - 2,
                                     random_state=rng)[0]
    pca = PCA(n_components=2).fit(X)
    rpca = RandomizedPCA(n_components=2, random_state=rng).fit(X)
    assert_array_almost_equal(pca.explained_variance_ratio_,
                              rpca.explained_variance_ratio_, 5)

Author: AppliedArtificialIntelligence | Project: scikit-learn | Source: test_pca.py
Example 7: dimentionality_reduction

def dimentionality_reduction(train_x, test_x):
    print "Dimensionality reduction to 10D on training and test data...."
    pca = RandomizedPCA(n_components=10)
    train_x = pca.fit_transform(train_x)
    test_x = pca.transform(test_x)
    print "Done."
    return train_x, test_x

Author: shreyanshd | Project: Image-Classifier | Source: image_classifier.py
Example 8: reduce_features

def reduce_features(features, var_explained=0.9, n_components=0, verbose=False):
    """
    Performs feature reduction using PCA. If n_components is 0, automatically
    selects the number of components needed to explain at least var_explained
    of the variance.
    :param features: Features.
    :param var_explained: Minimal variance explained.
    :param n_components: Nr. of components.
    :param verbose: Verbosity.
    :return: Reduced feature set.
    """
    if n_components == 0:
        # Run full PCA to estimate the nr. of components needed to explain the
        # given percentage of variance.
        estimator = RandomizedPCA()
        estimator.fit(features)
        variance = 0.0
        for i in range(len(estimator.explained_variance_ratio_)):
            variance += estimator.explained_variance_ratio_[i]
            if variance > var_explained:
                n_components = i + 1
                if verbose:
                    print('{} % of variance explained using {} components'.format(var_explained, n_components))
                break
    # Re-run PCA with only the estimated nr. of components
    estimator = RandomizedPCA(n_components=n_components)
    features = estimator.fit_transform(features)
    return features

Author: rbrecheisen | Project: scripts | Source: prepare.py
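For reference, current scikit-learn builds this select-by-explained-variance behavior in: passing a float in (0, 1) as n_components makes PCA keep just enough components. A minimal sketch (not the author's code) on random data:

import numpy as np
from sklearn.decomposition import PCA

X = np.random.randn(200, 50)
# keep as many components as needed to explain at least 90% of the variance
pca = PCA(n_components=0.9, svd_solver='full')
X_reduced = pca.fit_transform(X)
print(X_reduced.shape[1], pca.explained_variance_ratio_.sum())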
Example 9: do_nbnn

def do_nbnn(train_folder, test_folder):
    train = load_patches(args.train_folder)
    test = load_patches(args.test_folder)
    if options.relu:
        get_logger().info("Applying RELU")
        for class_data in train:
            class_data.patches = class_data.patches.clip(min=0)
        for class_data in test:
            class_data.patches = class_data.patches.clip(min=0)
    if options.scale:
        get_logger().info("Applying standardization")
        scaler = StandardScaler(copy=False)
        scaler.fit(np.vstack([t.patches for t in train]))
        for class_data in train:
            class_data.patches = scaler.transform(class_data.patches)
        for class_data in test:
            class_data.patches = scaler.transform(class_data.patches)
    if options.pca:
        get_logger().info("Calculating PCA")
        pca = RandomizedPCA(n_components=options.pca)
        pca.fit(np.vstack([t.patches for t in train]))
        # for class_data in train:
        #     get_logger().info("Fitting class " + class_data.name)
        #     pca.partial_fit(class_data.patches)
        get_logger().info("Keeping " + str(pca.explained_variance_ratio_.sum()) + " variance ("
                          + str(options.pca) + " components)\nApplying PCA")
        for class_data in train:
            class_data.patches = pca.transform(class_data.patches)
        for class_data in test:
            class_data.patches = pca.transform(class_data.patches)
    nbnn(train, test, NN_Engine())

Author: enoonIT | Project: nbnn-nbnl | Source: nbnn.py
Example 10: build_classifier

def build_classifier(train_data_x_in, train_data_y, classifier_in="svc_basic"):
    print "Attempting to build classifier."
    train_data_x = train_data_x_in
    transformer = ""
    # classifier = grid_search.GridSearchCV(svm.SVC(), parameters).fit(train_data_x, train_data_y)
    if classifier_in == "svc_basic":
        classifier = svm.SVC()
        print "Selection was basic svm.SVC."
    elif classifier_in == "svc_extensive":
        classifier = svm.SVC(kernel="linear", C=0.025, gamma=0.01)
        print "Selection was extensive svm.SVC, with linear kernel, C==0.025 and gamma==0.01."
    elif classifier_in == "kneighbors_basic":
        transformer = RandomizedPCA(n_components=2000)
        train_data_x = transformer.fit_transform(train_data_x)
        classifier = KNeighborsClassifier()
        print "Selection was KNeighbors basic, using RandomizedPCA to transform data first. n_components==2000."
    elif classifier_in == "bagging_basic":
        classifier = BaggingClassifier(KNeighborsClassifier(), max_samples=0.5, max_features=0.5)
        print "Selection was Bagging basic, with max_samples==0.5 and max_features==0.5."
    elif classifier_in == "spectral_basic":
        transformer = SpectralEmbedding(n_components=2000)
        train_data_x = transformer.fit_transform(train_data_x)
        classifier = KNeighborsClassifier()
        print "Selection was Spectral basic, using KNeighborsClassifier on SpectralEmbedding-transformed data. n_components==2000."
    # default to SVC in case of any sort of parsing error.
    else:
        print "Error in selecting classifier class. Reverting to SVC."
        classifier = svm.SVC()
    classifier.fit(train_data_x, train_data_y)
    print "Doing classifier estimation."
    return classifier, train_data_x, transformer

Author: RAMichel | Project: image_classifier | Source: build_classifier.py
Example 11: calc_hog

def calc_hog(fpaths, save=False):
    '''
    Compute histogram of oriented gradients (HOG) features, then reduce them
    with randomized PCA.

    Input:
        fpaths : files on which HOG will be computed
        save   : if true, output is saved to disk
    '''
    hogs = np.empty((len(fpaths), 15876))
    for i, fpath in enumerate(fpaths):
        img = imread(os.path.join(imgdir, fpath))
        if len(img.shape) == 3:
            img = rgb2gray(img)
        # rescale so all feature vectors are the same length
        img_resize = resize(img, (128, 128))
        img_hog = hog(img_resize)
        hogs[i, :] = img_hog

    hogs_sc = scale(hogs)
    n_components = 15
    pca = RandomizedPCA(n_components=n_components)
    hogs_decomp = pca.fit_transform(hogs_sc)

    df = pd.DataFrame(hogs_decomp, index=[os.path.split(i)[1] for i in fpaths])
    df.index.name = 'fpath'
    df.columns = ['feat_hog_%2.2u' % i for i in range(1, n_components + 1)]
    if save:
        df.to_csv('hog.csv')
    return df

Author: r-b-g-b | Project: AY250_HW | Source: calcFeatures.py
Example 12: fit

def fit(self):
    wordids_map = NameToIndex()
    labs_map = NameToIndex()

    wordscount = self._word_cluster.get_words_count()
    print "start compute_tfidf ..."

    # build the bag-of-words vector for each document
    docs = self._word_cluster.get_samples()
    bow = []
    labs = []
    for k, v in docs.iteritems():
        vec = numpy.zeros(wordscount).tolist()
        for i in v:
            vec[wordids_map.map(i)] += 1
        bow.append(vec)
        labs.append(labs_map.map(k[0]))

    labs = numpy.array(labs)
    tfidf = TfidfTransformer(smooth_idf=True, sublinear_tf=True, use_idf=True)
    datas = numpy.array(tfidf.fit_transform(bow).toarray())
    print "compute_tfidf done"

    pca = RandomizedPCA(n_components=20, whiten=True).fit(datas)
    svc = train_svc(numpy.array(labs_map.names), labs, pca.transform(datas))

    self._tfidf = tfidf
    self._svc = svc
    self._labs_map = labs_map
    self._wordids_map = wordids_map
    self._pca = pca

Author: caoym | Project: odr | Source: odr.py
Example 13: test_randomized_pca_check_list

def test_randomized_pca_check_list():
    """Test that the projection by RandomizedPCA on list data is correct"""
    X = [[1.0, 0.0], [0.0, 1.0]]
    X_transformed = RandomizedPCA(n_components=1, random_state=0).fit(X).transform(X)
    assert_equal(X_transformed.shape, (2, 1))
    assert_almost_equal(X_transformed.mean(), 0.00, 2)
    assert_almost_equal(X_transformed.std(), 0.71, 2)

Author: Garrett-R | Project: scikit-learn | Source: test_pca.py
Example 14: pca_data

def pca_data(test_x, train_x, params):
    print 'pcaing data ...'
    components = int(params['components'])
    pca = RandomizedPCA(components, whiten=True).fit(train_x)
    pca_train_x = pca.transform(train_x)
    pca_test_x = pca.transform(test_x)
    return pca_test_x, pca_train_x

Author: 123fengye741 | Project: FaceRetrieval | Source: pre_process.py
Example 15: compute_pca

def compute_pca(reception_stats, n_components=5):
    reception_mean = reception_stats.mean(axis=0)
    pca = RandomizedPCA(n_components - 1)
    pca.fit(reception_stats)
    pca_components = np.vstack([reception_mean, pca.components_])
    return pca, pca_components

Author: AndrewRook | Project: phdfootball | Source: wr_pca.py
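A hypothetical call on made-up data, assuming an older scikit-learn where RandomizedPCA is still available, just to show the output shape (the mean vector stacked above the n_components - 1 principal axes):

import numpy as np

reception_stats = np.random.rand(300, 16)  # made-up: 300 players x 16 stat columns
pca, components = compute_pca(reception_stats, n_components=5)
print(components.shape)  # (5, 16): row 0 is the mean, rows 1-4 are principal axes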
Example 16: test_sparse_randomized_pca_inverse

def test_sparse_randomized_pca_inverse():
    """Test that RandomizedPCA is invertible on sparse data"""
    rng = np.random.RandomState(0)
    n, p = 50, 3
    X = rng.randn(n, p)  # spherical data
    X[:, 1] *= .00001  # make middle component relatively small
    # no large means because the sparse version of randomized pca does not do
    # centering to avoid breaking the sparsity
    X = csr_matrix(X)

    # same check that we can find the original data from the transformed signal
    # (since the data is almost of rank n_components)
    pca = RandomizedPCA(n_components=2, random_state=0)
    assert_warns(DeprecationWarning, pca.fit, X)
    Y = pca.transform(X)
    Y_inverse = pca.inverse_transform(Y)
    assert_almost_equal(X.todense(), Y_inverse, decimal=2)

    # same as above with whitening (approximate reconstruction)
    pca = assert_warns(DeprecationWarning, RandomizedPCA(n_components=2,
                       whiten=True, random_state=0).fit, X)
    Y = pca.transform(X)
    Y_inverse = pca.inverse_transform(Y)
    relative_max_delta = (np.abs(X.todense() - Y_inverse)
                          / np.abs(X).mean()).max()
    # XXX: this does not seem to work as expected:
    assert_almost_equal(relative_max_delta, 0.91, decimal=2)

Author: Adrellias | Project: scikit-learn | Source: test_pca.py
Example 17: detect

def detect(self, imageURLs, params):
    array = []
    for param in params:
        img = self.img_to_matrix(param['imageURL'])
        data = self.flatten_image(img)
        array.append(data)

    array = np.array(array)
    pca = RandomizedPCA(n_components=5)
    n_data = pca.fit_transform(array)
    clf = joblib.load('src/resource/models/model.pkl')
    result = clf.predict(n_data).tolist()

    for param, r in zip(params, result):
        raw_img = urllib2.urlopen(param['imageURL']).read()
        if r == 1:
            cntr = len([i for i in os.listdir("test/images/rain/") if 'rain' in i]) + 1
            path = "static/images/rain_" + str(cntr) + '.jpg'
            f = open(path, 'wb')
            f.write(raw_img)
            f.close()

            # build the event record
            when = {'type': 'timestamp', 'time': param['time']}
            where = {"type": "Point", "coordinates": [param['longitude'], param['latitude']]}
            what = {'topic': {'value': u'雨'}, 'tweet': param['value']}  # u'雨' = "rain"
            who = [{"type": "url", "value": param['imageURL']},
                   {"value": "evwh <[email protected]>", "type": "author"}]
            event = {'observation': {'what': what, 'when': when, 'where': where, 'who': who}}
            self.connection['event']['TwitterImageRainSensor'].insert(event)

Author: bulbulpaul | Project: image-detector | Source: detect.py
Example 18: SVM

def SVM(X_data, y_data):
    # normalize() and scale() return new arrays; the results must be assigned
    X_data = equalize_hist(X_data)
    X_data = preprocessing.normalize(X_data, 'max')
    X_data = preprocessing.scale(X_data, axis=1)

    # divide our data set into a training set and a test set
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(X_data, y_data, test_size=TRAIN_TEST_SPLIT_RATIO)

    n_components = 120
    print("Extracting the top %d eigenfaces from %d faces"
          % (n_components, X_train.shape[0]))
    pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)
    print("Projecting the input data on the eigenfaces orthonormal basis")
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    print("done ")

    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
                  'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    classifier = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    classifier.fit(X_train_pca, y_train)
    print("====== PCA %d ========" % n_components)
    print('TRAIN SCORE', classifier.score(X_train_pca, y_train))
    print('TEST SCORE', classifier.score(X_test_pca, y_test))

Author: lionheartX | Project: Kaggle_uoft | Source: SVM_best.py
Example 19: SVM

def SVM(X_train, y_train, X_test):
    print("SVM with PCA (rbf kernel), whitening off, max-normalize plus histogram equalization")
    # normalize() returns a new array; the result must be assigned
    X_train = preprocessing.normalize(X_train, 'max')
    X_test = preprocessing.normalize(X_test, 'max')
    # preprocessing.robust_scale(X, axis=1, with_centering=True)  # performed badly
    X_train = equalize_hist(X_train)
    X_test = equalize_hist(X_test)

    n_components = 147
    print("Extracting the top %d eigenfaces from %d faces"
          % (n_components, X_train.shape[0]))
    pca = RandomizedPCA(n_components=n_components, whiten=False).fit(X_train)
    print("Projecting the input data on the eigenfaces orthonormal basis")
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    print("done ")

    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
                  'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    classifier13 = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    classifier13.fit(X_train_pca, y_train)
    return list(classifier13.predict(X_test_pca))

Author: lionheartX | Project: Kaggle_uoft | Source: SVM_filter.py
Example 20: rpca

def rpca(numpy_file='../data/Paintings/two_class/Paintings_train.csv'):
    """Performs randomized PCA on the given numpy file.

    The file should contain n rows (samples) and n columns, where the last
    column is the label and the rest are features.

    :type numpy_file: string
    :param numpy_file: The file name of the numpy file to be analyzed.
    """
    import numpy as np
    import matplotlib.pyplot as pl
    import pandas as pd
    from sklearn.decomposition import RandomizedPCA

    all_data = np.loadtxt(numpy_file, delimiter=',')
    data = all_data[:, :-1]
    y = all_data[:, -1]
    pca = RandomizedPCA(n_components=2)
    X = pca.fit_transform(data)
    df = pd.DataFrame({"x": X[:, 0], "y": X[:, 1],
                       "label": np.where(y == 1, "realism", "abstract")})
    colors = ["red", "yellow"]
    for label, color in zip(df['label'].unique(), colors):
        mask = df['label'] == label
        pl.scatter(df[mask]['x'], df[mask]['y'], c=color, label=label)
    pl.legend()
    pl.title('Randomized PCA analysis')
    pl.show()

Author: abhishekraok | Project: promising-patterns | Source: utils_abhi.py
Note: The sklearn.decomposition.RandomizedPCA class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are drawn from open-source projects contributed by many developers, and copyright remains with the original authors; consult each project's License before redistributing or reusing the code. Do not reproduce without permission.