This article collects typical usage examples of the Python class sklearn.pipeline.FeatureUnion. If you have been wondering what FeatureUnion is for and how to use it, the curated class examples below should help.
Below are 20 code examples of the FeatureUnion class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
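Before the individual snippets, here is a minimal, self-contained sketch of the pattern nearly all of them share: a FeatureUnion fits several transformers in parallel and concatenates their outputs column-wise. The dataset and transformer choices below are illustrative assumptions, not taken from any example on this page.

from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
from sklearn.pipeline import FeatureUnion

X, y = load_iris(return_X_y=True)

# Concatenate 2 PCA components with the single best univariate feature.
union = FeatureUnion([("pca", PCA(n_components=2)),
                      ("kbest", SelectKBest(k=1))])
X_combined = union.fit_transform(X, y)
print(X_combined.shape)  # (150, 3): 2 PCA columns + 1 selected column

Each named transformer also becomes addressable in grid searches via the features__<name>__<param> convention, which several of the examples below exploit.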
Example 1: pca_kpca
def pca_kpca(train_data, labels):
    # make_union already returns a FeatureUnion, so use it directly;
    # wrapping it in FeatureUnion(...) again (as the original did) would fail,
    # because FeatureUnion expects a list of (name, transformer) tuples.
    combined = make_union(PCA(), TruncatedSVD(), KernelPCA())
    # estimators = [('linear_pca', PCA()), ('kernel_pca', KernelPCA())]
    # combined = FeatureUnion(estimators)
    combined.fit(train_data, labels)  # or: combined.fit_transform(train_data, labels)
    return combined
Developer: kirk86 | Project: Task-1 | Lines: 7 | Source: misc.py
Example 2: test_feature_union
def test_feature_union(self):
    """Tests that combining multiple featurizers works as expected."""
    modules = ["bag-of-words", "entities"]
    modules_list, _ = modules_to_dictionary(modules)
    feature_union = FeatureUnion(modules_list)
    feature_union.fit(texts_entities, outcomes)
    feature_union.transform(["unknown"])
Developer: cgoldammer | Project: simple_text_analysis | Lines: 7 | Source: tests.py
Example 3: testLogistic
def testLogistic(lbda=1.0, n_components=20, kbest=4):
    # X = otto.data[:1000, :20]
    # y = otto.target[:1000]
    otto = load_otto()
    X = otto.data[:, :]
    y = otto.target[:]
    # n_components = 20
    # kbest = 4
    # print 'y.shape =', y.shape
    scaler = StandardScaler().fit(X)  # renamed from `scalar` for clarity
    X = scaler.transform(X)
    pca = PCA(n_components=n_components)
    selection = SelectKBest(k=kbest)
    combined_features = FeatureUnion(
        [("pca", pca), ("univ_select", selection)]
    )
    X_features = combined_features.fit(X, y).transform(X)
    logistic = LogisticRegression(C=1.0 / lbda)
    pipe = Pipeline(steps=[('features', combined_features), ('logistic', logistic)])
    trainData = X
    trainTarget = y
    pipe.fit(trainData, trainTarget)
    # print trainTarget
    test_otto = load_testotto()
    testData = test_otto.data
    testData = scaler.transform(testData)
    # logging.debug('lambda=%.3f: score is %.3f' % (lbda, pipe.score()))
    # save the prediction
    prediction = pipe.predict(testData)  # the original called predict_proba twice here
    proba = pipe.predict_proba(testData)
    save_submission(lbda, proba, prediction)
Developer: Turf1013 | Project: Machine_Learning | Lines: 35 | Source: logistic_submission.py
Example 4: testSVC
def testSVC(lbda=1.0, n_components=20, kbest=4):
    otto = load_otto()
    X = otto.data
    y = otto.target
    # X = otto.data[:10000, :10]
    # y = otto.target[:10000]
    scaler = StandardScaler().fit(X)
    X = scaler.transform(X)
    pca = PCA(n_components=n_components)
    selection = SelectKBest(k=kbest)
    combined_features = FeatureUnion(
        [("pca", pca), ("univ_select", selection)]
    )
    X_features = combined_features.fit(X, y).transform(X)
    svc = SVC(C=1.0 / lbda, kernel='rbf', cache_size=400, probability=True)
    pipe = Pipeline(steps=[('features', combined_features), ('svc', svc)])
    trainData = X
    trainTarget = y
    pipe.fit(trainData, trainTarget)
    test_otto = load_testotto()
    testData = test_otto.data
    testData = scaler.transform(testData)
    # save the prediction
    prediction = pipe.predict(testData)  # the original called predict_proba twice here
    proba = pipe.predict_proba(testData)
    save_submission(lbda, proba, prediction)
Developer: Turf1013 | Project: Machine_Learning | Lines: 30 | Source: svc_submission.py
Example 5: best_estimator
def best_estimator(self, X, y):
    try:
        pca = PCA(n_components=2)
        selection = SelectKBest(k=2)
        combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])
        X_features = combined_features.fit(X, y).transform(X)
        regr = linear_model.LassoCV()
        pipeline = Pipeline([("features", combined_features), ("regression", regr)])
        # Note: both branches currently define the same grid; kept as in the source.
        if 'batter' in self.player:
            param_grid = dict(features__pca__n_components=[1, 2, 3],
                              features__univ_select__k=[1, 2])
        else:
            param_grid = dict(features__pca__n_components=[1, 2, 3],
                              features__univ_select__k=[1, 2])
        grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=100)
        grid_search.fit(X, y)
        self.modelled = True
        regr = grid_search
        return regr
    except ValueError as e:
        print e
        self.modelled = False
        return None
Developer: emschorsch | Project: fanduel | Lines: 25 | Source: Model.py
Example 6: prediction
def prediction(train_df, test_df, MODEL):
    print "... start prediction"
    fu_obj = FeatureUnion(transformer_list=features.feature_list)
    train_X = fu_obj.fit_transform(train_df)
    train_y = train_df["Sales"].as_matrix()
    clf = GridSearchCV(estimator=clf_dict[MODEL]["clf"],
                       param_grid=clf_dict[MODEL]["paramteters"],  # (sic: key name as defined in clf_dict)
                       n_jobs=3, scoring=rmspe, verbose=1)
    clf.fit(train_X, train_y)
    print clf.best_score_
    index_sr = pd.Series(get_split_feature_list(fu_obj), name="Feature")
    if hasattr(clf.best_estimator_, "coef_"):
        coef_sr = pd.Series(clf.best_estimator_.coef_, name="Coef")
        coef_df = pd.concat([index_sr, coef_sr], axis=1).set_index("Feature")
        coeffile = SUBMISSION + "coef_%s.csv" % MODEL
        coef_df.to_csv(coeffile)
    if hasattr(clf.best_estimator_, "feature_importances_"):
        coef_sr = pd.Series(clf.best_estimator_.feature_importances_,
                            name="Importance")
        coef_df = pd.concat([index_sr, coef_sr], axis=1).set_index("Feature")
        coeffile = SUBMISSION + "importance_%s.csv" % MODEL
        coef_df.to_csv(coeffile)
    print "... start y_pred"
    test_X = fu_obj.transform(test_df)
    y_pred = clf.predict(test_X)
    pred_sr = pd.Series(y_pred, name="Sales", index=test_df["Id"])
    submissionfile = SUBMISSION + "submission_%s.csv" % MODEL
    pred_sr.to_csv(submissionfile, header=True, index_label="ID")
Developer: guruttosekai2011 | Project: Rossmann_Store_Sales | Lines: 34 | Source: prediction.py
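A portability note on this snippet (and Example 19 below): DataFrame/Series.as_matrix() was removed in pandas 1.0. On current pandas the equivalent, assuming the same train_df, would be:

train_y = train_df["Sales"].to_numpy()  # modern replacement for .as_matrix()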
Example 7: trainItalianSexClassifier
def trainItalianSexClassifier(self):
    # get correct labels from dictionary in trainY and testY
    trainX = self.italianTrainData[0]
    trainY = self.getYlabels(self.italianTrainData[1], 'sex')
    combined_features = FeatureUnion([("tfidf", TfidfVectorizer()),
                                      ("ngrams", TfidfVectorizer(ngram_range=(3, 3), analyzer="char")),
                                      ("counts", CountVectorizer()),
                                      ("latin", Latin()),
                                      ], transformer_weights={
                                          'latin': 1,
                                          'tfidf': 2,
                                          'ngrams': 2,
                                          'counts': 1,
                                      })
    X_features = combined_features.fit(trainX, trainY).transform(trainX)
    classifier = svm.LinearSVC()
    pipeline = Pipeline([("features", combined_features), ("classifier", classifier)])
    pipeline.fit(trainX, trainY)
    return pipeline
Developer: chrispool | Project: lfd | Lines: 25 | Source: classifiers.py
Example 8: fit
def fit(self, X, y=None):
    Trans2 = Q2Transformer()
    Trans3 = Q3Transformer()
    Trans4 = Q4Transformer()
    combined_features = FeatureUnion([("Q2", Trans2), ("Q3", Trans3), ("Q4", Trans4)])
    # Store the fitted union on a separate attribute; the original assigned it
    # to self.fit, which shadows this method after the first call.
    self.combined_features = combined_features.fit(X)
    return self
Developer: FangMath | Project: MachineLearning_Mini_Project | Lines: 7 | Source: Models_ml.py
Example 9: best_estimator
def best_estimator(self, X, y):
    try:
        pca = PCA(n_components=2)
        selection = SelectKBest(k=2)
        combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])
        X_features = combined_features.fit(X, y).transform(X)
        regr = linear_model.LassoCV()
        pipeline = Pipeline([("features", combined_features), ("regression", regr)])
        if 'batter' in self.player:
            param_grid = dict(features__pca__n_components=[1],
                              features__univ_select__k=[1])
        else:
            param_grid = dict(features__pca__n_components=[1, 2, 3, 4],
                              features__univ_select__k=[1, 2, 3, 4])
        grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=0)
        grid_search.fit(X, y)
        self.modelled = True
        regr = grid_search
        # Ian: should do R2 on predicted points vs. points on a given day
        self.R2 = r2_score(self.target_matrix, regr.predict(self.feature_matrix))
        return regr
    except ValueError as e:
        print e
        self.modelled = False
        return None
Developer: cole-maclean | Project: fanduel | Lines: 26 | Source: Model.py
Example 10: rbf_kernels
def rbf_kernels(env, n_samples=100000, gamma=[0.01, 0.1], n_components=100):
    """Represent observation samples using RBF kernels.

    EXAMPLE
    -------
    >>> env = gym.make('MountainCar-v0')
    >>> n_params, rbf = rbf_kernels(env, n_components=100)
    >>> sample = env.observation_space.sample().reshape((1, env.observation_space.shape[0]))
    >>> rbf(sample).shape
    (1, 100)
    """
    observation_examples = np.array([env.observation_space.sample() for _ in range(n_samples)])
    # Fit feature scaler
    scaler = sklearn.preprocessing.StandardScaler()
    scaler.fit(observation_examples)
    # Fit feature extractor
    features = []
    for g in gamma:
        features.append(('gamma={}'.format(g), RBFSampler(n_components=n_components // len(gamma), gamma=g)))
    features = FeatureUnion(features)
    features.fit(scaler.transform(observation_examples))

    def _rbf_kernels(observation):
        return features.transform(scaler.transform(observation))

    return _rbf_kernels
Developer: sotetsuk | Project: pyRLbook | Lines: 29 | Source: function_approximation.py
Example 11: concat_feature_extractors
def concat_feature_extractors(train_data, labels):
    # This dataset is way too high-dimensional. Better do PCA:
    pca = PCA(n_components=2)
    # Maybe some original features were good, too?
    selection = SelectKBest(k=1)
    # Build estimator from PCA and univariate selection:
    combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])
    # Use combined features to transform dataset:
    X_features = combined_features.fit(train_data, labels).transform(train_data)
    # Classify:
    svm = SVC(kernel="linear")
    svm.fit(X_features, labels)
    # Do grid search over k, n_components and C:
    pipeline = Pipeline([("features", combined_features), ("svm", svm)])
    param_grid = dict(features__pca__n_components=[1, 2, 3],
                      features__univ_select__k=[1, 2],
                      svm__C=[0.1, 1, 10])
    grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=10)
    grid_search.fit(train_data, labels)
    print(grid_search.best_estimator_)
Developer: kirk86 | Project: Task-1 | Lines: 29 | Source: misc.py
Example 12: train_model
def train_model(trainset):
    word_vector = TfidfVectorizer(analyzer="word", ngram_range=(2, 2), binary=False,
                                  max_features=2000, min_df=1, decode_error="ignore")
    # print word_vector
    print "works fine"
    char_vector = TfidfVectorizer(ngram_range=(2, 3), analyzer="char", binary=False,
                                  min_df=1, max_features=2000, decode_error="ignore")
    vectorizer = FeatureUnion([("chars", char_vector), ("words", word_vector)])
    corpus = []
    classes = []
    for item in trainset:
        corpus.append(item['text'])
        classes.append(item['label'])
    print "Training instances : ", 0.8 * len(classes)
    print "Testing instances : ", 0.2 * len(classes)
    matrix = vectorizer.fit_transform(corpus)
    print "feature count : ", len(vectorizer.get_feature_names())
    print "training model"
    X = matrix.toarray()
    y = numpy.asarray(classes)
    model = LinearSVC()
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, test_size=.2, random_state=0)
    y_pred = OneVsRestClassifier(model).fit(X_train, y_train).predict(X_test)
    # y_prob = OneVsRestClassifier(model).fit(X_train, y_train).decision_function(X_test)
    # print y_prob
    # con_matrix = []
    # for row in range(len(y_prob)):
    #     temp = [y_pred[row]]
    #     for prob in y_prob[row]:
    #         temp.append(prob)
    #     con_matrix.append(temp)
    # for row in con_matrix:
    #     output.write(str(row)+"\n")
    # print y_pred
    # print y_test
    res1 = [i for i, j in enumerate(y_pred) if j == 'anonEdited']
    res2 = [i for i, j in enumerate(y_test) if j == 'anonEdited']
    reset = []
    for r in res1:
        if y_test[r] != "anonEdited":
            reset.append(y_test[r])
    for r in res2:
        if y_pred[r] != "anonEdited":
            reset.append(y_pred[r])
    output = open(sys.argv[2], "w")
    for suspect in reset:
        output.write(str(suspect) + "\n")
    cm = confusion_matrix(y_test, y_pred)
    print(cm)
    pl.matshow(cm)
    pl.title('Confusion matrix')
    pl.colorbar()
    pl.ylabel('True label')
    pl.xlabel('Predicted label')
    pl.show()
    print accuracy_score(y_pred, y_test)
Developer: srini21 | Project: Amazon-deceptive-reviews | Lines: 60 | Source: anontesting.py
Example 13: test_feature_union_feature_names
def test_feature_union_feature_names():
    word_vect = CountVectorizer(analyzer="word")
    char_vect = CountVectorizer(analyzer="char_wb", ngram_range=(3, 3))
    ft = FeatureUnion([("chars", char_vect), ("words", word_vect)])
    ft.fit(JUNK_FOOD_DOCS)
    feature_names = ft.get_feature_names()
    for feat in feature_names:
        assert_true("chars__" in feat or "words__" in feat)
    assert_equal(len(feature_names), 35)
Developer: Givonaldo | Project: scikit-learn | Lines: 9 | Source: test_pipeline.py
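A version note: FeatureUnion.get_feature_names() used here was deprecated in scikit-learn 1.0 and removed in 1.2 in favor of get_feature_names_out(). On current releases the same check, assuming the fitted ft above, would read:

feature_names = ft.get_feature_names_out()
# Every output column is prefixed with its transformer's name.
assert all(name.startswith(("chars__", "words__")) for name in feature_names)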
Example 14: convert_testdata
def convert_testdata(test_gray_data, feature_rule=f.feature_transformer_rule):
    data_df = f.make_test_df(test_gray_data)
    fu = FeatureUnion(transformer_list=feature_rule)
    Std = preprocessing.StandardScaler()
    X_test = fu.fit_transform(data_df)
    # X_test = Std.fit_transform(X_test)
    return X_test
Developer: haisland0909 | Project: Denoising-Dirty-Documents | Lines: 10 | Source: repredict.py
Example 15: get_pca_transformer
def get_pca_transformer(train_x, train_y, n_components=-1):
    if n_components == -1:
        n_components = int(np.ceil(np.sqrt(train_x.shape[1])))
    pca = PCA(n_components=n_components)
    selection = SelectKBest(k=n_components // 2)  # integer division: k must be an int
    combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])
    return combined_features.fit(train_x, train_y)
Developer: challenging | Project: kaggle | Lines: 10 | Source: pre_loader.py
Example 16: fit_logreg
def fit_logreg(self):
    tokenize_sense = CachedFitTransform(Pipeline([
        ('tokenize', Map(compose(tokenize, normalize_special, unescape))),
        ('normalize', MapTokens(normalize_elongations)),
    ]), self.memory)
    features = FeatureUnion([
        # ('w2v_doc', ToCorporas(Pipeline([
        #     ('tokenize', MapCorporas(tokenize_sense)),
        #     ('feature', MergeSliceCorporas(Doc2VecTransform(CachedFitTransform(Doc2Vec(
        #         dm=0, dbow_words=1, size=100, window=10, hs=0, negative=5, sample=1e-3, min_count=1, iter=20,
        #         workers=16
        #     ), self.memory)))),
        # ]).fit([self.train_docs, self.unsup_docs[:10**6], self.val_docs, self.test_docs]))),
        # ('w2v_word_avg', Pipeline([
        #     ('tokenize', tokenize_sense),
        #     ('feature', Word2VecAverage(CachedFitTransform(Word2Vec(
        #         sg=1, size=100, window=10, hs=0, negative=5, sample=1e-3, min_count=1, iter=20, workers=16
        #     ), self.memory))),
        # ]).fit(self.unsup_docs[:10**6])),
        # ('w2v_word_avg_google', Pipeline([
        #     ('tokenize', tokenize_sense),
        #     ('feature', Word2VecAverage(joblib.load('data/google/GoogleNews-vectors-negative300.pickle'))),
        # ])),
        # ('w2v_word_norm_avg', Pipeline([
        #     ('tokenize', tokenize_sense),
        #     ('feature', Word2VecNormAverage(CachedFitTransform(Word2Vec(
        #         sg=1, size=100, window=10, hs=0, negative=5, sample=1e-3, min_count=1, iter=20, workers=16
        #     ), self.memory))),
        # ]).fit(self.unsup_docs[:10**6])),
        ('w2v_word_norm_avg_google', Pipeline([
            ('tokenize', tokenize_sense),
            ('feature', Word2VecNormAverage(joblib.load('data/google/GoogleNews-vectors-negative300.pickle'))),
        ])),
        # ('w2v_word_max', Pipeline([
        #     ('tokenize', tokenize_sense),
        #     ('feature', Word2VecMax(CachedFitTransform(Word2Vec(
        #         sg=1, size=100, window=10, hs=0, negative=5, sample=1e-3, min_count=1, iter=20, workers=16
        #     ), self.memory))),
        # ]).fit(self.unsup_docs[:10**6])),
        # ('w2v_word_max_google', Pipeline([
        #     ('tokenize', tokenize_sense),
        #     ('feature', Word2VecMax(joblib.load('data/google/GoogleNews-vectors-negative300.pickle'))),
        # ])),
        # ('w2v_word_inv', ToCorporas(Pipeline([
        #     ('tokenize', MapCorporas(tokenize_sense)),
        #     ('feature', MergeSliceCorporas(Word2VecInverse(CachedFitTransform(Word2Vec(
        #         sg=1, size=100, window=10, hs=0, negative=5, sample=0, min_count=1, iter=20, workers=16
        #     ), self.memory)))),
        # ]).fit([self.train_docs, self.unsup_docs[:10**5], self.val_docs, self.test_docs]))),
    ])
    classifier = LogisticRegression()
    with temp_log_level({'gensim.models.word2vec': logging.INFO}):
        classifier.fit(features.transform(self.train_docs), self.train_labels())
    estimator = Pipeline([('features', features), ('classifier', classifier)])
    return 'logreg({})'.format(','.join(name for name, _ in features.transformer_list)), estimator
Developer: meshiguge | Project: senti | Lines: 55 | Source: senti_models.py
Example 17: test_feature_union
def test_feature_union():
    # basic sanity check for feature union
    iris = load_iris()
    X = iris.data
    X -= X.mean(axis=0)
    y = iris.target
    pca = RandomizedPCA(n_components=2, random_state=0)
    select = SelectKBest(k=1)
    fs = FeatureUnion([("pca", pca), ("select", select)])
    fs.fit(X, y)
    X_transformed = fs.transform(X)
    assert_equal(X_transformed.shape, (X.shape[0], 3))
    # check if it does the expected thing
    assert_array_almost_equal(X_transformed[:, :-1], pca.fit_transform(X))
    assert_array_equal(X_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    # test if it also works for sparse input
    # We use a different pca object to control the random_state stream
    fs = FeatureUnion([("pca", pca), ("select", select)])
    X_sp = sparse.csr_matrix(X)
    X_sp_transformed = fs.fit_transform(X_sp, y)
    assert_array_almost_equal(X_transformed, X_sp_transformed.toarray())
    # test setting parameters
    fs.set_params(select__k=2)
    assert_equal(fs.fit_transform(X, y).shape, (X.shape[0], 4))
Developer: cdeil | Project: scikit-learn | Lines: 28 | Source: test_pipeline.py
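Note that RandomizedPCA used above was removed from scikit-learn in 0.20; on current versions the equivalent estimator is spelled as a solver option of PCA:

from sklearn.decomposition import PCA
pca = PCA(n_components=2, svd_solver="randomized", random_state=0)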
Example 18: pca
def pca(x, y, test_x, n_features=-1):
    if n_features == -1:
        n_features = int(np.ceil(np.sqrt(x.shape[1])))
    pca = PCA(n_components=n_features)
    selection = SelectKBest(k=n_features // 2)  # integer division: k must be an int
    combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])
    combined_features.fit(x, y)
    return combined_features.transform(x), combined_features.transform(test_x)
Developer: challenging | Project: kaggle | Lines: 11 | Source: feature_engineering.py
Example 19: cv_score
def cv_score(train_df, MODEL):
    print "... start cross validation"
    fu_obj = FeatureUnion(transformer_list=features.feature_list)
    train_X = fu_obj.fit_transform(train_df)
    train_y = train_df["Sales"].as_matrix()
    clf = GridSearchCV(estimator=clf_dict[MODEL]["clf"],
                       param_grid=clf_dict[MODEL]["paramteters"],  # (sic: key name as defined in clf_dict)
                       n_jobs=-1, scoring=rmspe, cv=None)
    print cross_val_score(clf, train_X, train_y, scoring=rmspe, cv=5, n_jobs=3)
Developer: guruttosekai2011 | Project: Rossmann_Store_Sales | Lines: 11 | Source: prediction.py
Example 20: set_traindata
def set_traindata(df, key):
    fu = FeatureUnion(transformer_list=f.feature_transformer_rule)
    Std = preprocessing.StandardScaler()
    X = fu.fit_transform(df)
    y = np.concatenate(df["label"].apply(lambda x: x.flatten()))
    X = Std.fit_transform(X)
    return (X, y)
Developer: haisland0909 | Project: Denoising-Dirty-Documents | Lines: 11 | Source: classify.py
Note: The sklearn.pipeline.FeatureUnion class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by their respective authors; copyright remains with the original authors, and any use or redistribution should follow the corresponding project's license. Do not reproduce without permission.