This article collects and summarizes typical usage examples of the Python function sklearn.pipeline.make_pipeline. If you have been asking yourself what exactly make_pipeline does, how to call it, or what real-world usage looks like, the hand-picked code examples below should help.
Twenty code examples of make_pipeline are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
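Before the collected examples, here is a minimal, self-contained sketch of the basic pattern (our own illustration, not drawn from any project below): make_pipeline chains transformers and a final estimator, auto-naming each step after its lowercased class name.

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X, y = load_iris(return_X_y=True)

pipe = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
pipe.fit(X, y)
print(pipe.steps[0][0])  # 'standardscaler', the auto-generated step name
print(pipe.score(X, y))  # accuracy on the training data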
Example 1: main
def main(met_fname, gday_outfname, var):
    # Load met data
    s = remove_comments_from_header(met_fname)
    df_met = pd.read_csv(s, parse_dates=[[0, 1]], skiprows=4, index_col=0,
                         sep=",", keep_date_col=True,
                         date_parser=date_converter)

    # Need to build a numpy array, so drop the year and doy columns
    # (.ix was removed from pandas; .iloc does the positional slicing)
    met_data = df_met.iloc[:, 2:].values
    met_data_train = df_met.iloc[0:4000, 2:].values

    # Load GDAY outputs
    df = pd.read_csv(gday_outfname, skiprows=3, sep=",", skipinitialspace=True)
    df['date'] = make_data_index(df)
    df = df.set_index('date')
    target = df[var][0:4000].values

    # BUILD MODELS
    # hold back 40% of the dataset for testing
    #X_train, X_test, Y_train, Y_test = \
    #    cross_validation.train_test_split(met_data, target,
    #                                      test_size=0.4, random_state=0)

    param_KNR = {"n_neighbors": [20], "weights": ['distance']}

    #regmod = DecisionTreeRegressor()
    #regmod = RandomForestRegressor()
    #regmod = SVR()
    regmod = KNeighborsRegressor()

    pipeit3 = lambda model: make_pipeline(StandardScaler(), PCA(), model)
    pipeit2 = lambda model: make_pipeline(StandardScaler(), model)
    regmod_p = pipeit2(regmod)
    modlab = regmod_p.steps[-1][0]

    # Prefix each parameter with the pipeline step name: '<step>__<param>'
    # (.iteritems() is Python 2 only; .items() works everywhere)
    par_grid = {'{0}__{1}'.format(modlab, parkey): pardat
                for (parkey, pardat) in param_KNR.items()}

    #emulator = GridSearchCV(regmod, param_grid=param_DTR, cv=5)
    emulator = GridSearchCV(regmod_p, param_grid=par_grid, cv=5)

    #emulator.fit(X_train, Y_train)
    emulator.fit(met_data_train, target)
    predict = emulator.predict(met_data)

    df = pd.DataFrame({'DT': df.index, 'emu': predict, 'gday': df[var]})

    plt.plot_date(df.index[4000:4383], df['emu'][4000:4383], 'o',
                  label='Emulator')
    plt.plot_date(df.index[4000:4383], df['gday'][4000:4383], 'o',
                  label='GDAY')
    plt.ylabel('GPP (g C m$^{-2}$ s$^{-1}$)')
    plt.legend()
    plt.show()
Author: mdekauwe | Project: gday_emulator | Lines: 60 | Source: emulator.py
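The '{step}__{param}' prefix built by hand above is the standard way to address a pipeline step's parameters inside GridSearchCV. A short sketch with stock scikit-learn pieces (illustrative data and values, not the author's code):

from sklearn.datasets import make_regression
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X, y = make_regression(n_samples=200, n_features=5, random_state=0)
pipe = make_pipeline(StandardScaler(), KNeighborsRegressor())

# The step name is the lowercased class name, so the grid keys read
# 'kneighborsregressor__<parameter>'
grid = GridSearchCV(pipe,
                    param_grid={'kneighborsregressor__n_neighbors': [5, 10, 20],
                                'kneighborsregressor__weights': ['uniform',
                                                                 'distance']},
                    cv=5)
grid.fit(X, y)
print(grid.best_params_)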
Example 2: test_pipeline_ducktyping
def test_pipeline_ducktyping():
    # Bare attribute access is the assertion here: it raises
    # AttributeError if the pipeline does not expose the method.
    pipeline = make_pipeline(Mult(5))
    pipeline.predict
    pipeline.transform
    pipeline.inverse_transform

    # A pipeline ending in a pure transformer has no predict
    pipeline = make_pipeline(Transf())
    assert not hasattr(pipeline, 'predict')
    pipeline.transform
    pipeline.inverse_transform

    # 'passthrough' is accepted as a no-op step
    pipeline = make_pipeline('passthrough')
    assert pipeline.steps[0] == ('passthrough', 'passthrough')
    assert not hasattr(pipeline, 'predict')
    pipeline.transform
    pipeline.inverse_transform

    # inverse_transform is only exposed when every step supports it
    pipeline = make_pipeline(Transf(), NoInvTransf())
    assert not hasattr(pipeline, 'predict')
    pipeline.transform
    assert not hasattr(pipeline, 'inverse_transform')

    pipeline = make_pipeline(NoInvTransf(), Transf())
    assert not hasattr(pipeline, 'predict')
    pipeline.transform
    assert not hasattr(pipeline, 'inverse_transform')
Author: allefpablo | Project: scikit-learn | Lines: 26 | Source: test_pipeline.py
Example 3: preprocess
def preprocess(self, any_set, is_train):
    if is_train:
        dico_pattern = {'match_lowercase_only': '\\b[a-z]+\\b',
                        'match_word': '\\w{2,}',
                        'match_word1': '(?u)\\b\\w+\\b',
                        'match_word_punct': '\\w+|[,.?!;]',
                        'match_NNP': '\\b[A-Z][a-z]+\\b|\\b[A-Z]+\\b',
                        'match_punct': "[,.?!;'-]"}

        tfv_title = TfidfVectorizer(lowercase=True, stop_words='english',
                                    token_pattern=dico_pattern["match_word1"],
                                    ngram_range=(1, 2), max_df=1.0, min_df=2,
                                    max_features=None, vocabulary=None,
                                    binary=True, norm=u'l2', use_idf=True,
                                    smooth_idf=True, sublinear_tf=True)

        tfv_desc = TfidfVectorizer(lowercase=True, stop_words='english',
                                   token_pattern=dico_pattern["match_word1"],
                                   ngram_range=(1, 2), max_df=1.0, min_df=2,
                                   max_features=None, vocabulary=None,
                                   binary=True, norm=u'l2', use_idf=True,
                                   smooth_idf=True, sublinear_tf=True)

        title_pipe = make_pipeline(ColumnSelector(key='title'), tfv_title)
        desc_pipe = make_pipeline(ColumnSelector(key='description'), tfv_desc)
        self.pipeline = make_union(title_pipe, desc_pipe)
        return self.pipeline.fit_transform(any_set)
    else:
        return self.pipeline.transform(any_set)
Author: Cadene | Project: DataScienceGame | Lines: 28 | Source: Predictor.py
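ColumnSelector in Example 3 is project-specific; it is not part of scikit-learn. A plausible minimal sketch of such a transformer, assuming any_set is a pandas DataFrame and key names one of its text columns (a reconstruction, not the DataScienceGame source):

from sklearn.base import BaseEstimator, TransformerMixin

class ColumnSelector(BaseEstimator, TransformerMixin):
    """Pull a single column out of a DataFrame so that a text
    vectorizer can sit downstream inside a pipeline or union."""
    def __init__(self, key):
        self.key = key

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        # TfidfVectorizer expects an iterable of strings
        return X[self.key]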
Example 4: __init__
def __init__(self, **config):
    # Validate options are present
    for option in _configuration_options:
        if option not in config:
            raise ValueError("Missing configuration "
                             "option {!r}".format(option))

    # Feature extraction
    sparse_features = parse_features(config["sparse_features"])
    densifier = make_pipeline(Vectorizer(sparse_features, sparse=True),
                              ClassifierAsFeature())
    dense_features = parse_features(config["dense_features"])
    vectorization = make_union(densifier,
                               Vectorizer(dense_features, sparse=False))

    # Classifier
    try:
        classifier = _valid_classifiers[config["classifier"]]
    except KeyError:
        raise ValueError("Unknown classification algorithm "
                         "{!r}".format(config["classifier"]))
    classifier = classifier(**config["classifier_args"])

    self.pipeline = make_pipeline(vectorization, StandardScaler())
    self.classifier = classifier
Author: 52nlp | Project: iepy | Lines: 25 | Source: relation_extraction_classifier.py
Example 5: get_pipeline
def get_pipeline(fsmethods, clfmethod):
    """Return an instance of a sklearn Pipeline given the parameters.

    The feature-selection methods in fsmethods are joined in a FeatureUnion,
    which is then joined with clfmethod in a Pipeline.

    Parameters
    ----------
    fsmethods: list of estimators, or a single transformer
        All estimators in the union must be transformers
        (i.e. must have a transform method).

    clfmethod: classifier
        The last estimator may be of any type (transformer, classifier, etc.).

    Returns
    -------
    pipe
    """
    feat_union = None
    if not isinstance(fsmethods, list):
        if hasattr(fsmethods, 'transform'):
            feat_union = fsmethods
        else:
            raise ValueError('fsmethods expected to be either a list '
                             'or a transformer method')
    else:
        feat_union = make_union(*fsmethods)

    if feat_union is None:
        pipe = make_pipeline(clfmethod)
    else:
        pipe = make_pipeline(feat_union, clfmethod)

    return pipe
Author: Neurita | Project: darwin | Lines: 32 | Source: sklearn_utils.py
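A hypothetical call to get_pipeline with stock scikit-learn transformers (the estimators and the X_train/y_train names below are illustrative assumptions, not from the darwin project):

from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
from sklearn.svm import SVC

# Two transformer branches are unioned, then an SVC goes on top
pipe = get_pipeline(fsmethods=[SelectKBest(k=10), PCA(n_components=5)],
                    clfmethod=SVC(kernel='linear'))
pipe.fit(X_train, y_train)            # training data assumed to exist
print(pipe.score(X_test, y_test))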
Example 6: analysis
def analysis(name, typ, condition=None, query=None, title=None):
    """Wrapper to ensure that we attribute the same function for each type
    of analysis: e.g. categorical, regression, circular regression."""

    # Define univariate analysis
    erf_function = None  # Default is fast_mannwhitneyu
    # /!\ for categorical analyses, the contrast is min(y) - max(y)
    # e.g. target_present==False - target_present==True

    if typ == 'categorize':
        # estimator is normalization + l2 Logistic Regression
        clf = make_pipeline(
            StandardScaler(),
            force_predict(LogisticRegression(class_weight='balanced'), axis=1))
        scorer = scorer_auc
        chance = .5
    elif typ == 'regress':
        # estimator is normalization + l2 Ridge
        clf = make_pipeline(StandardScaler(), Ridge())
        scorer = scorer_spearman
        chance = 0.
    elif typ == 'circ_regress':
        # estimator is normalization + l2 Ridge on cos and sin
        clf = make_pipeline(StandardScaler(), PolarRegression(Ridge()))
        scorer = scorer_angle
        chance = 0.
        # The univariate analysis needs a different scorer
        erf_function = scorer_circlin

    if condition is None:
        condition = name

    return dict(name=name, condition=condition, query=query, clf=clf,
                scorer=scorer, chance=chance, erf_function=erf_function,
                cv=8, typ=typ, title=title, single_trial=True)
Author: kingjr | Project: decoding_unconscious_maintenance | Lines: 32 | Source: conditions.py
Example 7: test_pipeline_ducktyping
def test_pipeline_ducktyping():
    pipeline = make_pipeline(Mult(5))
    pipeline.predict
    pipeline.transform
    pipeline.inverse_transform

    pipeline = make_pipeline(Transf())
    assert_false(hasattr(pipeline, 'predict'))
    pipeline.transform
    pipeline.inverse_transform

    pipeline = make_pipeline(None)
    assert_false(hasattr(pipeline, 'predict'))
    pipeline.transform
    pipeline.inverse_transform

    pipeline = make_pipeline(Transf(), NoInvTransf())
    assert_false(hasattr(pipeline, 'predict'))
    pipeline.transform
    assert_false(hasattr(pipeline, 'inverse_transform'))

    pipeline = make_pipeline(NoInvTransf(), Transf())
    assert_false(hasattr(pipeline, 'predict'))
    pipeline.transform
    assert_false(hasattr(pipeline, 'inverse_transform'))
Author: dsquareindia | Project: scikit-learn | Lines: 25 | Source: test_pipeline.py
Example 8: test_bagging_classifier_with_missing_inputs
def test_bagging_classifier_with_missing_inputs():
    # Check that BaggingClassifier can accept X with missing/infinite data
    X = np.array([
        [1, 3, 5],
        [2, None, 6],
        [2, np.nan, 6],
        [2, np.inf, 6],
        [2, np.NINF, 6],
    ])
    y = np.array([3, 6, 6, 6, 6])
    classifier = DecisionTreeClassifier()
    pipeline = make_pipeline(
        FunctionTransformer(replace, validate=False),
        classifier
    )
    pipeline.fit(X, y).predict(X)
    bagging_classifier = BaggingClassifier(pipeline)
    bagging_classifier.fit(X, y)
    y_hat = bagging_classifier.predict(X)
    assert_equal(y.shape, y_hat.shape)
    bagging_classifier.predict_log_proba(X)
    bagging_classifier.predict_proba(X)

    # Verify that exceptions can be raised by wrapper classifier
    classifier = DecisionTreeClassifier()
    pipeline = make_pipeline(classifier)
    assert_raises(ValueError, pipeline.fit, X, y)
    bagging_classifier = BaggingClassifier(pipeline)
    assert_raises(ValueError, bagging_classifier.fit, X, y)
Author: daniel-perry | Project: scikit-learn | Lines: 29 | Source: test_bagging.py
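The replace helper passed to FunctionTransformer is defined elsewhere in the test module and is not shown here. A plausible sketch of what such a cleaner must do for the test to pass, offered as an assumption rather than the verbatim scikit-learn source:

import numpy as np

def replace(X):
    # Hypothetical reconstruction: cast to float so None becomes nan,
    # then zero out every non-finite entry (nan, inf, -inf).
    X = X.astype(np.float64)
    X[~np.isfinite(X)] = 0
    return X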
Example 9: cross_validation_LR
def cross_validation_LR(X, Y, n_folds, C_seq, K_seq, verbose=False):
    '''
    To classify Y using X, we first use ANOVA to choose the K dimensions
    of X in which the difference between the Y classes is highest, then run
    a logistic regression classifier with regularization parameter C on
    those K dimensions.

    To quantify how well X can classify Y without fixing a single
    training/testing partition, we run n_folds-fold cross validation.
    In each fold, an inner cross validation loop selects the C and K that
    give the best classification accuracy over the given ranges; these are
    then used to classify the held-out testing data.

    Inputs:
        X,       [n, p], n trials of p-dimensional data, used for classification
        Y,       [n], class labels
        n_folds, integer, split the data into n_folds for cross validation
        C_seq,   a sequence of regularization parameters for the logistic
                 regression classifiers; smaller values specify stronger
                 regularization,
                 e.g. C_seq = 10.0 ** np.arange(-3, 1, 1)
        K_seq,   a sequence of integers,
                 e.g. K_seq = (np.floor(np.arange(0.2, 1, 0.2) * p)).astype(np.int)
        verbose, boolean; if true, print the best C and K chosen
    Output:
        averaged classification accuracy over the n_folds
    '''
    # Pre-0.18 scikit-learn API: StratifiedKFold(labels, n_folds=...)
    cv0 = StratifiedKFold(Y, n_folds=n_folds)
    cv_acc = np.zeros(n_folds)
    for i in range(n_folds):
        ind_test = cv0.test_folds == i
        ind_train = cv0.test_folds != i
        tmpX_train = X[ind_train, :]
        tmpY_train = Y[ind_train]
        tmpX_test = X[ind_test, :]
        tmpY_test = Y[ind_test]

        # Inner grid search over (C, K) on the training fold
        tmp_cv_score = np.zeros([len(C_seq), len(K_seq)])
        for j in range(len(C_seq)):
            for k in range(len(K_seq)):
                cv1 = StratifiedKFold(tmpY_train, n_folds=n_folds)
                anova_filter = SelectKBest(f_regression, k=K_seq[k])
                clf = LogisticRegression(C=C_seq[j], penalty="l2")
                anova_clf = make_pipeline(anova_filter, clf)
                tmp_cv_score[j, k] = cross_val_score(
                    anova_clf, tmpX_train, tmpY_train,
                    scoring="accuracy", cv=cv1).mean()

        best_ind = np.argmax(tmp_cv_score.ravel())
        best_j, best_k = np.unravel_index(best_ind, tmp_cv_score.shape)

        # Refit on the full training fold with the selected C and K.
        # (The original reused the exhausted loop variables j and k here,
        # which always picked the last grid point; best_j/best_k is the intent.)
        anova_filter = SelectKBest(f_regression, k=K_seq[best_k])
        clf = LogisticRegression(C=C_seq[best_j], penalty="l2")
        anova_clf = make_pipeline(anova_filter, clf)
        tmpY_predict = anova_clf.fit(tmpX_train, tmpY_train).predict(tmpX_test)
        if verbose:
            print(C_seq[best_j], K_seq[best_k])
        cv_acc[i] = np.mean(tmpY_test == tmpY_predict)
    return np.mean(cv_acc)
Author: YingYang | Project: misc_neuro_imaging_analysis_code | Lines: 59 | Source: decoding.py
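The example above relies on the pre-0.18 cross-validation API (StratifiedKFold(Y, n_folds=...) exposing test_folds). A hedged sketch of the same nested-CV idea against the modern API, letting GridSearchCV run the inner loop; note f_classif is substumed in place of the original's f_regression, as its classification counterpart (a sketch under these assumptions, not a drop-in replacement):

from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.pipeline import make_pipeline

def cross_validation_LR_modern(X, Y, n_folds, C_seq, K_seq):
    pipe = make_pipeline(SelectKBest(f_classif),
                         LogisticRegression(penalty="l2"))
    # Grid keys use the step names that make_pipeline auto-generates
    inner = GridSearchCV(pipe,
                         param_grid={"selectkbest__k": list(K_seq),
                                     "logisticregression__C": list(C_seq)},
                         scoring="accuracy", cv=n_folds)
    # Outer loop: each held-out fold scores a freshly grid-searched model
    return cross_val_score(inner, X, Y, scoring="accuracy",
                           cv=n_folds).mean()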
Example 10: test_generator_ok
def test_generator_ok(self):
    pipeline = make_pipeline(FakeGenerator(fakes=['job', 'name', 'address'],
                                           nb_sample=20, random_state=40))
    result = pipeline.fit_transform(None)
    self.assertEqual(result.shape, (20, 3))

    pipeline = make_pipeline(FakeGenerator(fakes=['job', 'name', 'address'],
                                           nb_sample=20, random_state=40))
    result_2 = pipeline.fit_transform(None)
    # Testing the seed
    assert_frame_equal(result, result_2)
Author: romainx | Project: pyranha | Lines: 8 | Source: test_io.py
Example 11: fit
def fit(self, X, y):
    # Filthy hack: the last column of X carries the subject ids
    sids = X[:, -1]
    # One pipeline per subject, each fit on that subject's rows only
    all_pipelines = [make_pipeline(LogisticRegressionCV()).fit(X_s, y_s)
                     for X_s, y_s in subject_splitter(X[:, :-1], y, sids)]
    # Stack the per-subject models as features for a final classifier
    f_union = make_union(*[FeatureUnionWrapper(p) for p in all_pipelines])
    self.clf_ = make_pipeline(f_union, LogisticRegressionCV()).fit(X[:, :-1], y)
    return self
Author: kastnerkyle | Project: kaggle-decmeg2014 | Lines: 8 | Source: minimal_clf.py
Example 12: test_make_pipeline_memory
def test_make_pipeline_memory():
    cachedir = mkdtemp()
    memory = Memory(cachedir=cachedir)
    pipeline = make_pipeline(DummyTransf(), SVC(), memory=memory)
    assert_true(pipeline.memory is memory)
    pipeline = make_pipeline(DummyTransf(), SVC())
    assert_true(pipeline.memory is None)

    shutil.rmtree(cachedir)
Author: lebigot | Project: scikit-learn | Lines: 9 | Source: test_pipeline.py
Example 13: __init__
def __init__(self):
    self.clf1 = [make_pipeline(Imputer(),
                               GradientBoostingRegressor(n_estimators=5000,
                                                         max_depth=8))
                 for _ in range(5)]
    self.clf2 = [make_pipeline(Imputer(strategy='median'),
                               ExtraTreesRegressor(n_estimators=5000,
                                                   criterion='mse',
                                                   max_depth=8,
                                                   min_samples_split=10,
                                                   min_samples_leaf=1,
                                                   min_weight_fraction_leaf=0.0,
                                                   max_features='auto',
                                                   max_leaf_nodes=None,
                                                   bootstrap=False,
                                                   oob_score=False,
                                                   n_jobs=1, random_state=42,
                                                   verbose=0, warm_start=True))
                 for _ in range(5)]
    self.clf3 = [make_pipeline(Imputer(), svm.LinearSVR()) for _ in range(5)]
    self.clf = [linear_model.LinearRegression() for _ in range(5)]
Author: BenSchannes | Project: Epidemium | Lines: 13 | Source: Regressor_blend_1.py
Example 14: test_classes_property
def test_classes_property():
    iris = load_iris()
    X = iris.data
    y = iris.target

    # A regressor pipeline never exposes classes_
    reg = make_pipeline(SelectKBest(k=1), LinearRegression())
    reg.fit(X, y)
    assert_raises(AttributeError, getattr, reg, "classes_")

    # A classifier pipeline exposes classes_ only after fitting
    clf = make_pipeline(SelectKBest(k=1), LogisticRegression(random_state=0))
    assert_raises(AttributeError, getattr, clf, "classes_")
    clf.fit(X, y)
    assert_array_equal(clf.classes_, np.unique(y))
Author: Givonaldo | Project: scikit-learn | Lines: 13 | Source: test_pipeline.py
Example 15: test_make_pipeline_memory
def test_make_pipeline_memory():
    cachedir = mkdtemp()
    if LooseVersion(joblib_version) < LooseVersion('0.12'):
        # Deal with change of API in joblib
        memory = Memory(cachedir=cachedir, verbose=10)
    else:
        memory = Memory(location=cachedir, verbose=10)
    pipeline = make_pipeline(DummyTransf(), SVC(), memory=memory)
    assert_true(pipeline.memory is memory)
    pipeline = make_pipeline(DummyTransf(), SVC())
    assert_true(pipeline.memory is None)

    shutil.rmtree(cachedir)
Author: as133 | Project: scikit-learn | Lines: 13 | Source: test_pipeline.py
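Examples 12 and 15 only assert that the memory argument is stored on the pipeline. In practice, memory is how you cache fitted transformers so repeated fits (e.g. during a grid search) skip redundant work; a short sketch with stock components (our own, not from either test):

from shutil import rmtree
from tempfile import mkdtemp

from joblib import Memory
from sklearn.datasets import load_iris
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
cachedir = mkdtemp()

# The fitted StandardScaler is cached on disk; a refit with identical data
# and parameters reuses the cached result instead of recomputing it.
pipe = make_pipeline(StandardScaler(), SVC(),
                     memory=Memory(location=cachedir, verbose=0))
pipe.fit(X, y)

rmtree(cachedir)  # remove the cache directory when done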
Example 16: test_make_pipeline
def test_make_pipeline():
    t1 = Transf()
    t2 = Transf()
    pipe = make_pipeline(t1, t2)
    assert_true(isinstance(pipe, Pipeline))
    # Steps of the same class are disambiguated with a numeric suffix
    assert_equal(pipe.steps[0][0], "transf-1")
    assert_equal(pipe.steps[1][0], "transf-2")

    pipe = make_pipeline(t1, t2, FitParamT())
    assert_true(isinstance(pipe, Pipeline))
    assert_equal(pipe.steps[0][0], "transf-1")
    assert_equal(pipe.steps[1][0], "transf-2")
    assert_equal(pipe.steps[2][0], "fitparamt")
Author: dsquareindia | Project: scikit-learn | Lines: 13 | Source: test_pipeline.py
Example 17: get_results
def get_results(dataset):
    X_full, y_full = dataset.data, dataset.target
    n_samples = X_full.shape[0]
    n_features = X_full.shape[1]

    # Estimate the score on the entire dataset, with no missing values
    estimator = RandomForestRegressor(random_state=0, n_estimators=100)
    full_scores = cross_val_score(estimator, X_full, y_full,
                                  scoring='neg_mean_squared_error')

    # Add missing values in 75% of the lines
    missing_rate = 0.75
    n_missing_samples = int(np.floor(n_samples * missing_rate))
    missing_samples = np.hstack((np.zeros(n_samples - n_missing_samples,
                                          dtype=np.bool),
                                 np.ones(n_missing_samples,
                                         dtype=np.bool)))
    rng.shuffle(missing_samples)
    missing_features = rng.randint(0, n_features, n_missing_samples)

    # Estimate the score after replacing missing values by 0
    X_missing = X_full.copy()
    X_missing[np.where(missing_samples)[0], missing_features] = 0
    y_missing = y_full.copy()
    estimator = RandomForestRegressor(random_state=0, n_estimators=100)
    zero_impute_scores = cross_val_score(estimator, X_missing, y_missing,
                                         scoring='neg_mean_squared_error')

    # Estimate the score after imputation (mean strategy) of the missing values
    X_missing = X_full.copy()
    X_missing[np.where(missing_samples)[0], missing_features] = 0
    y_missing = y_full.copy()
    estimator = make_pipeline(
        make_union(SimpleImputer(missing_values=0, strategy="mean"),
                   MissingIndicator(missing_values=0)),
        RandomForestRegressor(random_state=0, n_estimators=100))
    mean_impute_scores = cross_val_score(estimator, X_missing, y_missing,
                                         scoring='neg_mean_squared_error')

    # Estimate the score after chained imputation of the missing values
    estimator = make_pipeline(
        make_union(ChainedImputer(missing_values=0, random_state=0),
                   MissingIndicator(missing_values=0)),
        RandomForestRegressor(random_state=0, n_estimators=100))
    chained_impute_scores = cross_val_score(estimator, X_missing, y_missing,
                                            scoring='neg_mean_squared_error')

    return ((full_scores.mean(), full_scores.std()),
            (zero_impute_scores.mean(), zero_impute_scores.std()),
            (mean_impute_scores.mean(), mean_impute_scores.std()),
            (chained_impute_scores.mean(), chained_impute_scores.std()))
Author: lebigot | Project: scikit-learn | Lines: 51 | Source: plot_missing_values.py
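ChainedImputer only ever existed in the 0.20 development series; in released scikit-learn (0.21+) the equivalent estimator is IterativeImputer, which still sits behind an experimental import flag:

# IterativeImputer must be explicitly enabled before it can be imported
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer

imputer = IterativeImputer(missing_values=0, random_state=0)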
Example 18: build_text_extraction
def build_text_extraction(binary, min_df, ngram, stopwords, useTfIdf):
    if useTfIdf:
        return make_pipeline(TfidfVectorizer(min_df=min_df,
                                             max_df=0.8,
                                             sublinear_tf=True,
                                             use_idf=True,
                                             ngram_range=(1, 3)),
                             ClassifierOvOAsFeatures())
    return make_pipeline(CountVectorizer(binary=binary,
                                         tokenizer=lambda x: x.split(),
                                         min_df=min_df,
                                         ngram_range=(1, ngram),
                                         stop_words=stopwords),
                         ClassifierOvOAsFeatures())
Author: EspenAlbert | Project: sentimentAnalysisMovieReviews | Lines: 14 | Source: predictor.py
Example 19: out_fold_pred
def out_fold_pred(params, X, y_array, y_ix, reps):
    y = y_array[:, y_ix]

    # Out-of-fold predictions via cross validation
    preds = np.zeros((y_array.shape[0]))
    for train_ix, test_ix in makeKFold(5, y, reps):
        X_train, y_train = X[train_ix, :], y[train_ix]
        X_test = X[test_ix, :]
        # Build a fresh pipeline per fold so no state leaks across folds
        # (the original also built one before the loop, which was never used)
        clf = make_pipeline(StandardScaler(), LogisticRegression(**params))
        clf.fit(X_train, y_train)
        pred = clf.predict_proba(X_test)[:, 1]
        preds[test_ix] = pred
    return preds
Author: jingxiang-li | Project: kaggle-yelp | Lines: 15 | Source: glm_l1.py
Example 20: build_synset_extraction
def build_synset_extraction(binary, min_df, ngram, useTfIdf):
    if useTfIdf:
        return make_pipeline(MapToSynsets(),
                             TfidfVectorizer(min_df=min_df,
                                             max_df=0.8,
                                             sublinear_tf=True,
                                             use_idf=True,
                                             ngram_range=(1, 3)),
                             ClassifierOvOAsFeatures())
    return make_pipeline(MapToSynsets(),
                         CountVectorizer(binary=binary,
                                         tokenizer=lambda x: x.split(),
                                         min_df=min_df,
                                         ngram_range=(1, ngram)),
                         ClassifierOvOAsFeatures())
Author: EspenAlbert | Project: sentimentAnalysisMovieReviews | Lines: 15 | Source: predictor.py
Note: The sklearn.pipeline.make_pipeline examples above were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are drawn from open-source projects contributed by their respective developers, and copyright remains with the original authors; consult each project's license before redistributing or reusing the code. Please do not repost without permission.