本文整理汇总了Python中sklearn.pipeline.make_union函数的典型用法代码示例。如果您正苦于以下问题：Python make_union函数的具体用法？Python make_union怎么用？Python make_union使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了make_union函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_make_union_kwargs
def test_make_union_kwargs():
    """Check that make_union accepts ``n_jobs`` and rejects other keywords."""
    pca = PCA(svd_solver='full')
    mock = Transf()

    union = make_union(pca, mock, n_jobs=3)
    reference = make_union(pca, mock)

    # n_jobs must not alter the generated (name, transformer) list.
    assert_equal(union.transformer_list, reference.transformer_list)
    assert_equal(3, union.n_jobs)

    # invalid keyword parameters should raise an error message
    unsupported = {'transformer_weights': {'pca': 10, 'Transf': 1}}
    assert_raise_message(
        TypeError,
        'Unknown keyword arguments: "transformer_weights"',
        make_union, pca, mock, **unsupported
    )
开发者ID:lebigot,项目名称:scikit-learn,代码行数:12,代码来源:test_pipeline.py
示例2: get_results
def get_results(dataset):
    """Score a random forest on full data and on three missing-value treatments.

    One feature per affected row is overwritten with 0 in 75% of the rows;
    0 is then the missing-value marker handed to the imputers and indicator.

    Parameters
    ----------
    dataset : object
        Must expose ``data`` (2-D array) and ``target`` attributes.

    Returns
    -------
    tuple
        Four ``(mean, std)`` pairs of cross-validation scores, in order:
        full data, zeros left in place, mean imputation, chained imputation.

    Notes
    -----
    Uses the module-level ``rng`` object (defined outside this block) for the
    row shuffle and the feature draw.
    """
    X_full, y_full = dataset.data, dataset.target
    n_samples = X_full.shape[0]
    n_features = X_full.shape[1]

    # Estimate the score on the entire dataset, with no missing values
    estimator = RandomForestRegressor(random_state=0, n_estimators=100)
    full_scores = cross_val_score(estimator, X_full, y_full,
                                  scoring='neg_mean_squared_error')

    # Add missing values in 75% of the lines
    missing_rate = 0.75
    n_missing_samples = int(np.floor(n_samples * missing_rate))
    # Builtin ``bool`` replaces the ``np.bool`` alias, which NumPy removed.
    missing_samples = np.hstack((np.zeros(n_samples - n_missing_samples,
                                          dtype=bool),
                                 np.ones(n_missing_samples,
                                         dtype=bool)))
    rng.shuffle(missing_samples)
    missing_features = rng.randint(0, n_features, n_missing_samples)

    # The corrupted copy is built once and shared by the three experiments
    # below; the original rebuilt an identical copy before mean imputation.
    X_missing = X_full.copy()
    X_missing[np.where(missing_samples)[0], missing_features] = 0
    y_missing = y_full.copy()

    # Estimate the score after replacing missing values by 0
    estimator = RandomForestRegressor(random_state=0, n_estimators=100)
    zero_impute_scores = cross_val_score(estimator, X_missing, y_missing,
                                         scoring='neg_mean_squared_error')

    # Estimate the score after imputation (mean strategy) of the missing values
    estimator = make_pipeline(
        make_union(SimpleImputer(missing_values=0, strategy="mean"),
                   MissingIndicator(missing_values=0)),
        RandomForestRegressor(random_state=0, n_estimators=100))
    mean_impute_scores = cross_val_score(estimator, X_missing, y_missing,
                                         scoring='neg_mean_squared_error')

    # Estimate the score after chained imputation of the missing values
    estimator = make_pipeline(
        make_union(ChainedImputer(missing_values=0, random_state=0),
                   MissingIndicator(missing_values=0)),
        RandomForestRegressor(random_state=0, n_estimators=100))
    chained_impute_scores = cross_val_score(estimator, X_missing, y_missing,
                                            scoring='neg_mean_squared_error')

    return ((full_scores.mean(), full_scores.std()),
            (zero_impute_scores.mean(), zero_impute_scores.std()),
            (mean_impute_scores.mean(), mean_impute_scores.std()),
            (chained_impute_scores.mean(), chained_impute_scores.std()))
开发者ID:lebigot,项目名称:scikit-learn,代码行数:51,代码来源:plot_missing_values.py
示例3: __init__
def __init__(self, training_values=None, training_targets=None):
    """Create the vectorizer and classifier; fit at once if both inputs are given."""
    self.vectorizer = make_union(TfidfVectorizer(), PostTransformer())
    # Set using parameter_search. TODO: review after updating
    # corpus.
    self.classifier = svm.LinearSVC(
        C=1,
        loss='squared_hinge',
        multi_class='ovr',
        class_weight='balanced',
        tol=1e-6,
    )
    # Training is skipped unless both values and targets were supplied.
    if training_values is None or training_targets is None:
        return
    self.fit(training_values, training_targets)
开发者ID:franciscocorrales,项目名称:LearnProgrammingBot,代码行数:7,代码来源:main.py
示例4: PipelineTelstra
def PipelineTelstra(Classifier):
    """Return a pipeline joining per-column feature branches with ``Classifier()``.

    ``Classifier`` is called with no arguments to create the final step.
    """
    # 'location' is the only column that is transposed and one-hot encoded.
    location_branch = make_pipeline(
        DataSpliterTrans(cols='location', transp=True),
        preprocessing.OneHotEncoder(handle_unknown='ignore'),
    )

    # Every other column is split in matrix mode and dict-vectorized.
    dict_columns = (
        'event_type',
        'severity_type',
        'resource_type',
        'volume',
        'log_feature',
    )
    dict_branches = [
        make_pipeline(DataSpliterTrans(cols=column, matrix=True), DictVectorizer())
        for column in dict_columns
    ]

    features = make_union(location_branch, *dict_branches)
    pipeline = make_pipeline(features, Classifier())
    print('pipeline done.')
    return pipeline
开发者ID:diazcelsa,项目名称:kaggle,代码行数:32,代码来源:data_modifier.py
示例5: __init__
def __init__(self, transforms):
    """Keep the transformer factories and build the feature pipeline and classifier.

    Each item of ``transforms`` is called with no arguments to obtain a
    transformer; the results are joined with ``make_union``.
    """
    self.transforms = transforms
    transformers = [factory() for factory in transforms]
    self.pipeline = make_pipeline(make_union(*transformers))
    # NOTE(review): class_weight="auto" may be rejected by recent scikit-learn
    # releases -- confirm against the version this project pins.
    self.classifier = LogisticRegression(penalty="l1", class_weight="auto")
开发者ID:willferreira,项目名称:mscproject,代码行数:7,代码来源:lr_predictors.py
示例6: preprocess
def preprocess(self, any_set, is_train):
    """Vectorize the 'title' and 'description' columns of ``any_set`` with TF-IDF.

    Parameters
    ----------
    any_set : object
        Data passed to the feature union; the branches select the 'title'
        and 'description' keys from it.
    is_train : bool
        When true, a new feature union is built, stored on ``self.pipeline``
        and fitted on ``any_set``. When false, the stored ``self.pipeline``
        is reused, so a training call must have happened first.

    Returns
    -------
    The output of ``fit_transform`` (training) or ``transform`` (otherwise).
    """
    if not is_train:
        return self.pipeline.transform(any_set)

    # Raw strings keep the regex text unchanged while avoiding the invalid
    # '\w' escape the original non-raw 'match_word_punct' literal contained.
    dico_pattern = {
        'match_lowercase_only': r'\b[a-z]+\b',
        'match_word': r'\w{2,}',
        'match_word1': r'(?u)\b\w+\b',
        'match_word_punct': r'\w+|[,.?!;]',
        'match_NNP': r'\b[A-Z][a-z]+\b|\b[A-Z]+\b',
        'match_punct': "[,.?!;'-]",
    }

    # Title and description use the same vectorizer settings but need
    # separate instances, since each one is fitted on its own column.
    tfidf_options = dict(
        lowercase=True, stop_words='english',
        token_pattern=dico_pattern["match_word1"],
        ngram_range=(1, 2), max_df=1.0, min_df=2, max_features=None,
        vocabulary=None, binary=True, norm='l2',
        use_idf=True, smooth_idf=True, sublinear_tf=True,
    )
    tfv_title = TfidfVectorizer(**tfidf_options)
    tfv_desc = TfidfVectorizer(**tfidf_options)

    title_pipe = make_pipeline(ColumnSelector(key='title'), tfv_title)
    desc_pipe = make_pipeline(ColumnSelector(key='description'), tfv_desc)
    self.pipeline = make_union(title_pipe, desc_pipe)
    return self.pipeline.fit_transform(any_set)
开发者ID:Cadene,项目名称:DataScienceGame,代码行数:28,代码来源:Predictor.py
示例7: test_make_union
def test_make_union():
    """Check the step names and transformer objects produced by make_union."""
    pca = PCA()
    mock = TransfT()
    union = make_union(pca, mock)

    # Split the (name, transformer) pairs into two parallel tuples.
    step_names = tuple(name for name, _ in union.transformer_list)
    step_objects = tuple(trans for _, trans in union.transformer_list)

    assert_equal(step_names, ("pca", "transft"))
    assert_equal(step_objects, (pca, mock))
开发者ID:Givonaldo,项目名称:scikit-learn,代码行数:7,代码来源:test_pipeline.py
示例8: get_extra_features
def get_extra_features(args):
    """Fit a selection + decomposition pipeline and return extended feature matrices.

    Arrays are loaded from ``feature/1_100/``; the pipeline is fitted against
    column ``args.yix`` of the training targets. The fitted pipeline and the
    selected column indices are saved under the module-level ``save_dir``
    (defined outside this block).

    Returns
    -------
    (X_train_ext, X_test_ext)
        The pipeline output stacked horizontally with the first 500 selected
        raw columns, for the train and test matrices respectively.
    """
    forest_options = dict(
        n_estimators=2000,
        criterion='entropy',
        max_features='sqrt',
        max_depth=6,
        min_samples_split=8,
        n_jobs=-1,
        bootstrap=True,
        oob_score=True,
        verbose=1,
        class_weight='balanced',
    )
    forest = ExtraTreesClassifier(**forest_options)

    decompositions = (
        PCA(n_components=200),
        FastICA(n_components=200, max_iter=1000),
        KMeans(n_clusters=200, n_init=20, max_iter=1000),
    )
    pipeline = make_pipeline(selectKFromModel(forest, k=1000),
                             StandardScaler(),
                             make_union(*decompositions))

    X_train = np.load('feature/1_100/X_train.npy')
    y_train = np.load('feature/1_100/y_train.npy')
    X_test = np.load('feature/1_100/X_test.npy')

    pipeline.fit(X_train, y_train[:, args.yix])

    # The first pipeline step exposes the indices of the columns it kept.
    selector = pipeline.steps[0][1]
    sel_ixs = selector.indices[:500]

    def _extend(matrix):
        # Pipeline output first, then the raw selected columns.
        return np.hstack((pipeline.transform(matrix), matrix[:, sel_ixs]))

    X_train_ext = _extend(X_train)
    X_test_ext = _extend(X_test)

    with open(path.join(save_dir, 'pipe.pkl'), 'wb') as f_pipe:
        pickle.dump(pipeline, f_pipe)
    np.save(path.join(save_dir, 'selix.npy'), sel_ixs)

    return X_train_ext, X_test_ext
开发者ID:jingxiang-li,项目名称:kaggle-yelp,代码行数:33,代码来源:feature_selection.py
示例9: test_make_union
def test_make_union():
    """Check the step names and transformer objects produced by make_union."""
    pca = PCA(svd_solver='full')
    mock = Transf()
    union = make_union(pca, mock)

    # Split the (name, transformer) pairs into two parallel tuples.
    step_names = tuple(name for name, _ in union.transformer_list)
    step_objects = tuple(trans for _, trans in union.transformer_list)

    assert_equal(step_names, ("pca", "transf"))
    assert_equal(step_objects, (pca, mock))
开发者ID:dsquareindia,项目名称:scikit-learn,代码行数:7,代码来源:test_pipeline.py
示例10: get_pipeline
def get_pipeline(fsmethods, clfmethod):
    """Return a sklearn Pipeline chaining feature selection with a final estimator.

    Parameters
    ----------
    fsmethods: list of estimators, or a single transformer
        A list is joined into a FeatureUnion with ``make_union``. Any other
        object is used as-is, provided it has a ``transform`` attribute.

    clfmethod: classifier
        The last estimator may be any type (transformer, classifier, etc.).

    Returns
    -------
    pipe

    Raises
    ------
    ValueError
        If ``fsmethods`` is neither a list nor an object with a ``transform``
        attribute.
    """
    # The original also handled ``feat_union is None``, but that branch could
    # never run: every path either assigns a value or raises.
    if isinstance(fsmethods, list):
        feat_union = make_union(*fsmethods)
    elif hasattr(fsmethods, 'transform'):
        feat_union = fsmethods
    else:
        raise ValueError('fsmethods expected to be either a list or a transformer method')

    return make_pipeline(feat_union, clfmethod)
开发者ID:Neurita,项目名称:darwin,代码行数:32,代码来源:sklearn_utils.py
示例11: test_missing_indicator_with_imputer
def test_missing_indicator_with_imputer(X, missing_values, X_trans_exp):
    """Check the output of a most-frequent imputer joined with a missing indicator."""
    imputer = SimpleImputer(missing_values=missing_values,
                            strategy='most_frequent')
    indicator = MissingIndicator(missing_values=missing_values)

    X_trans = make_union(imputer, indicator).fit_transform(X)

    assert_array_equal(X_trans, X_trans_exp)
开发者ID:psorianom,项目名称:scikit-learn,代码行数:7,代码来源:test_impute.py
示例12: __init__
def __init__(self, classifier="sgd", classifier_args=None, lowercase=True,
             text_replacements=None, map_to_synsets=False, binary=False,
             min_df=0, ngram=1, stopwords=None, limit_train=None,
             map_to_lex=False, duplicates=False):
    """Assemble the text-processing pipeline and instantiate the classifier.

    ``classifier`` is a key into the module-level ``_valid_classifiers``
    mapping; the selected entry is called with ``classifier_args`` (an empty
    dict when ``None``). Synset and lexicon extraction are added to the
    feature union only when their flags are set.
    """
    self.limit_train = limit_train
    self.duplicates = duplicates

    steps = [ExtractText(lowercase)]
    if text_replacements:
        steps.append(ReplaceText(text_replacements))

    # Options common to every extraction builder.
    shared = dict(binary=binary, min_df=min_df, ngram=ngram)
    extractors = [build_text_extraction(stopwords=stopwords, **shared)]
    if map_to_synsets:
        extractors.append(build_synset_extraction(**shared))
    if map_to_lex:
        extractors.append(build_lex_extraction(**shared))
    steps.append(make_union(*extractors))

    # Building classifier
    kwargs = {} if classifier_args is None else classifier_args
    model = _valid_classifiers[classifier](**kwargs)

    self.pipeline = make_pipeline(*steps)
    self.classifier = model
开发者ID:jthang,项目名称:KaggleLab,代码行数:29,代码来源:predictor.py
示例13: pca_kpca
def pca_kpca(train_data, labels):
    """Fit a feature union of PCA, TruncatedSVD and KernelPCA and return it.

    Parameters
    ----------
    train_data : array-like
        Data forwarded to the union's ``fit``.
    labels : array-like
        Targets forwarded to the union's ``fit``.

    Returns
    -------
    The fitted union created by ``make_union``.
    """
    # make_union already returns a FeatureUnion. The original wrapped that
    # result in FeatureUnion(...) again, which expects a list of
    # (name, transformer) pairs rather than an existing union.
    combined = make_union(PCA(), TruncatedSVD(), KernelPCA())
    combined.fit(train_data, labels)
    return combined
开发者ID:kirk86,项目名称:Task-1,代码行数:7,代码来源:misc.py
示例14: __init__
def __init__(self, **config):
    """Build the vectorization pipeline and the classifier from ``config``.

    Raises ``ValueError`` when an entry of the module-level
    ``_configuration_options`` is missing from ``config`` or when
    ``config["classifier"]`` is not a key of ``_valid_classifiers``.
    """
    # Validate options are present
    for option in _configuration_options:
        if option in config:
            continue
        raise ValueError("Missing configuration "
                         "option {!r}".format(option))

    # Feature extraction
    sparse_features = parse_features(config["sparse_features"])
    sparse_branch = make_pipeline(
        Vectorizer(sparse_features, sparse=True),
        ClassifierAsFeature(),
    )
    dense_features = parse_features(config["dense_features"])
    dense_branch = Vectorizer(dense_features, sparse=False)
    vectorization = make_union(sparse_branch, dense_branch)

    # Classifier
    try:
        classifier_factory = _valid_classifiers[config["classifier"]]
    except KeyError:
        raise ValueError("Unknown classification algorithm "
                         "{!r}".format(config["classifier"]))
    model = classifier_factory(**config["classifier_args"])

    self.pipeline = make_pipeline(vectorization, StandardScaler())
    self.classifier = model
开发者ID:52nlp,项目名称:iepy,代码行数:25,代码来源:relation_extraction_classifier.py
示例15: fit
def fit(self, X, y):
    """Fit one model per split, then a final model on their union; return ``self``.

    The last column of ``X`` is stripped off and passed to
    ``subject_splitter`` as the grouping ids; all other columns are features.
    """
    # Filthy hack
    sids = X[:, -1]
    features = X[:, :-1]

    all_pipelines = []
    for X_s, y_s in subject_splitter(features, y, sids):
        fitted = make_pipeline(LogisticRegressionCV()).fit(X_s, y_s)
        all_pipelines.append(fitted)

    wrapped = [FeatureUnionWrapper(p) for p in all_pipelines]
    stacked = make_pipeline(make_union(*wrapped), LogisticRegressionCV())
    self.clf_ = stacked.fit(features, y)
    return self
开发者ID:kastnerkyle,项目名称:kaggle-decmeg2014,代码行数:8,代码来源:minimal_clf.py
示例16: get_scores_for_imputer
def get_scores_for_imputer(imputer, X_missing, y_missing):
    """Cross-validate ``REGRESSOR`` behind ``imputer`` joined with a missing indicator.

    Zero is the missing-value marker given to the indicator. ``REGRESSOR``
    and ``N_SPLITS`` are module-level names defined outside this block.
    """
    features = make_union(imputer, MissingIndicator(missing_values=0))
    estimator = make_pipeline(features, REGRESSOR)
    return cross_val_score(estimator, X_missing, y_missing,
                           scoring='neg_mean_squared_error',
                           cv=N_SPLITS)
开发者ID:allefpablo,项目名称:scikit-learn,代码行数:8,代码来源:plot_missing_values.py
示例17: make_pipe
def make_pipe(classifier):
    """Return a pipeline feeding token counts plus hand-written features to ``classifier``."""
    # Feature functions handed to FunctionFeaturizer, in this order.
    feature_functions = (
        longest_run_of_capital_letters_feature,
        percent_character_feature,
        percent_character_combinations,
        longest_run_of_character_feature,
        character_combinations_binary,
    )
    language_featurizer = make_union(
        CountVectorizer(),
        FunctionFeaturizer(*feature_functions),
    )
    return make_pipeline(language_featurizer, classifier)
开发者ID:jdhiggins,项目名称:programming-language-classifier,代码行数:9,代码来源:feature_vectorizer.py
示例18: _create_feature_union
def _create_feature_union(features):
    """
    Build a FeatureUnion with one extractor -> vectorizer pipeline per feature.

    Every item of ``features`` is a 3-tuple ``(name, feature_extractor,
    vectorizer)``; the name is unpacked but not used.
    """
    branches = []
    for _name, extractor, vectorizer in features:
        branches.append(make_pipeline(extractor, vectorizer))
    return make_union(*branches)
开发者ID:rolando-contribute,项目名称:TeamHG-Memex-Formasaurus,代码行数:9,代码来源:model.py
示例19: __init__
def __init__(self, transforms, n_estimators=2000, criterion='gini', min_samples_leaf=2, n_jobs=-1):
    """Store the hyper-parameters and build the feature pipeline and forest.

    Parameters
    ----------
    transforms : iterable of callables
        Each is called with no arguments to obtain a transformer; the
        results are joined with ``make_union``.
    n_estimators, criterion, min_samples_leaf, n_jobs
        Stored on the instance and forwarded to ``RandomForestClassifier``.
    """
    self.transforms = transforms
    self.n_estimators = n_estimators
    self.criterion = criterion
    self.min_samples_leaf = min_samples_leaf
    self.n_jobs = n_jobs

    union = make_union(*[t() for t in transforms])
    self.pipeline = make_pipeline(union)

    # Two fixes: the caller's n_jobs is now honoured (the original always
    # passed -1), and all options are passed by keyword because current
    # scikit-learn rejects a positional ``criterion``.
    self.classifier = RandomForestClassifier(
        n_estimators=n_estimators,
        criterion=criterion,
        min_samples_leaf=min_samples_leaf,
        n_jobs=n_jobs,
    )
开发者ID:paris5020,项目名称:athene_system,代码行数:12,代码来源:rf_predictors.py
示例20: create_input_transformer
def create_input_transformer(fields, vec_name):
    """Return a feature union with one select -> vectorize -> scale pipeline per field.

    Each item of ``fields`` is a mapping with ``'name'`` and ``'scale'`` keys.
    The field type is looked up through the module-level ``processed_db``.
    """

    def _field_branch(field):
        # Both keys are read before the type lookup, as in the original loop.
        field_name = field['name']
        field_scale = field['scale']
        field_type = processed_db.get_field_type(field_name)
        return make_pipeline(
            ItemSelector(field_name),          # select the correct column
            Vectorizer(vec_name, field_type),  # vectorize (depending on str/numeric input)
            Scaler(field_scale),               # scale column based on user input
        )

    return make_union(*[_field_branch(field) for field in fields])
开发者ID:Lilykos,项目名称:clusterix,代码行数:15,代码来源:utils.py
注：本文中的sklearn.pipeline.make_union函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论