This article collects typical usage examples of the Python function sklearn.metrics.make_scorer. If you have been wondering exactly what make_scorer does, how to call it, and what it looks like in real code, the curated examples below should help.
Twenty code examples of make_scorer are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
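Before the examples, a minimal sketch of the core idea may help: make_scorer wraps a plain metric with the signature metric(y_true, y_pred) into a scorer with the signature scorer(estimator, X, y), which is what cross_val_score and GridSearchCV accept. The snippet below is illustrative only; the dataset and estimator are arbitrary choices, not taken from the examples that follow.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, make_scorer
from sklearn.model_selection import cross_val_score

X, y = make_classification(n_samples=200, random_state=0)

# Wrap a plain metric(y_true, y_pred) into a scorer(estimator, X, y)
f1_scorer = make_scorer(f1_score)

scores = cross_val_score(LogisticRegression(max_iter=1000), X, y,
                         cv=5, scoring=f1_scorer)
print(scores.mean())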
Example 1: training
from sklearn import metrics, svm
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import cross_val_score

def training(matrix, Y, SVM):
    """ def training(matrix, Y, SVM):
    matrix: the training data
    Y: the labels, as an array
    SVM: a boolean; if SVM == True we use svm.SVC(), otherwise AdaBoostClassifier
    return: cross-validation scores
    """
    if SVM:
        classifier = svm.SVC()
    else:
        classifier = AdaBoostClassifier(n_estimators=300)

    precision_micro_scorer = metrics.make_scorer(custom_precision_micro_score)
    precision_macro_scorer = metrics.make_scorer(custom_precision_macro_score)
    recall_micro_scorer = metrics.make_scorer(custom_recall_micro_score)
    recall_macro_scorer = metrics.make_scorer(custom_recall_macro_score)

    precision_micro = cross_val_score(classifier, matrix, Y, cv=10, scoring=precision_micro_scorer)
    precision_macro = cross_val_score(classifier, matrix, Y, cv=10, scoring=precision_macro_scorer)
    recall_micro = cross_val_score(classifier, matrix, Y, cv=10, scoring=recall_micro_scorer)
    recall_macro = cross_val_score(classifier, matrix, Y, cv=10, scoring=recall_macro_scorer)

    return {"micro": (precision_micro, recall_micro), "macro": (precision_macro, recall_macro)}
Author: Ethiy, Project: ALTEGRAD, Lines: 25, Source: training.py
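The custom_* score functions in Example 1 are defined elsewhere in the ALTEGRAD project and are not shown here. Any callable with the signature score(y_true, y_pred) -> float works with make_scorer; as a hypothetical sketch, custom_precision_micro_score could look like this:

from sklearn import metrics

# Hypothetical stand-in for the project's custom_precision_micro_score;
# make_scorer only requires the (y_true, y_pred) -> float signature.
def custom_precision_micro_score(y_true, y_pred):
    return metrics.precision_score(y_true, y_pred, average="micro")

precision_micro_scorer = metrics.make_scorer(custom_precision_micro_score)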
Example 2: __init__
def __init__(self, n_features=None, tuning_ranges=None, models=None, cv=None, njobs=1, pre_dispatch='2*n_jobs',
             stack=True, verbose=False, metric='lad'):
    if metric.lower() not in ['lad', 'mse']:
        raise ValueError('Metric must be either lad or mse.')
    if tuning_ranges is None:
        if n_features is None:
            raise ValueError('Must supply one of n_features or tuning_ranges.')
        # use default values for the grid search over tuning parameters for all models
        tuning_ranges = {'DecisionTreeRegressor': {'max_depth': [5, 10, 20, 50, None]},
                         'RandomForestRegressor': {'max_features':
                                                   list(np.unique(np.linspace(2, n_features, 5).astype(int)))},
                         'GbrAutoNtrees': {'max_depth': [1, 2, 3, 5, 10]}}
    if models is None:
        # initialize the list of sklearn objects corresponding to different statistical models
        models = []
        if 'DecisionTreeRegressor' in tuning_ranges:
            models.append(DecisionTreeRegressor())
        if 'RandomForestRegressor' in tuning_ranges:
            models.append(RandomForestRegressor(n_estimators=500, oob_score=True, n_jobs=njobs))
        if 'GbrAutoNtrees' in tuning_ranges:
            models.append(GbrAutoNtrees(subsample=0.75, n_estimators=500, learning_rate=0.01))

    super(RegressionSuite, self).__init__(tuning_ranges, models, cv, njobs, pre_dispatch, stack, verbose)
    self.nfeatures = n_features
    self.metric = metric.lower()
    # both supported metrics are errors, so the scorers negate them
    if self.metric == 'lad':
        self.scorer = make_scorer(mean_absolute_error, greater_is_better=False)
    elif self.metric == 'mse':
        self.scorer = make_scorer(mean_squared_error, greater_is_better=False)
Author: albertfxwang, Project: bck_stats, Lines: 34, Source: sklearn_estimator_suite.py

Example 3: _make_scoring_r0
from sklearn import metrics

def _make_scoring_r0(scoring):
    if scoring == 'r2':
        return metrics.make_scorer(metrics.r2_score)
    elif scoring == 'mean_absolute_error':
        return metrics.make_scorer(metrics.mean_absolute_error, greater_is_better=False)
    elif scoring == 'mean_squared_error':
        return metrics.make_scorer(metrics.mean_squared_error, greater_is_better=False)
    elif scoring == 'median_absolute_error':
        return metrics.make_scorer(metrics.median_absolute_error, greater_is_better=False)
    else:
        raise ValueError("Unsupported scoring")
Author: jskDr, Project: jamespy_py3, Lines: 11, Source: _jgrid_r0.py

Example 4: _test_ridge_loo
def _test_ridge_loo(filter_):
    # test that this works with both dense and sparse matrices
    n_samples = X_diabetes.shape[0]

    ret = []

    fit_intercept = filter_ == DENSE_FILTER
    ridge_gcv = _RidgeGCV(fit_intercept=fit_intercept)

    # check best alpha
    ridge_gcv.fit(filter_(X_diabetes), y_diabetes)
    alpha_ = ridge_gcv.alpha_
    ret.append(alpha_)

    # check that we get the same best alpha with a custom loss_func
    f = ignore_warnings
    scoring = make_scorer(mean_squared_error, greater_is_better=False)
    ridge_gcv2 = RidgeCV(fit_intercept=False, scoring=scoring)
    f(ridge_gcv2.fit)(filter_(X_diabetes), y_diabetes)
    assert ridge_gcv2.alpha_ == pytest.approx(alpha_)

    # check that we get the same best alpha with a custom score_func
    func = lambda x, y: -mean_squared_error(x, y)
    scoring = make_scorer(func)
    ridge_gcv3 = RidgeCV(fit_intercept=False, scoring=scoring)
    f(ridge_gcv3.fit)(filter_(X_diabetes), y_diabetes)
    assert ridge_gcv3.alpha_ == pytest.approx(alpha_)

    # check that we get the same best alpha with a scorer
    scorer = get_scorer('neg_mean_squared_error')
    ridge_gcv4 = RidgeCV(fit_intercept=False, scoring=scorer)
    ridge_gcv4.fit(filter_(X_diabetes), y_diabetes)
    assert ridge_gcv4.alpha_ == pytest.approx(alpha_)

    # check that we get the same best alpha with sample weights
    if filter_ == DENSE_FILTER:
        ridge_gcv.fit(filter_(X_diabetes), y_diabetes,
                      sample_weight=np.ones(n_samples))
        assert ridge_gcv.alpha_ == pytest.approx(alpha_)

    # simulate several responses
    Y = np.vstack((y_diabetes, y_diabetes)).T

    ridge_gcv.fit(filter_(X_diabetes), Y)
    Y_pred = ridge_gcv.predict(filter_(X_diabetes))
    ridge_gcv.fit(filter_(X_diabetes), y_diabetes)
    y_pred = ridge_gcv.predict(filter_(X_diabetes))

    assert_allclose(np.vstack((y_pred, y_pred)).T,
                    Y_pred, rtol=1e-5)

    return ret
Author: manhhomienbienthuy, Project: scikit-learn, Lines: 52, Source: test_ridge.py
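The test above leans on an equivalence worth spelling out: make_scorer(mean_squared_error, greater_is_better=False) and the built-in 'neg_mean_squared_error' scorer both return minus the MSE. A quick check using only the public API (a sketch, not part of the original test):

from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.metrics import get_scorer, make_scorer, mean_squared_error

X, y = load_diabetes(return_X_y=True)
est = Ridge().fit(X, y)

custom = make_scorer(mean_squared_error, greater_is_better=False)
builtin = get_scorer("neg_mean_squared_error")

# Both report -MSE, so "greater is better" holds for model selection
assert abs(custom(est, X, y) - builtin(est, X, y)) < 1e-12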
Example 5: test_cross_val_score_multilabel
def test_cross_val_score_multilabel():
    X = np.array([[-3, 4], [2, 4], [3, 3], [0, 2], [-3, 1],
                  [-2, 1], [0, 0], [-2, -1], [-1, -2], [1, -2]])
    y = np.array([[1, 1], [0, 1], [0, 1], [0, 1], [1, 1],
                  [0, 1], [1, 0], [1, 1], [1, 0], [0, 0]])
    clf = KNeighborsClassifier(n_neighbors=1)
    scoring_micro = make_scorer(precision_score, average="micro")
    scoring_macro = make_scorer(precision_score, average="macro")
    scoring_samples = make_scorer(precision_score, average="samples")
    score_micro = cval.cross_val_score(clf, X, y, scoring=scoring_micro, cv=5)
    score_macro = cval.cross_val_score(clf, X, y, scoring=scoring_macro, cv=5)
    score_samples = cval.cross_val_score(clf, X, y, scoring=scoring_samples, cv=5)
    assert_almost_equal(score_micro, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 3])
    assert_almost_equal(score_macro, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4])
    assert_almost_equal(score_samples, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4])
Author: mrterry, Project: scikit-learn, Lines: 13, Source: test_cross_validation.py
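Example 5 also demonstrates that extra keyword arguments passed to make_scorer (here average=) are stored and forwarded to the metric on every call, so one metric function can yield several distinct scorers:

from sklearn.metrics import make_scorer, precision_score

# average= is forwarded to precision_score each time the scorer is called
micro = make_scorer(precision_score, average="micro")
macro = make_scorer(precision_score, average="macro")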
Example 6: make_scoring
def make_scoring(scoring):
    """
    Return a scorer for the given metric name.
    The score is negated if greater_is_better is False.
    """
    if scoring == 'r2':
        return metrics.make_scorer(metrics.r2_score)
    elif scoring == 'mean_absolute_error':
        return metrics.make_scorer(metrics.mean_absolute_error, greater_is_better=False)
    elif scoring == 'mean_squared_error':
        return metrics.make_scorer(metrics.mean_squared_error, greater_is_better=False)
    elif scoring == 'median_absolute_error':
        return metrics.make_scorer(metrics.median_absolute_error, greater_is_better=False)
    else:
        raise ValueError("Unsupported scoring")
Author: jskDr, Project: jamespy_py3, Lines: 14, Source: jgrid.py
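To make the docstring concrete: with greater_is_better=False, make_scorer multiplies the metric's output by -1, so the scorer reports the negated error. A small sketch, assuming only the public scorer API:

import numpy as np
from sklearn.dummy import DummyRegressor
from sklearn.metrics import make_scorer, mean_absolute_error

X = np.arange(10).reshape(-1, 1)
y = np.arange(10, dtype=float)
est = DummyRegressor(strategy="mean").fit(X, y)

mae_scorer = make_scorer(mean_absolute_error, greater_is_better=False)
print(mean_absolute_error(y, est.predict(X)))  # positive error
print(mae_scorer(est, X, y))                   # same magnitude, negated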
Example 7: fit_predict_model
def fit_predict_model(city_data):
    """Find and tune the optimal model. Make a prediction on housing data."""
    # Get the features and labels from the Boston housing data
    # and shuffle them randomly
    X, y = shuffle(city_data.data, city_data.target)

    # Setup a Decision Tree Regressor
    regressor = DecisionTreeRegressor()
    parameters = {"max_depth": (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)}

    # 1. Find the best performance metric
    # (should be the same metric as your performance_metric procedure)
    # http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html
    scorer = make_scorer(METRIC, greater_is_better=False)

    # 2. Use grid search to fine-tune the Decision Tree Regressor and find the best model
    # http://scikit-learn.org/stable/modules/generated/sklearn.grid_search.GridSearchCV.html#sklearn.grid_search.GridSearchCV
    reg = grid_search.GridSearchCV(regressor, parameters, scoring=scorer, cv=10)

    # Fit the learner to the training data
    print "Final Model: "
    print reg.fit(X, y)
    print "Best model parameter: " + str(reg.best_params_)
    print "Best model estimator: " + str(reg.best_estimator_)

    # Use the model to predict the output of a particular sample
    x = [11.95, 0.00, 18.100, 0, 0.6590, 5.6090, 90.00, 1.385, 24, 680.0, 20.20, 332.09, 12.13]
    y = reg.predict(x)
    print "House: " + str(x)
    print "Prediction: " + str(y)
    return (reg.best_params_["max_depth"], y)
Author: ancheremukhin, Project: boston_housing, Lines: 34, Source: boston_housing.py
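METRIC in Example 7 is an unresolved placeholder left by the author. The sibling Boston-housing examples below (Examples 9, 11, 13, and 15) pass either mean_squared_error or a project-defined performance_metric; a hypothetical performance_metric consistent with greater_is_better=False could be:

from sklearn.metrics import mean_squared_error

# Hypothetical performance_metric: an error measure (lower is better),
# which is why the scorer above sets greater_is_better=False.
def performance_metric(y_true, y_pred):
    return mean_squared_error(y_true, y_pred)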
Example 8: test
def test():
    x_train, y_train = load_svmlight_file("D:/traindata/12trainset")
    x_train.todense()
    x_test, y_test = load_svmlight_file("D:/traindata/12testset")
    x_test.todense()
    print(x_train.shape)

    # classifier
    clf = SVC(kernel='rbf')
    ovrclf = OneVsRestClassifier(clf, n_jobs=-1)

    # parameters
    parameters = [{'estimator__C': [2**-5, 2**-4, 2**-3, 2**-2, 2**-1, 1, 2**1, 2**2, 2**3, 2**4, 2**5],
                   'estimator__kernel': ['rbf'],
                   'estimator__gamma': [2**-5, 2**-4, 2**-3, 2**-2, 2**-1, 1, 2**1, 2**2, 2**3, 2**4, 2**5]},
                  {'estimator__C': [2**-5, 2**-4, 2**-3, 2**-2, 2**-1, 1, 2**1, 2**2, 2**3, 2**4, 2**5],
                   'estimator__kernel': ['linear']}]
    para = {'estimator__C': [2**-5, 2**-4],
            'estimator__kernel': ['rbf'],
            'estimator__gamma': [2**-1, 1]}

    # scoring
    sougou_score = make_scorer(score_func, greater_is_better=False)

    # cross-validation iterator
    sfk = c_v.StratifiedKFold(y_train, shuffle=True, n_folds=5, random_state=0)

    # grid search
    gsclf = g_s.GridSearchCV(ovrclf, param_grid=para, cv=sfk, scoring=sougou_score)
    gsclf.fit(x_train, y_train)
    print("best score: ", gsclf.best_score_)
    print("best parameters: ", gsclf.best_params_)

    y_pred = gsclf.predict(x_test)

    # results
    target_names = ['0', '1', '2', '3']
    sum_y = np.sum((np.array(y_pred) - np.array(y_test))**2)
    print(classification_report(y_test, y_pred, target_names=target_names))
    print("sougouVal: ", float(sum_y) / y_pred.shape[0])
    print(time.time() - start_time)
Author: lkprof, Project: sema, Lines: 35, Source: svm.py

Example 9: fit_predict_model
def fit_predict_model(city_data):
    """Find and tune the optimal model. Make a prediction on housing data."""
    # Get the features and labels from the Boston housing data
    X, y = city_data.data, city_data.target

    # Setup a Decision Tree Regressor
    regressor = DecisionTreeRegressor()
    parameters = {"max_depth": (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)}

    ###################################
    ### Step 4. YOUR CODE GOES HERE ###
    ###################################

    # 1. Find the best performance metric
    # (should be the same metric as your performance_metric procedure)
    # http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html
    regScorer = make_scorer(performance_metric, greater_is_better=False)

    # 2. Use grid search to fine-tune the Decision Tree Regressor and find the best model
    # http://scikit-learn.org/stable/modules/generated/sklearn.grid_search.GridSearchCV.html#sklearn.grid_search.GridSearchCV
    reg = GridSearchCV(regressor, parameters, scoring=regScorer)

    # Fit the learner to the training data
    print "Final Model: "
    print reg.fit(X, y)
    print "Optimal parameter: " + str(reg.best_params_)
    print "Best Score: " + str(reg.best_score_)
    print reg.grid_scores_

    x = [11.95, 0.00, 18.100, 0, 0.6590, 5.6090, 90.00, 1.385, 24, 680.0, 20.20, 332.09, 12.13]
    z = reg.predict(x)
    print "House: " + str(x)
    print "Prediction: " + str(z)
Author: bblalock, Project: Udacity-Machine-Learning-Engineer-Projects, Lines: 35, Source: boston_housing.py

Example 10: main
def main(argv):
    pd.set_option("display.width", 200)
    pd.set_option("display.height", 500)
    warnings.filterwarnings("ignore")

    global file_path, gini_scorer

    # Normalized Gini scorer
    gini_scorer = metrics.make_scorer(normalized_gini, greater_is_better=True)

    if platform.system() == "Windows":
        file_path = "C:/Python/Others/data/Kaggle/Liberty_Mutual_Group/"
    else:
        file_path = "/home/roshan/Desktop/DS/Others/data/Kaggle/Liberty_Mutual_Group/"

    ########################################################################################################################
    # Read the input files, then munge and split the data into train and test sets
    ########################################################################################################################
    Train_DS = pd.read_csv(file_path + "train.csv", sep=",", index_col=0)
    Actual_DS = pd.read_csv(file_path + "test.csv", sep=",", index_col=0)
    Sample_DS = pd.read_csv(file_path + "sample_submission.csv", sep=",")
    Parms_XGB_DS = pd.read_csv(file_path + "Parms_DS_XGB_1001.csv", sep=",")
    Parms_RF_DS = pd.read_csv(file_path + "Parms_DS_RF2.csv", sep=",")

    Train_DS, Actual_DS, y = Data_Munging(Train_DS, Actual_DS)

    pred_Actual = RFR_Regressor(Train_DS, y, Actual_DS, Sample_DS, Parms_RF_DS, Grid=False, Ensemble=False)
Author: roshankr, Project: DS_Competition, Lines: 29, Source: LMG_v_02.py

Example 11: fit_predict_model
def fit_predict_model(city_data):
    """Find and tune the optimal model. Make a prediction on housing data."""
    # Get the features and labels from the Boston housing data
    X, y = city_data.data, city_data.target

    # Setup a Decision Tree Regressor
    regressor = DecisionTreeRegressor()
    parameters = {'max_depth': (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)}
    my_scorer = make_scorer(performance_metric, greater_is_better=False)

    # Use grid search to fine-tune the Decision Tree Regressor and find the best model
    print "Final Model: "
    x = [11.95, 0.00, 18.100, 0, 0.6590, 5.6090, 90.00, 1.385, 24, 680.0, 20.20, 332.09, 12.13]

    # Run grid search several times and average the results for the best prediction
    predictions = []
    best_predictor = []
    for i in range(10):
        grid = grid_search.GridSearchCV(regressor, param_grid=parameters, scoring=my_scorer, cv=5)
        grid.fit(X, y)
        y1 = grid.best_estimator_.predict(x)
        best_predictor.append(grid.best_params_.itervalues().next())
        predictions.append(y1)
    average_predicted_price = np.mean(predictions)
    print "Best model parameter: " + str(int(np.mean(best_predictor)))
    print "Prediction: " + str(average_predicted_price)
Author: nvrcght, Project: predicting_boston_house_prices, Lines: 30, Source: boston_housing.py

Example 12: main
def main():
    # Load the data. A pandas DataFrame is returned; only the training data is selected.
    data = load_data()[0]
    selected_columns = ["baselineDAS", "Age", "Gender"]
    y = np.array(data["Response.deltaDAS"])
    x = np.array(data[selected_columns])

    # Build one dictionary with the methods and another with the grid of parameters
    methods = {
        'sumSVR': sumSVR
    }

    params_grid = {
        'sumSVR': {
            'dim': [3],
            'C': np.arange(0.1, 2, 0.4),
            'epsilon': np.arange(0.01, 0.1, 0.02),
            # 'degree': np.arange(1, 10),
            'kernel_functions': [[cosine_similarity, cosine_similarity, DiracKernel]],
            'w': list(product(range(1, 6), repeat=3))
        }
    }

    # Build and run the comparison. tr_scoring has to be constructed as shown here.
    comp = MethodComparison(methods, params_grid=params_grid)
    scores = comp.process(x, y, scorer, repeats=10, train_cv=3,
                          tr_scoring=make_scorer(scorer, greater_is_better=True), n_jobs=8)

    return scores
Author: vmolina, Project: RAclinical, Lines: 27, Source: weighted_kernels.py

Example 13: fit_predict_model
def fit_predict_model(city_data):
    """Find and tune the optimal model. Make a prediction on housing data."""
    # Get the features and labels from the Boston housing data
    X, y = city_data.data, city_data.target

    # Setup a Decision Tree Regressor
    regressor = DecisionTreeRegressor()
    parameters = {'max_depth': (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)}

    # 1. Find the best performance metric
    # (should be the same metric as your performance_metric procedure)
    # http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html
    # NOTE: mean_squared_error is an error metric, so greater_is_better=False
    # would normally be passed here (compare Example 15); as written, the grid
    # search maximizes MSE.
    MSE_scorer = make_scorer(mean_squared_error)

    # 2. Use grid search to fine-tune the Decision Tree Regressor and find the best model
    # http://scikit-learn.org/stable/modules/generated/sklearn.grid_search.GridSearchCV.html#sklearn.grid_search.GridSearchCV
    reg = grid_search.GridSearchCV(regressor, parameters, MSE_scorer)

    # Fit the learner to the training data
    print "Final Model: "
    print reg.fit(X, y)

    # Use the model to predict the output of a particular sample
    x = [11.95, 0.00, 18.100, 0, 0.6590, 5.6090, 90.00, 1.385, 24, 680.0, 20.20, 332.09, 12.13]
    y = reg.predict(x)
    print "House: " + str(x)
    print "Prediction: " + str(y)
Author: grgomez, Project: MachineLearning, Lines: 27, Source: boston_housing.py

Example 14: run_gridsearch
def run_gridsearch(clf, parameters, X_train, y_train, X_test, y_test):
    """Cross-validated, optimized parameter search."""
    start = time.time()

    # Scorer object
    scorer = make_scorer(accuracy_score, greater_is_better=True)

    # Grid search
    tuned_clf = GridSearchCV(clf, parameters, scoring=scorer)
    print "Final Model: "
    tuned_clf.fit(X_train, y_train)
    print "Best Parameters: {:}".format(tuned_clf.best_params_)

    # Calculate accuracy for the tuned classifier
    est = tuned_clf.best_estimator_
    tuned_pred = est.predict(X_test)
    print "accuracy score for tuned classifier: {:.3f}".format(accuracy_score(y_test, tuned_pred))
    print "Training set: {} samples".format(X_train.shape[0])
    print "Test set: {} samples".format(X_test.shape[0])

    end = time.time()
    print "Grid search time (secs): {:.3f}".format(end - start)
Author: aldousbirchall, Project: MachineLearning_SmartBeta_Pipeline, Lines: 25, Source: SPX_Pipeline.py
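Since accuracy_score is a reward metric, greater_is_better=True is already the default, and the scorer built in Example 14 behaves the same as the built-in 'accuracy' scoring string; the explicit flag just documents the intent:

from sklearn.metrics import accuracy_score, make_scorer

scorer = make_scorer(accuracy_score)  # greater_is_better=True is the default
# GridSearchCV(clf, parameters, scoring=scorer) is equivalent to
# GridSearchCV(clf, parameters, scoring="accuracy")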
Example 15: fit_predict_model
def fit_predict_model(city_data):
    """Find and tune the optimal model. Make a prediction on housing data."""
    # Get the features and labels from the Boston housing data
    X, y = city_data.data, city_data.target

    # Setup a Decision Tree Regressor
    regressor = DecisionTreeRegressor()

    # Setup parameters and scorer for model optimization through grid search
    parameters = {"max_depth": (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)}
    scorer = make_scorer(mean_squared_error, greater_is_better=False)
    gs = GridSearchCV(regressor, parameters, scoring=scorer)
    gs.fit(X, y)

    # Select the best settings for the regressor
    reg = gs.best_estimator_

    # Fit the learner to the training data
    print "Final Model: "
    print reg.fit(X, y)

    # Use the model to predict the output of a particular sample
    x = [11.95, 0.00, 18.100, 0, 0.6590, 5.6090, 90.00, 1.385, 24, 680.0, 20.20, 332.09, 12.13]
    y = reg.predict(x)
    print "House: " + str(x)
    print "Prediction: " + str(y)
Author: kornev, Project: Udacity_Machine_Learning_Engineer, Lines: 27, Source: boston_housing.py

Example 16: ada_boost
def ada_boost():
    savefile = open('traindata.pkl', 'rb')
    (x_train, y_train, t1) = cPickle.load(savefile)
    savefile.close()
    savefile = open('testdata.pkl', 'rb')
    (x_test, t1, name1) = cPickle.load(savefile)
    savefile.close()

    # X_train, X_valid, y_train, y_valid = cross_validation.train_test_split(
    #     X, y, test_size=0.1, random_state=42)

    x_train = np.asarray(x_train, dtype=np.float32)
    y_train = np.asarray(y_train, dtype='int32') - 1

    nest = 190
    lr = .1
    md = 6

    # clf1 = DecisionTreeClassifier(max_depth=2)
    # clf = AdaBoostClassifier(clf1, n_estimators=200, learning_rate=.25)
    clf = GradientBoostingClassifier(n_estimators=nest, learning_rate=lr, max_depth=md, random_state=0)
    # clf = RandomForestClassifier(n_estimators=200)  # .81
    # clf = ExtraTreesClassifier(n_estimators=1000, max_depth=None, min_samples_split=10, random_state=0, n_jobs=8)  # .81
    # clf = KNeighborsClassifier(15)

    if 1:
        clf.fit(x_train, y_train)
        ypred = clf.predict_proba(x_test)
        y_str = ['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5', 'Class_6', 'Class_7', 'Class_8', 'Class_9']
        kcsv.print_csv(ypred, name1, y_str, indexname='id')
        print (nest, lr, md)

    if 0:
        multiclass_log_loss = make_scorer(score_func=logloss_mc, greater_is_better=True, needs_proba=True)
        scores = cross_val_score(clf, x_train, y_train, n_jobs=8, cv=5, scoring=multiclass_log_loss)
        print scores
        print (nest, lr, md, scores.mean())
Author: coreyhahn, Project: kaggle_otto, Lines: 35, Source: ada_base00.py
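logloss_mc in Example 16 is the project's own multiclass log-loss. The key point is needs_proba=True, which makes the scorer call predict_proba and hand class probabilities to the score function. A sketch using scikit-learn's own log_loss as a stand-in (note that recent scikit-learn versions spell needs_proba=True as response_method='predict_proba'):

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, make_scorer
from sklearn.model_selection import cross_val_score

X, y = load_iris(return_X_y=True)

# needs_proba routes predict_proba output to log_loss;
# greater_is_better=False negates the error so bigger is better
logloss_scorer = make_scorer(log_loss, greater_is_better=False, needs_proba=True)
scores = cross_val_score(LogisticRegression(max_iter=1000), X, y,
                         cv=5, scoring=logloss_scorer)
print(scores)  # negative values; closer to zero is better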
Example 17: fit_model
def fit_model(X, y):
    """Tune a decision tree regressor model using GridSearchCV on the input data X
    and target labels y, and return this optimal model."""

    # Create a decision tree regressor object
    regressor = DecisionTreeRegressor()

    # Set up the parameters we wish to tune
    parameters = {'max_depth': (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)}

    # Make an appropriate scoring function
    scoring_function = make_scorer(mean_squared_error, greater_is_better=False)

    # Make the GridSearchCV object. It exposes an attribute best_estimator_
    # (only if refit == True, the default) holding the model with the parameters
    # that best fit the data (i.e. the best tree depth). Nothing is computed yet;
    # this only creates the grid search object and stores it in reg.
    reg = grid_search.GridSearchCV(regressor, parameters, scoring_function)

    # Fit the learner to the data to obtain the optimal model with tuned parameters.
    # The best model will be saved in reg.best_estimator_
    reg.fit(X, y)

    # Return the optimal model
    return reg.best_estimator_
Author: Kebniss, Project: Boston-housing, Lines: 25, Source: transcript_project1.py

Example 18: plot_validation_curve
def plot_validation_curve(clf, X, y, param, name=None):
    try:
        name = clf.__class__.__name__ if name is None else name
        if param is None:
            return
        scorer = metrics.make_scorer(metrics.average_precision_score)
        train_scores, test_scores = validation_curve(clf, X, y, cv=5,
                                                     scoring=scorer, n_jobs=-1,
                                                     param_name=param['name'],
                                                     param_range=param['range'])
        train_scores_mean = np.mean(train_scores, axis=1)
        train_scores_std = np.std(train_scores, axis=1)
        test_scores_mean = np.mean(test_scores, axis=1)
        test_scores_std = np.std(test_scores, axis=1)

        plt.title('Validation Curve of {} varying {}'.format(name, param['name']))
        plt.xlabel(param['name'])
        plt.ylabel("Score")
        plt.ylim(-0.05, 1.05)
        plt.xlim(min(param['range']), max(param['range']))
        plt.plot(param['range'], train_scores_mean, label='Training score', color='r')
        plt.fill_between(param['range'], train_scores_mean - train_scores_std,
                         train_scores_mean + train_scores_std, alpha=0.2, color='r')
        plt.plot(param['range'], test_scores_mean, label='Cross-validation score',
                 color='g')
        plt.fill_between(param['range'], test_scores_mean - test_scores_std,
                         test_scores_mean + test_scores_std, alpha=0.2, color='g')
        plt.legend(loc='lower right')
        plt.savefig(name + '_' + param['name'] + '_validationcurve.png')
        plt.clf()
    except Exception as e:
        print('ERROR: {}, {}'.format(name, str(e)))
Author: veksev, Project: spring16, Lines: 32, Source: viz.py

Example 19: rf_grid_search
def rf_grid_search():
    train_inp, valid_inp, train_target, valid_target = prepare_input()

    # Set up the scorer for the grid search. Log-loss is an error, not a score,
    # so set greater_is_better to False; log-loss also requires probabilities.
    log_loss_scorer = make_scorer(log_loss, greater_is_better=False, needs_proba=True)

    train_inp = train_inp[:100000]
    train_target = train_target[:100000]

    start = time.time()
    random_forest = RandomForestClassifier(random_state=31)
    # r_forest_parameters = {'n_estimators': [120, 300, 500, 800, 1200], 'max_depth': [5, 8, 15, 25, 30, None],
    #                        'max_features': ['log2', 'sqrt', None],
    #                        'min_samples_split': [1, 2, 5, 10, 15, 100], 'min_samples_leaf': [1, 2, 5, 10]}
    # 75.1 minutes to run with these parameters - 72 fits
    r_forest_parameters = {'min_samples_split': [2, 5, 10, 20, 50, 100], 'min_samples_leaf': [1, 2, 5, 10, 50, 100]}

    # Grid search is too slow not to use all cores, and far too slow to run with no output.
    r_forest_grid_obj = GridSearchCV(random_forest, r_forest_parameters, log_loss_scorer, verbose=2, n_jobs=-1)
    r_forest_grid_obj = r_forest_grid_obj.fit(train_inp, train_target)
    random_forest = r_forest_grid_obj.best_estimator_
    print "Best params: " + str(r_forest_grid_obj.best_params_)

    random_forest_train_error = log_loss(train_target, random_forest.predict_proba(train_inp))
    random_forest_validation_error = log_loss(valid_target, random_forest.predict_proba(valid_inp))
    print "Best random forest training error: {:02.4f}".format(random_forest_train_error)
    print "Best random forest validation error: {:02.4f}".format(random_forest_validation_error)

    end = time.time()
    print "RF grid search took {:02.4f} seconds".format(end - start)

    return random_forest
Author: btaborsky, Project: red-hat-kaggle, Lines: 31, Source: red_hat.py

Example 20: svm_grid_search
def svm_grid_search():
    # Get the data
    training_input, training_target, validation_input, validation_target = prepare_input()

    # Set up the scorer for the grid search. Log-loss is an error, not a score,
    # so set greater_is_better to False; log-loss also requires probabilities.
    log_loss_scorer = make_scorer(log_loss, greater_is_better=False, needs_proba=True)

    training_input = training_input[:100000]
    training_target = training_target[:100000]
    print training_input.shape[0]
    print training_target.shape[0]

    start = time.time()
    svm = SVC(random_state=31, probability=True)
    svm_parameters = {'C': [.001, .01, .1, 1, 10, 100], 'kernel': ["rbf", "sigmoid"]}
    svm_grid_obj = GridSearchCV(svm, svm_parameters, log_loss_scorer, verbose=2, n_jobs=-1)
    svm_grid_obj = svm_grid_obj.fit(training_input, training_target)
    svm = svm_grid_obj.best_estimator_
    print "Best params: " + str(svm_grid_obj.best_params_)

    svm_train_error = log_loss(training_target, svm.predict_proba(training_input))
    svm_validation_error = log_loss(validation_target, svm.predict_proba(validation_input))
    print "Best SVM training error: {:02.4f}".format(svm_train_error)
    print "Best SVM validation error: {:02.4f}".format(svm_validation_error)

    end = time.time()
    print "SVM grid search took {:02.4f} seconds".format(end - start)

    return svm
Author: btaborsky, Project: red-hat-kaggle, Lines: 32, Source: red_hat.py
Note: the sklearn.metrics.make_scorer examples on this page were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are drawn from open-source projects contributed by many developers, and copyright remains with the original authors; consult each project's license before redistributing or reusing the code. Do not repost without permission.