
Python metrics.make_scorer Function Code Examples


This article collects typical, real-world code examples of Python's sklearn.metrics.make_scorer function. If you are wondering how make_scorer is used in practice, the curated examples below should help.



Twenty make_scorer code examples are shown below, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python code examples.
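Before the examples, here is a minimal, self-contained sketch of the basic pattern (the dataset, classifier, and metric are illustrative choices, not taken from any example below): make_scorer wraps a plain metric function into a scorer object that cross_val_score or GridSearchCV can call.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, make_scorer
from sklearn.model_selection import cross_val_score

X, y = make_classification(n_samples=200, random_state=0)

# Wrap a plain metric function (y_true, y_pred) -> float into a scorer
# object that is invoked as scorer(estimator, X, y).
f1_scorer = make_scorer(f1_score)

scores = cross_val_score(LogisticRegression(max_iter=1000), X, y,
                         cv=5, scoring=f1_scorer)
print(scores.mean())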

Example 1: training

def training(matrix, Y, SVM):
    """Train a classifier and return cross-validation scores.

    matrix: the training data
    Y: the labels as an array
    SVM: a boolean; if True we use svm.SVC(), otherwise AdaBoostClassifier

    return: cross-validation scores
    """

    if SVM:
        classifier = svm.SVC()
    else:
        classifier = AdaBoostClassifier(n_estimators=300)

    precision_micro_scorer = metrics.make_scorer(custom_precision_micro_score)
    precision_macro_scorer = metrics.make_scorer(custom_precision_macro_score)
    recall_micro_scorer = metrics.make_scorer(custom_recall_micro_score)
    recall_macro_scorer = metrics.make_scorer(custom_recall_macro_score)

    precision_micro = cross_val_score(classifier, matrix, Y, cv=10, scoring=precision_micro_scorer)
    precision_macro = cross_val_score(classifier, matrix, Y, cv=10, scoring=precision_macro_scorer)
    recall_micro = cross_val_score(classifier, matrix, Y, cv=10, scoring=recall_micro_scorer)
    recall_macro = cross_val_score(classifier, matrix, Y, cv=10, scoring=recall_macro_scorer)

    return {"micro": (precision_micro, recall_micro), "macro": (precision_macro, recall_macro)}
Author: Ethiy | Project: ALTEGRAD | Lines: 25 | Source: training.py


Example 2: __init__

    def __init__(self, n_features=None, tuning_ranges=None, models=None, cv=None, njobs=1, pre_dispatch='2*n_jobs',
                 stack=True, verbose=False, metric='lad'):
        if metric.lower() not in ['lad', 'mse']:
            raise ValueError('Metric must be either lad or mse.')

        if tuning_ranges is None:
            if n_features is None:
                raise ValueError('Must supply one of n_features or tuning_ranges.')
            # use default values for grid search over tuning parameters for all models
            tuning_ranges = {'DecisionTreeRegressor': {'max_depth': [5, 10, 20, 50, None]},
                             'RandomForestRegressor': {'max_features':
                                                       list(np.unique(np.linspace(2, n_features, 5).astype(int)))},
                             'GbrAutoNtrees': {'max_depth': [1, 2, 3, 5, 10]}}
        if models is None:
            # initialize the list of sklearn objects corresponding to different statistical models
            models = []
            if 'DecisionTreeRegressor' in tuning_ranges:
                models.append(DecisionTreeRegressor())
            if 'RandomForestRegressor' in tuning_ranges:
                models.append(RandomForestRegressor(n_estimators=500, oob_score=True, n_jobs=njobs))
            if 'GbrAutoNtrees' in tuning_ranges:
                models.append(GbrAutoNtrees(subsample=0.75, n_estimators=500, learning_rate=0.01))

        super(RegressionSuite, self).__init__(tuning_ranges, models, cv, njobs, pre_dispatch, stack, verbose)

        self.nfeatures = n_features
        self.metric = metric.lower()
        if self.metric == 'lad':
            self.scorer = make_scorer(mean_absolute_error, greater_is_better=False)
        else:  # 'mse', the only other value accepted above
            self.scorer = make_scorer(mean_squared_error, greater_is_better=False)
Author: albertfxwang | Project: bck_stats | Lines: 34 | Source: sklearn_estimator_suite.py


Example 3: _make_scoring_r0

def _make_scoring_r0(scoring):
    if scoring == 'r2':
        return metrics.make_scorer(metrics.r2_score)
    elif scoring == 'mean_absolute_error':
        return metrics.make_scorer(metrics.mean_absolute_error, greater_is_better=False)
    elif scoring == 'mean_squared_error':
        return metrics.make_scorer(metrics.mean_squared_error, greater_is_better=False)
    elif scoring == 'median_absolute_error':
        return metrics.make_scorer(metrics.median_absolute_error, greater_is_better=False)
    else:
        raise ValueError("Unsupported scoring")
Author: jskDr | Project: jamespy_py3 | Lines: 11 | Source: _jgrid_r0.py


Example 4: _test_ridge_loo

def _test_ridge_loo(filter_):
    # test that can work with both dense or sparse matrices
    n_samples = X_diabetes.shape[0]

    ret = []

    fit_intercept = filter_ == DENSE_FILTER
    ridge_gcv = _RidgeGCV(fit_intercept=fit_intercept)

    # check best alpha
    ridge_gcv.fit(filter_(X_diabetes), y_diabetes)
    alpha_ = ridge_gcv.alpha_
    ret.append(alpha_)

    # check that we get same best alpha with custom loss_func
    f = ignore_warnings
    scoring = make_scorer(mean_squared_error, greater_is_better=False)
    ridge_gcv2 = RidgeCV(fit_intercept=False, scoring=scoring)
    f(ridge_gcv2.fit)(filter_(X_diabetes), y_diabetes)
    assert ridge_gcv2.alpha_ == pytest.approx(alpha_)

    # check that we get same best alpha with custom score_func
    func = lambda x, y: -mean_squared_error(x, y)
    scoring = make_scorer(func)
    ridge_gcv3 = RidgeCV(fit_intercept=False, scoring=scoring)
    f(ridge_gcv3.fit)(filter_(X_diabetes), y_diabetes)
    assert ridge_gcv3.alpha_ == pytest.approx(alpha_)

    # check that we get same best alpha with a scorer
    scorer = get_scorer('neg_mean_squared_error')
    ridge_gcv4 = RidgeCV(fit_intercept=False, scoring=scorer)
    ridge_gcv4.fit(filter_(X_diabetes), y_diabetes)
    assert ridge_gcv4.alpha_ == pytest.approx(alpha_)

    # check that we get same best alpha with sample weights
    if filter_ == DENSE_FILTER:
        ridge_gcv.fit(filter_(X_diabetes), y_diabetes,
                      sample_weight=np.ones(n_samples))
        assert ridge_gcv.alpha_ == pytest.approx(alpha_)

    # simulate several responses
    Y = np.vstack((y_diabetes, y_diabetes)).T

    ridge_gcv.fit(filter_(X_diabetes), Y)
    Y_pred = ridge_gcv.predict(filter_(X_diabetes))
    ridge_gcv.fit(filter_(X_diabetes), y_diabetes)
    y_pred = ridge_gcv.predict(filter_(X_diabetes))

    assert_allclose(np.vstack((y_pred, y_pred)).T,
                    Y_pred, rtol=1e-5)

    return ret
Author: manhhomienbienthuy | Project: scikit-learn | Lines: 52 | Source: test_ridge.py


Example 5: test_cross_val_score_multilabel

def test_cross_val_score_multilabel():
    X = np.array([[-3, 4], [2, 4], [3, 3], [0, 2], [-3, 1], [-2, 1], [0, 0], [-2, -1], [-1, -2], [1, -2]])
    y = np.array([[1, 1], [0, 1], [0, 1], [0, 1], [1, 1], [0, 1], [1, 0], [1, 1], [1, 0], [0, 0]])
    clf = KNeighborsClassifier(n_neighbors=1)
    scoring_micro = make_scorer(precision_score, average="micro")
    scoring_macro = make_scorer(precision_score, average="macro")
    scoring_samples = make_scorer(precision_score, average="samples")
    score_micro = cval.cross_val_score(clf, X, y, scoring=scoring_micro, cv=5)
    score_macro = cval.cross_val_score(clf, X, y, scoring=scoring_macro, cv=5)
    score_samples = cval.cross_val_score(clf, X, y, scoring=scoring_samples, cv=5)
    assert_almost_equal(score_micro, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 3])
    assert_almost_equal(score_macro, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4])
    assert_almost_equal(score_samples, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4])
Author: mrterry | Project: scikit-learn | Lines: 13 | Source: test_cross_validation.py


Example 6: make_scoring

def make_scoring(scoring):
    """
    Score is reversed if greater_is_better is False.
    """
    if scoring == 'r2':
        return metrics.make_scorer(metrics.r2_score)
    elif scoring == 'mean_absolute_error':
        return metrics.make_scorer(metrics.mean_absolute_error, greater_is_better=False)
    elif scoring == 'mean_squared_error':
        return metrics.make_scorer(metrics.mean_squared_error, greater_is_better=False)
    elif scoring == 'median_absolute_error':
        return metrics.make_scorer(metrics.median_absolute_error, greater_is_better=False)
    else:
        raise ValueError("Not supported scoring")
Author: jskDr | Project: jamespy_py3 | Lines: 14 | Source: jgrid.py
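A note on greater_is_better=False, used by Examples 3 and 6 above (and several below): make_scorer negates the metric so that "higher score is better" holds uniformly during model selection. A minimal sketch with made-up data illustrates the sign flip:

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import make_scorer, mean_squared_error

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0.0, 1.1, 1.9, 3.2])

model = LinearRegression().fit(X, y)
mse_scorer = make_scorer(mean_squared_error, greater_is_better=False)

print(mean_squared_error(y, model.predict(X)))  # positive error
print(mse_scorer(model, X, y))                  # same magnitude, negated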


Example 7: fit_predict_model

def fit_predict_model(city_data):
    """Find and tune the optimal model. Make a prediction on housing data."""

    # Get the features and labels from the Boston housing data
    # and shuffle them randomly
    X, y = shuffle(city_data.data, city_data.target)

    # Setup a Decision Tree Regressor
    regressor = DecisionTreeRegressor()

    parameters = {"max_depth": (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)}

    # 1. Find the best performance metric
    # should be the same as your performance_metric procedure
    # http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html
    scorer = make_scorer(METRIC, greater_is_better=False)

    # 2. Use grid search to fine-tune the Decision Tree Regressor and find the best model
    # http://scikit-learn.org/stable/modules/generated/sklearn.grid_search.GridSearchCV.html#sklearn.grid_search.GridSearchCV
    reg = grid_search.GridSearchCV(regressor, parameters, scoring=scorer, cv=10)

    # Fit the learner to the training data
    print "Final Model: "
    print reg.fit(X, y)
    print "Best model parameter:  " + str(reg.best_params_)
    print "Best model estimator:  " + str(reg.best_estimator_)

    # Use the model to predict the output of a particular sample
    x = [11.95, 0.00, 18.100, 0, 0.6590, 5.6090, 90.00, 1.385, 24, 680.0, 20.20, 332.09, 12.13]
    y = reg.predict(x)
    print "House: " + str(x)
    print "Prediction: " + str(y)

    return (reg.best_params_["max_depth"], y)
Author: ancheremukhin | Project: boston_housing | Lines: 34 | Source: boston_housing.py


Example 8: test

def test():
    x_train,y_train=load_svmlight_file("D:/traindata/12trainset")
    x_train.todense()
    x_test,y_test=load_svmlight_file("D:/traindata/12testset")
    x_test.todense()
    print(x_train.shape)
    #classifier
    clf=SVC(kernel='rbf')
    ovrclf=OneVsRestClassifier(clf,-1)
    #parameter
    parameters=[{'estimator__C':[2**-5,2**-4,2**-3,2**-2,2**-1,1,2**1,2**2,2**3,2**4,2**5],
                 'estimator__kernel':['rbf'],
                 'estimator__gamma':[2**-5,2**-4,2**-3,2**-2,2**-1,1,2**1,2**2,2**3,2**4,2**5]},
                {'estimator__C':[2**-5,2**-4,2**-3,2**-2,2**-1,1,2**1,2**2,2**3,2**4,2**5],
                 'estimator__kernel':['linear']}]
    para={'estimator__C':[2**-5,2**-4],
                 'estimator__kernel':['rbf'],
                 'estimator__gamma':[2**-1,1]}
    #scoring
    sougou_score=make_scorer(score_func,greater_is_better=False)
    #cross_validation iterator
    sfk=c_v.StratifiedKFold(y_train,shuffle=True,n_folds=5,random_state=0)
    #grid search
    gsclf=g_s.GridSearchCV(ovrclf,param_grid=para,cv=sfk,scoring=sougou_score)
    gsclf.fit(x_train,y_train)
    print("best score: ",gsclf.best_score_)
    print("best parameters: ",gsclf.best_params_)
    y_pred=gsclf.predict(x_test)

    #result
    target_names=['0','1','2','3']
    sum_y = np.sum((np.array(y_pred)-np.array(y_test))**2)
    print(classification_report(y_test,y_pred,target_names=target_names))
    print("sougouVal: ",float(sum_y)/y_pred.shape[0])
    print(time.time()-start_time)
Author: lkprof | Project: sema | Lines: 35 | Source: svm.py


Example 9: fit_predict_model

def fit_predict_model(city_data):
    """Find and tune the optimal model. Make a prediction on housing data."""

    # Get the features and labels from the Boston housing data
    X, y = city_data.data, city_data.target

    # Setup a Decision Tree Regressor
    regressor = DecisionTreeRegressor()

    parameters = {"max_depth": (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)}

    ###################################
    ### Step 4. YOUR CODE GOES HERE ###
    ###################################

    # 1. Find the best performance metric
    # should be the same as your performance_metric procedure
    # http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html
    regScorer = make_scorer(performance_metric, greater_is_better=False)

    # 2. Use grid search to fine-tune the Decision Tree Regressor and find the best model
    # http://scikit-learn.org/stable/modules/generated/sklearn.grid_search.GridSearchCV.html#sklearn.grid_search.GridSearchCV
    reg = GridSearchCV(regressor, parameters, scoring=regScorer)

    # Fit the learner to the training data
    print "Final Model: "
    print reg.fit(X, y)
    print "Optimal parameter: " + ` reg.best_params_ `
    print "Best Score: " + ` reg.best_score_ `
    print reg.grid_scores_

    x = [11.95, 0.00, 18.100, 0, 0.6590, 5.6090, 90.00, 1.385, 24, 680.0, 20.20, 332.09, 12.13]
    z = reg.predict(x)
    print "House: " + str(x)
    print "Prediction: " + str(z)
Author: bblalock | Project: Udacity-Machine-Learning-Engineer-Projects | Lines: 35 | Source: boston_housing.py


Example 10: main

def main(argv):

    pd.set_option("display.width", 200)
    pd.set_option("display.height", 500)

    warnings.filterwarnings("ignore")

    global file_path, gini_scorer

    # Normalized Gini Scorer
    gini_scorer = metrics.make_scorer(normalized_gini, greater_is_better=True)

    if platform.system() == "Windows":
        file_path = "C:/Python/Others/data/Kaggle/Liberty_Mutual_Group/"
    else:
        file_path = "/home/roshan/Desktop/DS/Others/data/Kaggle/Liberty_Mutual_Group/"

    ########################################################################################################################
    # Read the input file , munging and splitting the data to train and test
    ########################################################################################################################
    Train_DS = pd.read_csv(file_path + "train.csv", sep=",", index_col=0)
    Actual_DS = pd.read_csv(file_path + "test.csv", sep=",", index_col=0)
    Sample_DS = pd.read_csv(file_path + "sample_submission.csv", sep=",")
    Parms_XGB_DS = pd.read_csv(file_path + "Parms_DS_XGB_1001.csv", sep=",")
    Parms_RF_DS = pd.read_csv(file_path + "Parms_DS_RF2.csv", sep=",")

    Train_DS, Actual_DS, y = Data_Munging(Train_DS, Actual_DS)

    pred_Actual = RFR_Regressor(Train_DS, y, Actual_DS, Sample_DS, Parms_RF_DS, Grid=False, Ensemble=False)
Author: roshankr | Project: DS_Competition | Lines: 29 | Source: LMG_v_02.py


Example 11: fit_predict_model

def fit_predict_model(city_data):
    """Find and tune the optimal model. Make a prediction on housing data."""

    # Get the features and labels from the Boston housing data
    X, y = city_data.data, city_data.target

    # Setup a Decision Tree Regressor
    regressor = DecisionTreeRegressor()

    parameters = {'max_depth':(1,2,3,4,5,6,7,8,9,10)}

    my_scorer = make_scorer(performance_metric, greater_is_better = False)
    
    #Use grid search to fine-tune the Decision Tree Regressor and find the best model
    print "Final Model: "
    x = [11.95, 0.00, 18.100, 0, 0.6590, 5.6090, 90.00, 1.385, 24, 680.0, 20.20, 332.09, 12.13]
    
    #Run gridsearch several times to get average result for best prediction
    predictions = []
    best_predictor = []
    for i in range(10):
        grid = grid_search.GridSearchCV(regressor, param_grid=parameters, scoring=my_scorer, cv = 5)
        grid.fit(X,y)
        y1 = grid.best_estimator_.predict(x) 
        best_predictor.append(grid.best_params_.itervalues().next())
        predictions.append(y1)
    average_predicted_price = np.mean(predictions)

    print "Best model parameter:  " + str(int(np.mean(best_predictor)))
    print "Prediction: " + str(average_predicted_price)
Author: nvrcght | Project: predicting_boston_house_prices | Lines: 30 | Source: boston_housing.py


Example 12: main

def main():
    #Load the data. A pandas DataFrame is returned. Only the training data is selected
    data = load_data()[0]
    selected_columns = ["baselineDAS", "Age", "Gender"]
    y = np.array(data["Response.deltaDAS"])
    x = np.array(data[selected_columns])
    #Build a method with the dictionary and another one with the grid of parameters
    methods = {
        'sumSVR': sumSVR
    }

    params_grid = {
        'sumSVR': {
            'dim': [3],
            'C': np.arange(0.1, 2, 0.4),
            'epsilon': np.arange(0.01, 0.1, 0.02),
            #'degree': np.arange(1, 10),
            'kernel_functions':[[cosine_similarity, cosine_similarity, DiracKernel]],
            'w': list(product(range(1,6), repeat=3))
        }
    }
    #Build and run the comparison. tr_scoring has to be constructed as shown here.
    comp = MethodComparison(methods, params_grid=params_grid)
    scores = comp.process(x, y, scorer, repeats=10, train_cv=3,
                          tr_scoring=make_scorer(scorer, greater_is_better=True), n_jobs=8)

    return scores
Author: vmolina | Project: RAclinical | Lines: 27 | Source: weighted_kernels.py


Example 13: fit_predict_model

def fit_predict_model(city_data):
    """Find and tune the optimal model. Make a prediction on housing data."""

    # Get the features and labels from the Boston housing data
    X, y = city_data.data, city_data.target

    # Setup a Decision Tree Regressor
    regressor = DecisionTreeRegressor()

    parameters = {'max_depth':(1,2,3,4,5,6,7,8,9,10)}

    # 1. Find the best performance metric
    # should be the same as your performance_metric procedure
    # http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html
    MSE_scorer = make_scorer(mean_squared_error, greater_is_better=False)
    # 2. Use grid search to fine-tune the Decision Tree Regressor and find the best model
    # http://scikit-learn.org/stable/modules/generated/sklearn.grid_search.GridSearchCV.html#sklearn.grid_search.GridSearchCV
    reg = grid_search.GridSearchCV(regressor, parameters, MSE_scorer)
    # Fit the learner to the training data
    print "Final Model: "
    print reg.fit(X, y)
    
    # Use the model to predict the output of a particular sample
    x = [11.95, 0.00, 18.100, 0, 0.6590, 5.6090, 90.00, 1.385, 24, 680.0, 20.20, 332.09, 12.13]
    y = reg.predict(x)
    print "House: " + str(x)
    print "Prediction: " + str(y)
Author: grgomez | Project: MachineLearning | Lines: 27 | Source: boston_housing.py


Example 14: run_gridsearch

def run_gridsearch(clf, parameters, X_train, y_train, X_test, y_test):
    """cross-validated optimised parameter search"""
    start = time.time()
    
    #Scorer object
    scorer = make_scorer(accuracy_score, greater_is_better=True)
    
    #Gridsearch
    tuned_clf = GridSearchCV(clf, parameters,scoring=scorer)
    
    print "Final Model: "
    
    tuned_clf.fit(X_train, y_train)
    print "Best Parameters: {:}".format(tuned_clf.best_params_)
    
    #Calculate accuracy for the tuned classifier
    est = tuned_clf.best_estimator_ 
    tuned_pred = est.predict(X_test)
    
    print "accuracy score for tuned classifier: {:.3f}".format(accuracy_score(y_test, tuned_pred))
    print "Training set: {} samples".format(X_train.shape[0])
    print "Test set: {} samples".format(X_test.shape[0])
    
    end = time.time()
    print "Grid search time (secs): {:.3f}".format(end - start)
Author: aldousbirchall | Project: MachineLearning_SmartBeta_Pipeline | Lines: 25 | Source: SPX_Pipeline.py


Example 15: fit_predict_model

def fit_predict_model(city_data):
    """Find and tune the optimal model. Make a prediction on housing data."""

    # Get the features and labels from the Boston housing data
    X, y = city_data.data, city_data.target

    # Setup a Decision Tree Regressor
    regressor = DecisionTreeRegressor()

    # Setup parameters and scores for model optimization through Grid Search
    parameters = {"max_depth": (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)}
    scorer = make_scorer(mean_squared_error, greater_is_better=False)
    gs = GridSearchCV(regressor, parameters, scoring=scorer)
    gs.fit(X, y)

    # Select the best settings for regressor
    reg = gs.best_estimator_

    # Fit the learner to the training data
    print "Final Model: "
    print reg.fit(X, y)

    # Use the model to predict the output of a particular sample
    x = [11.95, 0.00, 18.100, 0, 0.6590, 5.6090, 90.00, 1.385, 24, 680.0, 20.20, 332.09, 12.13]
    y = reg.predict(x)
    print "House: " + str(x)
    print "Prediction: " + str(y)
Author: kornev | Project: Udacity_Machine_Learning_Engineer | Lines: 27 | Source: boston_housing.py


Example 16: ada_boost

def ada_boost():
    savefile = open('traindata.pkl', 'rb')
    (x_train, y_train, t1) = cPickle.load(savefile)
    savefile.close()
    savefile = open('testdata.pkl', 'rb')
    (x_test, t1, name1) = cPickle.load(savefile)
    savefile.close()
    
#    X_train, X_valid, y_train, y_valid = cross_validation.train_test_split(
#    X, y, test_size=0.1, random_state=42)
    
    x_train = np.asarray(x_train,dtype=np.float32)
    y_train = np.asarray(y_train, dtype='int32')-1   
    
    nest = 190
    lr = .1
    md = 6
#    clf1 = DecisionTreeClassifier(max_depth=2)
#    clf = AdaBoostClassifier(clf1, n_estimators=200, learning_rate=.25)
    clf = GradientBoostingClassifier(n_estimators=nest, learning_rate=lr, max_depth=md, random_state=0)
#    clf = RandomForestClassifier(n_estimators=200) #.81
#    clf = ExtraTreesClassifier(n_estimators=1000, max_depth=None, min_samples_split=10, random_state=0,n_jobs=8) #.81
#    clf = KNeighborsClassifier(15)
    if 1:
        clf.fit(x_train, y_train)
        ypred = clf.predict_proba(x_test)
        y_str = ['Class_1','Class_2','Class_3','Class_4','Class_5','Class_6','Class_7','Class_8','Class_9']
        kcsv.print_csv(ypred, name1, y_str,indexname='id')
        print (nest, lr, md) 
    
    if 0:
        multiclass_log_loss = make_scorer(score_func=logloss_mc, greater_is_better=True, needs_proba=True)
        scores = cross_val_score(clf, x_train, y_train, n_jobs=8, cv=5,scoring=multiclass_log_loss)
        print scores
        print (nest, lr, md, scores.mean())  
Author: coreyhahn | Project: kaggle_otto | Lines: 35 | Source: ada_base00.py


Example 17: fit_model

def fit_model(X, y):
    """ Tunes a decision tree regressor model using GridSearchCV on the input data X 
        and target labels y and returns this optimal model. """

    # Create a decision tree regressor object
    regressor = DecisionTreeRegressor()

    # Set up the parameters we wish to tune
    parameters = {'max_depth':(1,2,3,4,5,6,7,8,9,10)}

    # Make an appropriate scoring function
    scoring_function = make_scorer(mean_squared_error, greater_is_better = False)

    # Make the GridSearchCV object. It exposes a best_estimator_ attribute
    # (only if refit=True, the default) holding the model whose parameters
    # best represent the data (= best tree depth). This step doesn't compute
    # anything yet; it just creates the grid search object and assigns it to reg.
    reg = grid_search.GridSearchCV(regressor, parameters, scoring_function)

    # Fit the learner to the data to obtain the optimal model with tuned parameters. 
    # The best model will be saved in reg.best_estimator
    reg.fit(X, y)

    # Return the optimal model
    return reg.best_estimator_
Author: Kebniss | Project: Boston-housing | Lines: 25 | Source: transcript_project1.py


Example 18: plot_validation_curve

def plot_validation_curve(clf, X, y, param, name=None):
    try:
        name = clf.__class__.__name__ if name is None else name
        if param is None:
            return
        scorer = metrics.make_scorer(metrics.average_precision_score)
        train_scores, test_scores = validation_curve(clf, X, y, cv=5,
                scoring=scorer, n_jobs=-1, param_name=param['name'],
                param_range=param['range'])
        train_scores_mean = np.mean(train_scores, axis=1)
        train_scores_std = np.std(train_scores, axis=1)
        test_scores_mean = np.mean(test_scores, axis=1)
        test_scores_std = np.std(test_scores, axis=1)

        plt.title('Validation Curve of {} varying {}'.format(name, param['name']))
        plt.xlabel(param['name'])
        plt.ylabel("Score")
        plt.ylim(-0.05, 1.05)
        plt.xlim(min(param['range']), max(param['range']))
        plt.plot(param['range'], train_scores_mean, label='Training score', color='r')
        plt.fill_between(param['range'], train_scores_mean - train_scores_std,
                         train_scores_mean + train_scores_std, alpha=0.2, color='r')
        plt.plot(param['range'], test_scores_mean, label='Cross-validation score',
                     color="g")
        plt.fill_between(param['range'], test_scores_mean - test_scores_std,
                         test_scores_mean + test_scores_std, alpha=0.2, color='g')
        plt.legend(loc='lower right')
        plt.savefig(name+'_'+param['name']+'_validationcurve.png')
        plt.clf()
    except Exception as e:
        print('ERROR: {}, {}'.format(name, str(e)))
Author: veksev | Project: spring16 | Lines: 32 | Source: viz.py


Example 19: rf_grid_search

def rf_grid_search():

	train_inp,valid_inp,train_target,valid_target = prepare_input()
	#set up scorer for grid search. log-loss is error, not score, so set greater_is_better to false,
	#and log-loss requires a probability
	log_loss_scorer = make_scorer(log_loss,greater_is_better=False,needs_proba=True)

	train_inp = train_inp[:100000]
	train_target = train_target[:100000]

	start = time.time()
	random_forest = RandomForestClassifier(random_state=31)
	# r_forest_parameters = {'n_estimators' : [120,300,500,800,1200],'max_depth':[5,8,15,25,30,None],'max_features':['log2','sqrt',None],
	# 'min_samples_split':[1,2,5,10,15,100],'min_samples_leaf':[1,2,5,10]}
	
	#75.1 minutes to run with these parameters - 72 fits

	r_forest_parameters = {'min_samples_split':[2,5,10,20,50,100],'min_samples_leaf':[1,2,5,10,50,100]}
	#grid search too slow to not use all cores, and wayyyy too slow to have no output.
	r_forest_grid_obj = GridSearchCV(random_forest,r_forest_parameters,log_loss_scorer,verbose=2,n_jobs=-1)
	r_forest_grid_obj = r_forest_grid_obj.fit(train_inp,train_target)
	random_forest = r_forest_grid_obj.best_estimator_
	print "Best params: " + str(r_forest_grid_obj.best_params_)	
	random_forest_train_error = log_loss(train_target,random_forest.predict_proba(train_inp))
	random_forest_validation_error = log_loss(valid_target,random_forest.predict_proba(valid_inp))
	print "Best random forest training error: {:02.4f}".format(random_forest_train_error)
	print "Best random forest validation error: {:02.4f}".format(random_forest_validation_error)
	end = time.time()
	print "RF grid search took {:02.4f} seconds".format(end-start)

	return random_forest
Author: btaborsky | Project: red-hat-kaggle | Lines: 31 | Source: red_hat.py


Example 20: svm_grid_search

def svm_grid_search():

	#get data
	training_input,training_target,validation_input,validation_target = prepare_input()

	#set up scorer for grid search. log-loss is error, not score, so set greater_is_better to false,
	#and log-loss requires a probability
	log_loss_scorer = make_scorer(log_loss,greater_is_better=False,needs_proba=True)

	training_input = training_input[:100000]
	training_target = training_target[:100000]

	print training_input.shape[0]
	print training_target.shape[0]

	start = time.time()
	svm = SVC(random_state=31,probability=True)
	
	
	svm_parameters = {'C':[.001,.01,.1,1,10,100],'kernel':["rbf","sigmoid"]}
	svm_grid_obj = GridSearchCV(svm,svm_parameters,log_loss_scorer,verbose=2,n_jobs=-1)
	svm_grid_obj = svm_grid_obj.fit(training_input,training_target)
	svm = svm_grid_obj.best_estimator_
	print "Best params: " + str(svm_grid_obj.best_params_)	
	svm_train_error = log_loss(training_target,svm.predict_proba(training_input))
	svm_validation_error = log_loss(validation_target,svm.predict_proba(validation_input))
	print "Best SVM training error: {:02.4f}".format(svm_train_error)
	print "Best SVM validation error: {:02.4f}".format(svm_validation_error)
	end = time.time()
	print "RF grid search took {:02.4f} seconds".format(end-start)

	return svm
Author: btaborsky | Project: red-hat-kaggle | Lines: 32 | Source: red_hat.py
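Examples 19 and 20 both hand-roll a log-loss scorer. For reference, that construction mirrors scikit-learn's built-in 'neg_log_loss' scoring string: needs_proba=True makes the scorer call predict_proba instead of predict, and greater_is_better=False negates the loss so higher is better. A minimal sketch on synthetic data (note: recent scikit-learn versions replace needs_proba with response_method='predict_proba'):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, make_scorer
from sklearn.model_selection import cross_val_score

X, y = make_classification(n_samples=300, random_state=0)
clf = LogisticRegression(max_iter=1000)

# Custom scorer: probability-based loss, negated so higher is better.
custom = make_scorer(log_loss, greater_is_better=False, needs_proba=True)

print(cross_val_score(clf, X, y, cv=5, scoring=custom).mean())
print(cross_val_score(clf, X, y, cv=5, scoring='neg_log_loss').mean())  # should match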



Note: The sklearn.metrics.make_scorer examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers, and copyright of the source code remains with the original authors. Consult each project's license before distributing or using the code; do not repost without permission.

