This article collects typical usage examples of the Python function sklearn.metrics.log_loss. If you have been wondering what exactly log_loss does, how to call it, or what real-world code that uses it looks like, the curated examples below should help.
Twenty code examples of log_loss are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python samples.
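Before diving into the examples, here is a minimal self-contained sketch of the function itself (the toy arrays are illustrative, not taken from any of the projects below). log_loss takes ground-truth labels and predicted probabilities and returns the negative mean log-likelihood, so lower is better:

from sklearn.metrics import log_loss

# Binary case: pass the positive-class probability for each sample
# (an (n, 2) array of per-class probabilities also works).
y_true = [0, 1, 1, 0]
y_proba = [0.1, 0.9, 0.8, 0.35]
print(log_loss(y_true, y_proba))  # ~0.216

# Multiclass case: one probability column per class, rows summing to 1.
y_true = [0, 2, 1]
y_proba = [[0.8, 0.1, 0.1],
           [0.2, 0.2, 0.6],
           [0.1, 0.7, 0.2]]
print(log_loss(y_true, y_proba))  # ~0.364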
Example 1: xgboostcv

def xgboostcv(max_depth,
              eta,
              num_rounds,
              gamma,
              min_child_weight,
              max_delta_step,
              subsample,
              colsample_bytree,
              silent=True,
              seed=1234):
    print('\nRunning XGBOOST on the cluster')
    # Call xgboost in distributed mode (CLI input for params)
    xgb_run = ['max_depth=%s' % int(max_depth),
               'eta=%s' % eta,
               'silent=%s' % silent,
               'gamma=%s' % gamma,
               'min_child_weight=%s' % int(min_child_weight),
               'max_delta_step=%s' % max_delta_step,
               'subsample=%s' % subsample,
               'eval_metric=logloss',
               'colsample_bytree=%s' % colsample_bytree,
               'seed=%s' % seed,
               'objective=binary:logistic',
               'eval[eval_set]=%s' % deval,
               'eval[train_set]=%s' % dtrain,
               'num_round=%s' % int(num_rounds),
               'data=%s' % dtrain,
               'model_out=%s' % model_ouput]
    argv = ['wormhole/repo/dmlc-core/tracker/dmlc_yarn.py',  # Where your instance is found!
            '-n',
            '16',
            'wormhole/bin/xgboost.dmlc',  # Where your instance is found!
            './examples/xgboost-avazu.txt'] + xgb_run
    print(' '.join(argv))
    # Cluster-specific ENV VARS.
    Popen(argv,
          env={'JAVA_HOME': '/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.45-28.b13.el6_6.x86_64/',
               'HADOOP_HOME': '/usr/',
               'HADOOP_HDFS_HOME': '/usr/lib/hadoop-hdfs/',
               'PATH': os.getenv('PATH')}).communicate()
    # Export model to local filesystem
    try:
        os.remove("avazu.model")
    except OSError:
        pass
    Popen(["hadoop", "fs", "-copyToLocal", "/tmp/avazu.model", "."]).communicate()
    # Delete stored model.
    Popen(["hadoop", "fs", "-rm", "/tmp/avazu.model"]).communicate()
    # Load the model file and evaluate held-out log loss.
    bst = xgb.Booster(model_file='avazu.model')
    y_pred = bst.predict(dtest)
    y_valid = dtest.get_label()
    print('logloss = ', log_loss(y_valid, y_pred))
    # The optimizer maximizes this function, so return the negated loss.
    return -log_loss(y_valid, y_pred)

Developer: qichaotang | Project: BayesBoost | Lines: 60 | Source file: xgboost-avazu.py
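Since log loss is minimized but a Bayesian optimizer maximizes its objective, the function above returns the negated loss. With the popular bayes_opt package, wiring such a function into an optimizer would look roughly like the sketch below; the parameter bounds are hypothetical and chosen only for illustration, not taken from the project:

from bayes_opt import BayesianOptimization

optimizer = BayesianOptimization(
    f=xgboostcv,  # returns -log_loss, so maximizing it minimizes the loss
    pbounds={'max_depth': (4, 10),
             'eta': (0.01, 0.3),
             'num_rounds': (50, 500),
             'gamma': (0, 1),
             'min_child_weight': (1, 10),
             'max_delta_step': (0, 5),
             'subsample': (0.5, 1),
             'colsample_bytree': (0.5, 1)},
)
optimizer.maximize(init_points=5, n_iter=25)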
Example 2: learn

def learn(learning_rate, X_train, y_train, X_test, y_test):
    model = GradientBoostingClassifier(
        n_estimators=250,
        verbose=True,
        random_state=241,
        learning_rate=learning_rate
    )
    model.fit(X_train, y_train)
    # plot scores
    test_score = list(range(250))
    train_score = list(range(250))
    for i, predictions in enumerate(model.staged_decision_function(X_test)):
        predictions = [x[0] for x in predictions.tolist()]  # unpack the nested (n, 1) output
        predictions = [1 / (1 + math.exp(-x)) for x in predictions]  # sigmoid -> probabilities
        test_score[i] = log_loss(y_test, predictions)
    for i, predictions in enumerate(model.staged_decision_function(X_train)):
        predictions = [x[0] for x in predictions.tolist()]  # unpack the nested (n, 1) output
        predictions = [1 / (1 + math.exp(-x)) for x in predictions]
        train_score[i] = log_loss(y_train, predictions)
    plt.figure()
    plt.plot(test_score, 'r', linewidth=2)
    plt.plot(train_score, 'g', linewidth=2)
    plt.legend(['test', 'train'])
    plt.show()
    return train_score, test_score

Developer: universome | Project: intro-into-ml | Lines: 30 | Source file: gbm.py
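A side note on the snippet above: the manual sigmoid is only needed because staged_decision_function yields raw decision scores. GradientBoostingClassifier also exposes staged_predict_proba, which yields per-stage probability matrices that log_loss accepts directly; a minimal equivalent for the test loop would be:

for i, proba in enumerate(model.staged_predict_proba(X_test)):
    test_score[i] = log_loss(y_test, proba)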
Example 3: fit_model_and_test

def fit_model_and_test(params):
    crimes = np.load(DATA_FILE)
    features_train = crimes['features_train']
    all_labels = sorted(list(set(np.unique(crimes['labels_train'])) | set(np.unique(crimes['labels_val']))))
    hidden_units = int(params['hidden_units'])
    batch_size = 64
    labels_train = create_labels(crimes['labels_train'], all_labels)
    labels_vals = create_labels(crimes['labels_val'], all_labels)
    labels_full = create_labels(crimes['labels'], all_labels)
    labels_train = np_utils.to_categorical(labels_train)
    labels_vals = np_utils.to_categorical(labels_vals)
    labels_full = np_utils.to_categorical(labels_full)
    model = create_model_and_fit(features_train, labels_train, hidden_units, len(all_labels), params['layers'],
                                 params['input_dropout'], params['hidden_dropout'],
                                 batch_size, crimes['features_val'], labels_vals)
    loss_train = log_loss(labels_train, model.predict_proba(crimes['features_train']))
    loss_val = log_loss(labels_vals, model.predict_proba(crimes['features_val']))
    loss_all = log_loss(labels_full, model.predict_proba(crimes['features']))
    print('loss_all: ', loss_all)
    print('loss_train: ', loss_train)
    print('loss_val: ', loss_val)
    sys.stdout.flush()
    return loss_val, model, crimes, all_labels

Developer: ManasMahanta | Project: misc | Lines: 28 | Source file: crimes_job_nn.py
Example 4: Test

def Test():
    """Test ConstrainedMultinomialRegression.

    Compare the results with scikit-learn LogisticRegression v0.15.

    Returns
    -------
    Log loss for LogisticRegression and ConstrainedMultinomialRegression
    Accuracy for LogisticRegression and ConstrainedMultinomialRegression
    """
    n = 1000; p = 10; k = 3
    X = np.random.randn(n, p)
    beta = np.random.binomial(1, .5, (p, k))
    log_odd = X.dot(beta)
    prob = np.exp(log_odd) / (1 + np.exp(log_odd))
    y = np.array([np.argmax(i) for i in prob])
    lb = LabelBinarizer()
    Y = lb.fit_transform(y)
    w = randn(k, p)
    cut = n // 2
    train = np.arange(cut); valid = np.arange(cut, n)  # Split train and test
    b = [(0, None)] * (p + 1) * k  # Constraint on beta
    cl1 = LogisticRegression()
    cl2 = ConstrainedMultinomialClassifier(bounds=b)
    cl1.fit(X[train], y[train])
    cl2.fit(X[train], y[train])
    prob1 = cl1.predict_proba(X[valid])
    prob2 = cl2.predict_proba(X[valid])
    print(log_loss(y[valid], prob1))
    print(log_loss(y[valid], prob2))
    yhat1 = cl1.predict(X[valid])
    yhat2 = cl2.predict(X[valid])
    print(accuracy_score(y[valid], yhat1))
    print(accuracy_score(y[valid], yhat2))

Developer: cwjacklin | Project: Otto | Lines: 34 | Source file: CMC.py
Example 5: modelfit

def modelfit(alg, dtrain, predictors, dtest=None, dscore=None, useTrainCV=True, cv_folds=5, early_stopping_rounds=50):
    if useTrainCV:
        xgb_param = alg.get_xgb_params()
        xgtrain = xgb.DMatrix(dtrain[predictors].values, label=dtrain[target].values)
        cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'], nfold=cv_folds,
                          metrics=['logloss'], early_stopping_rounds=early_stopping_rounds, show_progress=False)
        alg.set_params(n_estimators=cvresult.shape[0])
    # Fit the algorithm on the data
    alg.fit(dtrain[predictors], dtrain['target'], eval_metric='logloss')
    # Predict on the training set:
    dtrain_predictions = alg.predict(dtrain[predictors])
    dtrain_predprob = alg.predict_proba(dtrain[predictors])[:, 1]
    if isinstance(dtest, pd.DataFrame):
        dtest_predprob = alg.predict_proba(dtest[predictors])[:, 1]
    if isinstance(dscore, pd.DataFrame):
        dscore_predprob = alg.predict_proba(dscore[predictors])[:, 1]
        np.savetxt('XGBoost_pred_raw.csv', dscore_predprob, delimiter=",")
    # Print model report:
    print("\nModel Report")
    print("Accuracy : %.4g" % metrics.accuracy_score(dtrain['target'].values, dtrain_predictions))
    print("Metric Score (Train): %f" % metrics.log_loss(dtrain['target'], dtrain_predprob))
    if isinstance(dtest, pd.DataFrame):
        print("Metric Score (Test): %f" % metrics.log_loss(dtest['target'], dtest_predprob))
    feat_imp = pd.Series(alg.booster().get_fscore()).sort_values(ascending=False)
    feat_imp.plot(kind='bar', title='Feature Importances')
    plt.ylabel('Feature Importance Score')
    plt.show()

Developer: rschork | Project: schork_utilities | Lines: 32 | Source file: xgboost_crossval.py
Example 6: generic_cv_reg

def generic_cv_reg(X, y, model, n_folds, random_state):
    kf = cross_validation.KFold(y.shape[0], n_folds=n_folds, shuffle=True, random_state=random_state)
    trscores, cvscores, times = [], [], []
    i = 0
    stack_train = np.zeros((len(y)))  # stacked predictions
    threshold = 0.000001
    for i, (train_fold, validate) in enumerate(kf):
        i = i + 1
        t = time()
        trscore = log_loss(y.iloc[train_fold], model.fit(X.iloc[train_fold], y.iloc[train_fold]).predict(X.iloc[train_fold]))
        validation_prediction = model.predict(X.iloc[validate])
        # Clamp regression outputs into (0, 1) so log_loss stays finite.
        validation_prediction[validation_prediction > 1 - threshold] = 1 - threshold
        validation_prediction[validation_prediction < threshold] = threshold
        cvscore = log_loss(y.iloc[validate], validation_prediction)
        trscores.append(trscore); cvscores.append(cvscore); times.append(time() - t)
        stack_train[validate] = validation_prediction
    print("TRAIN %.5f | TEST %.5f | TIME %.2fm (1-fold)" % (np.mean(trscores), np.mean(cvscores), np.mean(times) / 60))
    print(model.get_params(deep=True))
    print("\n")
    return np.mean(cvscores), stack_train

Developer: turboNinja2 | Project: BNP-Kaggle | Lines: 28 | Source file: _cv_tools.py
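Incidentally, the two threshold assignments above are equivalent to a single np.clip call, which reads a bit more directly:

validation_prediction = np.clip(validation_prediction, threshold, 1 - threshold)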
Example 7: cross_valid

def cross_valid(X, y, params, iterations, n_folds=6, silent=True):
    print('Running cross validation')
    pprint.pprint(params)
    print('Iterations:', iterations)
    print('X shape', X.shape)
    y_size = len(y)
    if hasattr(X, 'values'):
        X = X.values
    y = np.array(y)
    kf = cross_validation.KFold(y_size, n_folds=n_folds, shuffle=True,
                                random_state=params['seed'])
    y_pred = np.zeros((y_size, 9))
    logs = []
    for train, test in kf:
        X_train, X_test = X[train, :], X[test, :]
        y_train, y_test = y[train], y[test]
        predictions = predict(X_train, y_train, X_test, params, iterations,
                              None if silent else y_test)
        y_pred[test] = predictions
        logs.append(metrics.log_loss(y_test, predictions))
        print('Current log_loss:', logs[-1])
    print('Final log_loss: %s (avg: %s, stddev: %s)' % (
        metrics.log_loss(y, y_pred),
        np.mean(logs),
        np.std(logs)))

Developer: Anhmike | Project: kaggle-malware-classification | Lines: 31 | Source file: ml_tools.py
Example 8: ctr_gbdt

def ctr_gbdt(model='sklearn-clicklog', from_cache=False, train_dataset_length=100000, test_dataset_length=100000):
    TRAIN_FILE, TEST_FILE = create_dataset(model, from_cache, train_dataset_length, test_dataset_length)
    prediction_model = GradientBoostingClassifier(
        loss='deviance',
        learning_rate=0.1,
        n_estimators=30,
        subsample=1.0,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_depth=5,
    )
    x_train, y_train = clean_data(TRAIN_FILE)
    x_test, y_test = clean_data(TEST_FILE)
    with Timer('fit model'):
        prediction_model.fit(x_train, y_train)
    with Timer('evaluate model'):
        y_prediction_train = prediction_model.predict_proba(x_train)
        y_prediction_test = prediction_model.predict_proba(x_test)
        loss_train = log_loss(y_train, y_prediction_train)
        loss_test = log_loss(y_test, y_prediction_test)
    print('loss_train: %s' % loss_train)
    print('loss_test: %s' % loss_test)

Developer: kazarinov | Project: hccf | Lines: 29 | Source file: sklearn_experiments.py
Example 9: check_lambda

def check_lambda(dirnm, datanm_train, datanm_valid, datanm_orig_train, datanm_orig_valid, samples_per_class, Cs, num_classes):
    spct = 10 * 70
    tdata, tlabels = load_full(dirnm + datanm_train, spct)
    print(tdata.shape, tlabels.shape)
    spct = 10
    otdata, otlabels = load_full(dirnm + datanm_orig_train, spct)
    spct = 10 * 30
    vdata, vlabels = load_full(dirnm + datanm_valid, spct)
    spct = 10
    ovdata, ovlabels = load_full(dirnm + datanm_orig_valid, spct)
    # artificial data
    ans = np.zeros((len(Cs), 4))
    for i, C in enumerate(Cs):
        clf = LogisticRegression(C=C, penalty='l2', multi_class='ovr',
                                 tol=0.001, n_jobs=-1, verbose=0, solver='newton-cg')
        clf.fit(tdata, tlabels)
        out_train = clf.predict_proba(tdata)
        out_valid = clf.predict_proba(vdata)
        out_train_real = clf.predict_proba(otdata)
        out_valid_real = clf.predict_proba(ovdata)
        ans[i, 0] += log_loss(tlabels, out_train)
        ans[i, 1] += log_loss(vlabels, out_valid)
        ans[i, 2] += log_loss(otlabels, out_train_real)
        ans[i, 3] += log_loss(ovlabels, out_valid_real)
    np.savez("logreg_lambda", ans=ans, Cs=Cs, num_classes=num_classes, samples_per_class=samples_per_class)
    return ans

Developer: dmitro-nazarenko | Project: chemfin-open | Lines: 34 | Source file: logreg.py
Example 10: check_lambda

def check_lambda(datanm, samples_per_class, depv, num_classes, criterion, num_iter=100):
    data, labels = load_full(datanm, samples_per_class)
    slo = StratifiedShuffleSplit(labels, n_iter=num_iter, test_size=0.3, train_size=0.7, random_state=None)
    ans = np.zeros((len(depv), 4))
    for train_index, test_index in slo:
        train_data = [data[train_index, :], labels[train_index]]
        valid_data = [data[test_index, :], labels[test_index]]
        for i, d in enumerate(depv):
            clf = DecisionTreeClassifier(criterion=criterion, splitter='best',
                                         max_depth=d, min_samples_split=2,
                                         min_samples_leaf=1, min_weight_fraction_leaf=0.0,
                                         max_features=None, random_state=None,
                                         max_leaf_nodes=None, class_weight=None, presort=False)
            clf.fit(train_data[0], train_data[1])
            out_train = clf.predict_proba(train_data[0])
            out_valid = clf.predict_proba(valid_data[0])
            ans[i, 0] += log_loss(train_data[1], out_train)
            ans[i, 1] += log_loss(valid_data[1], out_valid)
            ans[i, 2] += brier(train_data[1], out_train, num_classes)
            ans[i, 3] += brier(valid_data[1], out_valid, num_classes)
    ans[:, :] /= num_iter
    np.savez("rand_forest_lambda_" + criterion, mdep=depv, ans=ans, num_iter=num_iter, num_classes=num_classes, samples_per_class=samples_per_class)
    return ans

Developer: dmitro-nazarenko | Project: chemfin-open | Lines: 28 | Source file: rand_forest.py
Example 11: svm_model

def svm_model(train_data_features, train_data_cross_validation_classwise_features, test_data_features, labels, labels_cross_validation_classwise, using_cross_validation2, kf, settings):
    if using_cross_validation2:
        C_base = 4.5
        C_step = 0.5  # 0.005
        C = C_base
        _results = []
        if len(train_data_cross_validation_classwise_features) > 0:
            """train_all = np.append(train_data_features, train_data_cross_validation_classwise_features, axis=0)
            labels_all = np.append(labels, labels_cross_validation_classwise)
            kf_all = KFold(len(train_all)-1, n_folds=int(settings['Data']['CrossValidation2']), shuffle=True)
            for train, test in kf_all:
                svc = SVC(kernel="linear", C=C, probability=True)
                model = svc.fit(train_all[train], labels_all[train])
                predicted_classes = model.predict(train_all[test])
                predicted_classes_train = model.predict(train_all[train])
                class_probabilities = model.predict_proba(train_all[test])
                print("C: ", C, " n points:", len(predicted_classes), " percentage: ", (labels_all[test] != predicted_classes).sum()*100/len(predicted_classes), "% percentage_train: ", (labels_all[train] != predicted_classes_train).sum()*100/len(predicted_classes_train), "%")
                _results.append((labels_all[test] != predicted_classes).sum())
                C += C_step"""
            for c in pl.frange(C_base, 9, C_step):
                svc = SVC(kernel="linear", C=c, probability=True)
                model = svc.fit(train_data_features, labels)
                predicted_classes = model.predict(train_data_cross_validation_classwise_features)
                class_probabilities = model.predict_proba(train_data_cross_validation_classwise_features)
                print("C: ", c, " N points:", len(predicted_classes), " percentage: ", (labels_cross_validation_classwise != predicted_classes).sum() * 100 / len(predicted_classes), "%")
                print("Log_loss: ", log_loss(labels_cross_validation_classwise, class_probabilities))
            for c in pl.frange(1, 3, 1):
                svc = SVC(kernel="linear", C=c, probability=True)
                model = svc.fit(train_data_features, labels)
                predicted_classes = model.predict(train_data_cross_validation_classwise_features)
                class_probabilities = model.predict_proba(train_data_cross_validation_classwise_features)
                print("C: ", c, " N points:", len(predicted_classes), " percentage: ", (labels_cross_validation_classwise != predicted_classes).sum() * 100 / len(predicted_classes), "%")
                print("Log_loss: ", log_loss(labels_cross_validation_classwise, class_probabilities))
        else:
            for train, test in kf:
                svc = SVC(kernel="linear", C=C, probability=True)
                model = svc.fit(train_data_features[train], labels[train])
                predicted_classes = model.predict(train_data_features[test])
                predicted_classes_train = model.predict(train_data_features[train])
                class_probabilities = model.predict_proba(train_data_features[test])
                print("C: ", C, " n points:", len(predicted_classes), " percentage: ", (labels[test] != predicted_classes).sum() * 100 / len(predicted_classes), "% percentage_train: ", (labels[train] != predicted_classes_train).sum() * 100 / len(predicted_classes_train), "%")
                _results.append((labels[test] != predicted_classes).sum())
                C += C_step
            C = C_base + C_step * _results.index(min(_results))
            print("C: ", C)
        if len(train_data_cross_validation_classwise_features) > 0:
            svc = SVC(kernel="linear", C=C, probability=True)
            model = svc.fit(train_data_features, labels)
            predicted_classes = model.predict(train_data_cross_validation_classwise_features)
            class_probabilities = model.predict_proba(train_data_cross_validation_classwise_features)
            print("C: ", C, " N points:", len(predicted_classes), " percentage: ", (labels_cross_validation_classwise != predicted_classes).sum() * 100 / len(predicted_classes), "%")
            print("Log_loss: ", log_loss(labels_cross_validation_classwise, class_probabilities))
        svc = SVC(kernel="linear", C=C, probability=True)
        model = svc.fit(train_data_features, labels)
        return model.predict_proba(test_data_features), model.predict(test_data_features), model
    else:
        svc = SVC(kernel="linear", C=8, probability=True)
        model = svc.fit(train_data_features, labels)
        return model.predict_proba(test_data_features), model.predict(test_data_features), model

Developer: dvn123 | Project: MachineLearning | Lines: 60 | Source file: machine_learning_models.py
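The hand-rolled grid search over C above can also be expressed with scikit-learn's own tooling. A compact sketch under the same linear-SVC setup (the grid values mirror the C_base/C_step loop and are otherwise arbitrary):

import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

grid = GridSearchCV(SVC(kernel="linear", probability=True),
                    param_grid={"C": np.arange(4.5, 9.0, 0.5)},
                    scoring="neg_log_loss",  # GridSearchCV maximizes, hence the negation
                    cv=5)
grid.fit(train_data_features, labels)
print(grid.best_params_, -grid.best_score_)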
Example 12: main

def main():
    X, Y, encoder, scale = load_train_data('train.csv')
    estimators = 500
    X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, test_size=0.2, random_state=0)
    X_train_real, X_test_real, Y_train_real, Y_test_real = train_test_split(X_train, Y_train, test_size=0.2, random_state=42)
    log.info('Loaded training file')
    X_test, _ = load_csv_file('test.csv', cut_end=False)
    log.info('Loaded test file')
    # Classifier setup
    tree_clf = ExtraTreesClassifier(n_estimators=estimators, n_jobs=-1,
                                    random_state=42, max_depth=55, min_samples_split=1)
    clf = make_pipeline(TfidfTransformer(), DenseTransformer(), tree_clf)
    log.info('Fitting GradientBoost')
    clf.fit(X_train_real, Y_train_real)
    clf_probs = clf.predict_proba(X_test_real)
    score = log_loss(Y_test_real, clf_probs)
    log.info('Log Loss score un-trained = %f' % score)
    # Calibrate the classifier using the held-out ground truth in X_valid, Y_valid
    sig_clf = CalibratedClassifierCV(clf, method="isotonic", cv="prefit")
    log.info('Fitting CalibratedClassifierCV')
    sig_clf.fit(X_valid, Y_valid)
    sig_clf_probs = sig_clf.predict_proba(X_test_real)
    sig_score = log_loss(Y_test_real, sig_clf_probs)
    log.info('Log loss score trained = %f' % sig_score)
    # Now predict the test data with the calibrated classifier
    sig_submission_probs = sig_clf.predict_proba(X_test)
    write_out_submission(sig_submission_probs, 'submission.csv')

Developer: Almclean | Project: otto-group | Lines: 31 | Source file: main.py
Example 13: xgb_base

def xgb_base(train2, y, test2, v, z, xgb_params, N_splits, N_seeds, cname, base_seed=42):
    v[cname], z[cname] = 0, 0
    scores = []
    dtest = xgb.DMatrix(test2)
    for s in range(N_seeds):
        xgb_params['seed'] = s + base_seed
        skf = model_selection.StratifiedKFold(n_splits=N_splits, shuffle=True, random_state=s + base_seed)
        for n, (itrain, ival) in enumerate(skf.split(train2, y)):
            # .iloc replaces the long-removed DataFrame.ix used in the source
            dtrain = xgb.DMatrix(train2.iloc[itrain], y[itrain])
            dvalid = xgb.DMatrix(train2.iloc[ival], y[ival])
            watch = [(dtrain, 'train'), (dvalid, 'valid')]
            clf = xgb.train(xgb_params, dtrain, 10000, watch, early_stopping_rounds=100, verbose_eval=False)
            p = clf.predict(dvalid)
            v.loc[ival, cname] += pconvert(p)
            score = metrics.log_loss(y[ival], p)
            z[cname] += pconvert(clf.predict(dtest))
            print(cname, 'seed %d step %d of %d: ' % (xgb_params['seed'], n + 1, skf.n_splits), score, now())
            scores.append(score)
    z[cname] /= N_splits * N_seeds
    v[cname] /= N_seeds
    print('validation loss: ', metrics.log_loss(y, prestore(v[cname])))
    cv = np.array(scores)
    print(cv, cv.mean(), cv.std())

Developer: ivan-filonov | Project: mlbootcamp_5 | Lines: 26 | Source file: predict_2017_07_01_5.py
Example 14: check_vb

def check_vb(datanm, samples_per_class, Cs, num_classes, num_iter=100):
    data, labels = load_full(datanm, samples_per_class)
    slo = StratifiedShuffleSplit(labels, n_iter=num_iter, test_size=0.5, train_size=0.5, random_state=None)
    ans = np.zeros((len(Cs), samples_per_class // 2, 2))
    for train_index, test_index in slo:
        train_data = [data[train_index, :], labels[train_index]]
        valid_data = [data[test_index, :], labels[test_index]]
        for l in range(samples_per_class // 2):
            ind_train = []
            ind_valid = []
            for k in range(num_classes):
                ind_train = ind_train + np.where(train_data[1] == k)[0].tolist()[:l + 1]
                ind_valid = ind_valid + np.where(valid_data[1] == k)[0].tolist()[:l + 1]
            ctrain_data = [train_data[0][ind_train], train_data[1][ind_train]]
            cvalid_data = [valid_data[0][ind_valid], valid_data[1][ind_valid]]
            for i, C in enumerate(Cs):
                clf = LogisticRegression(C=C, penalty='l2', multi_class='ovr',
                                         tol=0.001, n_jobs=-1, verbose=0)  # , solver='newton-cg'
                clf.fit(ctrain_data[0], ctrain_data[1])
                out_train = clf.predict_proba(ctrain_data[0])
                out_valid = clf.predict_proba(cvalid_data[0])
                ans[i, l, 0] += log_loss(ctrain_data[1], out_train)
                ans[i, l, 1] += log_loss(cvalid_data[1], out_valid)
    ans /= num_iter
    np.savez("logreg_bv", ans=ans, Cs=Cs, num_iter=num_iter, num_classes=num_classes, samples_per_class=samples_per_class)
    return ans

Developer: dmitro-nazarenko | Project: chemfin-open | Lines: 33 | Source file: logreg.py
Example 15: generic_cv_np

def generic_cv_np(X, y, model, n_folds, random_state):
    kf = cross_validation.KFold(y.shape[0], n_folds=n_folds, shuffle=True, random_state=random_state)
    trscores, cvscores, times = [], [], []
    i = 0
    stack_train = np.zeros((len(y)))  # stacked predictions
    for i, (train_fold, validate) in enumerate(kf):
        i = i + 1
        t = time()
        model.fit(X[train_fold, ], y[train_fold])
        trscore = log_loss(y[train_fold], model.predict_proba(X[train_fold, ]))
        validation_prediction = model.predict_proba(X[validate, ])
        cvscore = log_loss(y[validate], validation_prediction)
        trscores.append(trscore); cvscores.append(cvscore); times.append(time() - t)
        stack_train[validate] = validation_prediction[:, 1]  # positive-class column; assumes a binary target
    print("TRAIN %.5f | TEST %.5f | TIME %.2fm (1-fold)" % (np.mean(trscores), np.mean(cvscores), np.mean(times) / 60))
    print(model.get_params())
    print("\n")
    return np.mean(cvscores), stack_train

Developer: turboNinja2 | Project: BNP-Kaggle | Lines: 25 | Source file: _cv_tools.py
Example 16: go_by_category_2

def go_by_category_2(category):
    input, targets, scaler = TrainingFactory.get_training_data_by_category(category, 10000)
    input_train, input_test, target_train, target_test = train_test_split(input, targets, test_size=0.1)
    test_data_sparse = TestingFactory.get_test_data(limit=1000)
    test_data_scaled = scaler.transform(test_data_sparse)
    test_data = csr_matrix(test_data_scaled)
    classif = SVC(kernel='rbf', C=0.1, tol=0.001, probability=True)
    classif.fit(input_train, target_train)
    output_targets_proba = classif.predict_proba(input_test)
    outputs_predicted_proba = [item[1] for item in output_targets_proba]
    output_targets = classif.predict(input_test)
    # print(output_targets.tolist())
    # print(outputs_predicted_proba)
    # print(target_test)
    # Note: this passes hard 0/1 predictions; outputs_predicted_proba would give the usual probabilistic loss.
    print(log_loss(target_test, output_targets))
    accuracy = accuracy_score(target_test, output_targets)
    print(accuracy)
    print(confusion_matrix(target_test, output_targets))
    testing_output = classif.predict_proba(test_data)
    testing_output_proba = [item[1] for item in testing_output]
    print(testing_output_proba)
    return accuracy, output_targets, testing_output_proba

Developer: cginestra | Project: san_francisco_crime | Lines: 31 | Source file: classifier.py
Example 17: train_model_with_feature

def train_model_with_feature(config_name, clf_name, fill_na_opt, PCA_n_comp, clf, X, X_test, y):
    if PCA_n_comp != -1:
        pca = PCA(PCA_n_comp)  # PCA dimension reduction
        logger.info('PCA fit on count matrix')
        # rescale numeric features to (0, 1)
        X_all = pca.fit_transform(minmax_scale(np.vstack([X, X_test])))
        X, X_test = X_all[:X.shape[0], :], X_all[X.shape[0]:, :]
        logger.info('PCA fit done')
    logger.info('start training')
    print('training size', X.shape, 'test size', X_test.shape)
    X_train, X_val, y_train, y_val = train_test_split(X, y, train_size=0.9)
    if clf_name == 'xgb':
        clf.fit(X_train, y_train, eval_metric='mlogloss')
    else:
        clf.fit(X_train, y_train)
    logger.info(clf_name + '-' + fill_na_opt + '-pca(' + str(PCA_n_comp) + ') train log-loss='
                + str(log_loss(y_train, clf.predict_proba(X_train))))
    logger.info(clf_name + '-' + fill_na_opt + '-pca(' + str(PCA_n_comp) + ') validate log-loss='
                + str(log_loss(y_val, clf.predict_proba(X_val))))
    clf.fit(X, y)
    y_pred = clf.predict_proba(X_test)
    df_test[group_list] = y_pred
    logger.info('finish training')
    # , 'phone_brand_en', 'device_model_en'
    df_test.to_csv('output/' + config_name + '-' + clf_name + '-' + fill_na_opt + '-pca' +
                   str(PCA_n_comp) + '-' + str(datetime.datetime.now().strftime('%Y-%m-%d-%H-%M'))
                   + '.csv', columns=['device_id'] + group_list, index=False)
    logger.info('finish outputing result')

Developer: chu-NMSU | Project: Talking-Data | Lines: 30 | Source file: model_clf.py
Example 18: plot_score

def plot_score(test_predictions, y_test, train_predictions, y_train, color, learning_rate):
    test_loss = [log_loss(y_test, pred) for pred in test_predictions]
    train_loss = [log_loss(y_train, pred) for pred in train_predictions]
    plt.plot(test_loss, color, linewidth=2)
    plt.plot(train_loss, color + '--', linewidth=2)
    looses[learning_rate] = test_loss  # `looses` is a module-level dict in the source

Developer: abonec | Project: python_machine_learning | Lines: 7 | Source file: gradient_boost.py
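For context, test_predictions and train_predictions here are sequences of per-stage probability arrays. A typical call might look like the sketch below (clf, the data splits, and the module-level looses dict are assumed to exist; the learning rate is illustrative):

plot_score(list(clf.staged_predict_proba(X_test)), y_test,
           list(clf.staged_predict_proba(X_train)), y_train,
           'g', learning_rate=0.2)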
Example 19: gb_get_min_loss

def gb_get_min_loss(clf, verbose=False):
    # Scan per-stage losses to find the best boosting iteration.
    j = 0
    min_loss_test = 1
    print()
    for i, quality_train, quality_test in zip(
        range(1, 250 + 1),
        clf.staged_predict_proba(X_train),
        clf.staged_predict_proba(X_test)
    ):
        loss_train = log_loss(y_train, quality_train)
        loss_test = log_loss(y_test, quality_test)
        if min_loss_test > loss_test:
            min_loss_test = loss_test
            j = i
        if verbose:
            print(
                'Iteration:', i, ' ',
                'Train:', '{0:.3f}'.format(loss_train), ' ',
                'Test:', '{0:.3f}'.format(loss_test), ' ',
                '-' if min_loss_test == loss_test else '+'
            )
    return min_loss_test, j

Developer: denkorzh | Project: coursera-hse-machine-learning | Lines: 26 | Source file: 02-gradient-boosting.py
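The same minimum can be located more compactly with numpy over a list of staged losses (equivalent sketch, using the same X_test/y_test globals as the function above):

import numpy as np

losses = [log_loss(y_test, p) for p in clf.staged_predict_proba(X_test)]
best_iter = int(np.argmin(losses)) + 1  # staged iterations are 1-based
min_loss_test = min(losses)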
Example 20: train_model

def train_model(estimator, xtr, xcv, ytr, ycv):
    model_list = get_model_name_list()
    # for rfc, rfr, etc, etr
    if type(estimator) in model_list[:4]:
        estimator.fit(xtr, ytr)
        # for rfc, etc
        if hasattr(estimator, 'predict_proba'):
            train_predict = estimator.predict_proba(xtr)
            cv_predict = estimator.predict_proba(xcv)
        # for rfr, etr
        else:
            train_predict = estimator.predict(xtr)
            cv_predict = estimator.predict(xcv)
        best_iter = 0
    # for xgbc, xgbr
    elif type(estimator) in model_list[4:]:
        estimator.fit(xtr, ytr, early_stopping_rounds=35, eval_metric='logloss',
                      eval_set=[(xcv, ycv)], verbose=True)
        best_iter = estimator.best_iteration
        # for xgbc
        if hasattr(estimator, 'predict_proba'):
            train_predict = estimator.predict_proba(xtr, ntree_limit=best_iter)
            cv_predict = estimator.predict_proba(xcv, ntree_limit=best_iter)
        # for xgbr
        else:
            train_predict = estimator.predict(xtr, ntree_limit=best_iter)
            cv_predict = estimator.predict(xcv, ntree_limit=best_iter)
    train_loss = log_loss(ytr, train_predict)
    cv_loss = log_loss(ycv, cv_predict)
    return train_loss, cv_loss, best_iter

Developer: MitinRoman | Project: bnp_paribas-1 | Lines: 30 | Source file: cross_validation.py
Note: the sklearn.metrics.log_loss examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright remains with the original authors. For redistribution and use, please consult the license of the corresponding project; do not repost without permission.