本文整理汇总了Python中sklearn.metrics.zero_one_loss函数的典型用法代码示例。如果您正苦于以下问题:Python zero_one_loss函数的具体用法?Python zero_one_loss怎么用?Python zero_one_loss使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了zero_one_loss函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_grid
def test_grid(features, target):
    """Run per-genre saved models over *features* and report multi-label accuracy.

    For each genre column in *target*, loads the pickled per-genre model,
    predicts probabilities, then writes classification reports and zero-one
    losses for several rounding thresholds to a text file.

    Parameters
    ----------
    features : array-like of shape (n_samples, n_features)
    target   : ndarray of shape (n_samples, n_genres), true binary labels.
    """
    genre_list = ['animated', 'action', 'comedy', 'drama', 'family', 'fantasy',
                  'horror', 'musical', 'mystery', 'romance', 'sci-fi', 'thriller',
                  'war', 'western']
    ypred_mat = np.empty([target.shape[0], target.shape[1]])
    for i in range(target.shape[1]):
        filename = '../data/is_' + genre_list[i] + '.pkl'
        ypred = test_prediction(filename, features, target[:, i])
        for j, prob in enumerate(ypred):
            ypred_mat[j, i] = prob
    # One evaluation stanza per rounding threshold; labels kept as the
    # original literal strings (".25" not "0.25") to preserve output.
    thresholds = [('.25', .25), ('.3', .3), ('.2', .2), ('.1', .1)]
    with open('../data/grid_pkl_500.txt', 'w') as f:
        for k, (label, thresh) in enumerate(thresholds):
            prefix = '' if k == 0 else '\n'  # first stanza has no leading blank line
            f.write(prefix + "Model rounded by {}\n".format(label))
            yrd = round_by(ypred_mat, thresh)
            f.write(metrics.classification_report(target, yrd))
            f.write("Percent of misclassification: {}\n".format(
                metrics.zero_one_loss(target, yrd)))
开发者ID:JenniferDunne,项目名称:ml-from-movie-reviews,代码行数:30,代码来源:classify_genres.py
示例2: cross_validation_example
def cross_validation_example():
    """Grid-search cross-validation to find optimal MinCq parameters with RBF-kernel voters.

    Loads iris, binarizes the labels to {-1, +1} (classes 0 and 2 merged as -1),
    fits a GridSearchCV over mu and gamma, and prints train/test zero-one risks.
    """
    # Load iris, convert labels to -1/+1, split into train and test.
    dataset = load_iris()
    dataset.target[dataset.target == 0] = -1
    dataset.target[dataset.target == 2] = -1
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, random_state=42)

    # The learning algorithm and its hyperparameter grid.
    learner = MinCqLearner(mu=0.0001, voters_type='kernel', kernel='rbf', gamma=0.0)
    learner_params = {'mu': [0.0001, 0.001, 0.01],
                      'gamma': [0.0, 0.1, 1.0, 10]}
    cv_classifier = GridSearchCV(learner, learner_params, scoring=accuracy_scorer)
    cv_classifier = cv_classifier.fit(X_train, y_train)

    predictions_train = cv_classifier.predict(X_train)
    predictions_test = cv_classifier.predict(X_test)

    # NOTE(review): grid_scores_ is a deprecated attribute in newer sklearn
    # (replaced by cv_results_) — kept for compatibility with this codebase.
    print_sklearn_grid_scores("Iris", "RbfMinCq", learner_params, cv_classifier.grid_scores_)
    print("Best parameters: {}".format(str(cv_classifier.best_params_)))
    print("Training set risk: {:.4f}".format(zero_one_loss(y_train, predictions_train)))
    print("Testing set risk: {:.4f}".format(zero_one_loss(y_test, predictions_test)))
开发者ID:GRAAL-Research,项目名称:MinCq,代码行数:26,代码来源:example.py
示例3: drawLearningCurve
def drawLearningCurve(model, x_train, y_train, x_test, y_test, num_points=50):
    """Plot training and validation zero-one error as the training-set size grows.

    Adapted from
    http://sachithdhanushka.blogspot.de/2013/09/learning-curve-generator-for-learning.html

    Returns the matplotlib Figure containing the learning curve.
    """
    train_error = np.zeros(num_points)
    crossval_error = np.zeros(num_points)
    # Training-set sizes from 2 up to the full training set.
    sizes = np.linspace(2, len(x_train), num=num_points).astype(int)
    for i, size in enumerate(sizes):
        model.fit(x_train[:size], y_train[:size])
        # Validation error. NOTE(review): only the first `size` test samples
        # are scored, mirroring the original; scoring the full test set would
        # give a less noisy estimate — confirm intent.
        y_pred = model.predict(x_test[:size])
        crossval_error[i] = zero_one_loss(y_test[:size], y_pred, normalize=True)
        # Training error on the same subset the model was fit on.
        y_pred = model.predict(x_train[:size])
        train_error[i] = zero_one_loss(y_train[:size], y_pred, normalize=True)
    print(crossval_error)
    print(train_error)
    fig, ax = plt.subplots()
    ax.plot(sizes, crossval_error, lw=2, label='cross validation error')
    ax.plot(sizes, train_error, lw=4, label='training error')
    # Fixed labels: x-axis is the training-set size and y-axis the zero-one
    # loss (the originals said "cross val error"/"rms error", which did not
    # match what is plotted).
    ax.set_xlabel('training set size')
    ax.set_ylabel('zero-one loss')
    ax.legend(loc=0)
    ax.set_title('Learning Curve')
    return fig
开发者ID:saihttam,项目名称:kaggle-scikit,代码行数:31,代码来源:random_forest_n_pca.py
示例4: plot_adaclassifier
def plot_adaclassifier(classifier, n_estimators, X_train, X_test, y_train, y_test):
    """Plot AdaBoost train/test error as a function of the number of boosting stages.

    Uses staged_predict to evaluate the ensemble after each stage.
    Returns the matplotlib Figure.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Test error after each boosting stage.
    ada_err_test = np.zeros((n_estimators,))
    for i, y_pred in enumerate(classifier.staged_predict(X_test)):
        # Argument order normalized to (y_true, y_pred); zero-one loss is
        # symmetric, so the value is unchanged.
        ada_err_test[i] = zero_one_loss(y_test, y_pred)

    # Training error after each boosting stage.
    ada_err_train = np.zeros((n_estimators,))
    for i, y_pred in enumerate(classifier.staged_predict(X_train)):
        ada_err_train[i] = zero_one_loss(y_train, y_pred)

    ax.plot(np.arange(n_estimators) + 1, ada_err_test,
            label='AdaBoost Test Error',
            color='red')
    ax.plot(np.arange(n_estimators) + 1, ada_err_train,
            label='AdaBoost Train Error',
            color='blue')

    ax.set_ylim((0.0, 1.0))
    ax.set_xlabel('n_estimators')
    ax.set_ylabel('error rate')
    leg = ax.legend(loc='upper right', fancybox=True)
    leg.get_frame().set_alpha(0.7)
    return fig
开发者ID:njpayne,项目名称:euclid,代码行数:34,代码来源:plot_learning_curve.py
示例5: run_gamma
def run_gamma(x, y):
    """Compare libsvm, DualKSVM and Pegasos train/test error across RBF gamma values.

    Runs `n_iter` stratified shuffle splits, trains each solver for every gamma
    in a log-spaced grid, averages the zero-one errors and saves a log-log plot.
    """
    perc = 0.6  # fraction of the data used for training
    n = x.shape[0]
    gamma_list = (np.power(2.0, range(-4, 12)) / (n * perc)).tolist()
    n_iter = 2

    train_err_libsvm = np.zeros((len(gamma_list), n_iter))
    test_err_libsvm = np.zeros((len(gamma_list), n_iter))
    train_err_dsvm = np.zeros((len(gamma_list), n_iter))
    test_err_dsvm = np.zeros((len(gamma_list), n_iter))
    train_err_pegasos = np.zeros((len(gamma_list), n_iter))
    test_err_pegasos = np.zeros((len(gamma_list), n_iter))

    ss = cv.StratifiedShuffleSplit(y, n_iter=n_iter, test_size=1 - perc,
                                   train_size=None, random_state=0)
    for k, (train, test) in enumerate(ss):
        ntr = len(train)
        lmda = 1.0 / ntr  # regularization weight tied to the train-set size
        print("#iter: %d" % k)
        x_train, x_test, y_train, y_test = x[train], x[test], y[train], y[test]
        # Scale features to [-1, 1]; the scaler is fit on train data only.
        mM_scale = preprocessing.MinMaxScaler(feature_range=(-1, 1))
        x_train = mM_scale.fit_transform(x_train)
        x_test = mM_scale.transform(x_test)
        for j, gm in enumerate(gamma_list):
            print("check lamda %f, gamma %f" % (lmda, gm))
            # Reference solver: libsvm through sklearn (C = lmda * ntr = 1).
            clf = svm.SVC(C=lmda * ntr, kernel='rbf', gamma=gm, cache_size=600)
            clf.fit(x_train, y_train)
            pred = clf.predict(x_train)
            train_err_libsvm[j, k] = zero_one_loss(y_train, pred)
            pred = clf.predict(x_test)
            test_err_libsvm[j, k] = zero_one_loss(y_test, pred)
            # Dual coordinate-style solver; it tracks its own error curves.
            dsvm = DualKSVM(lmda=lmda, gm=gm, kernelstr='rbf', nsweep=ntr / 2, b=5, c=1)
            dsvm.fit(x_train, y_train, x_test, y_test, )
            train_err_dsvm[j, k] = dsvm.err_tr[-1]
            test_err_dsvm[j, k] = dsvm.err_te[-1]
            # Kernelized Pegasos baseline.
            kpega = Pegasos(ntr, lmda, gm, nsweep=2, batchsize=2)
            kpega.train_test(x_train, y_train, x_test, y_test)
            train_err_pegasos[j, k] = kpega.err_tr[-1]
            test_err_pegasos[j, k] = kpega.err_te[-1]

    # Average the errors over the shuffle splits.
    avg_train_err_libsvm = np.mean(train_err_libsvm, axis=1)
    avg_test_err_libsvm = np.mean(test_err_libsvm, axis=1)
    avg_train_err_dsvm = np.mean(train_err_dsvm, axis=1)
    avg_test_err_dsvm = np.mean(test_err_dsvm, axis=1)
    avg_train_err_pegasos = np.mean(train_err_pegasos, axis=1)
    avg_test_err_pegasos = np.mean(test_err_pegasos, axis=1)

    plt.figure()
    plt.loglog(gamma_list, avg_train_err_libsvm, 'bo-', label='libsvm train')
    plt.loglog(gamma_list, avg_test_err_libsvm, 'ro-', label='libsvm test')
    plt.loglog(gamma_list, avg_train_err_dsvm, 'gx-', label='dsvm train')
    plt.loglog(gamma_list, avg_test_err_dsvm, 'cx-', label='dsvm test')
    plt.loglog(gamma_list, avg_train_err_pegasos, 'mD-', label='pegasos train')
    plt.loglog(gamma_list, avg_test_err_pegasos, 'kD-', label='pegasos test')
    plt.legend(bbox_to_anchor=(0, 1.17, 1, .1), loc=2, ncol=2, mode="expand",
               borderaxespad=0)
    plt.savefig('../output/usps_diff_gamma.pdf')
开发者ID:percyqdeng,项目名称:dualsvm,代码行数:55,代码来源:test_usps.py
示例6: build_tree
def build_tree(clf, type, i, X_train, X_test, y_train, y_test, attribute_names, class_names):
    """Fit a decision tree, report train/test errors, save its visualization.

    Returns the zero-one loss on the test set.
    """
    print("------------Run " + type + "_" + str(i) + "----------")
    clf.fit(X_train, y_train)
    print("Training error =", zero_one_loss(y_train, clf.predict(X_train)))
    predicted_test = clf.predict(X_test)
    # Compute the test loss once and reuse it (it was recomputed for the
    # return value in the original).
    test_error = zero_one_loss(y_test, predicted_test)
    print("Test error =", test_error)
    figure_name = type + "_" + str(i)
    visualize_tree(clf, attribute_names, class_names, figure_name)
    print(classification_report(y_test, predicted_test))
    print(confusion_matrix(y_test, predicted_test))
    return test_error
开发者ID:akash13singh,项目名称:dm-assignment2,代码行数:11,代码来源:dtree.py
示例7: simple_classification_example
def simple_classification_example():
    """Run four versions of MinCq (stumps, linear, poly, RBF voters) on iris.

    Fixed hyperparameters; prints the zero-one risk on train and test sets for
    each voter configuration.
    """
    # MinCq parameter, fixed to a given value as this is a simple example.
    mu = 0.001

    # Load iris, convert labels to -1/+1, split into train and test.
    dataset = load_iris()
    dataset.target[dataset.target == 0] = -1
    dataset.target[dataset.target == 2] = -1
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, random_state=42)

    # One (title, constructor-kwargs) pair per MinCq variant; replaces four
    # copy-pasted stanzas. Underlines are sized to the title (the originals
    # were inconsistent).
    configurations = [
        ("StumpsMinCq", dict(voters_type='stumps')),
        ("LinearMinCq", dict(voters_type='kernel', kernel='linear')),
        ("PolyMinCq", dict(voters_type='kernel', kernel='poly')),
        ("RbfMinCq", dict(voters_type='kernel', kernel='rbf', gamma=0.0)),
    ]
    for title, params in configurations:
        learner = MinCqLearner(mu, **params)
        learner.fit(X_train, y_train)
        predictions_train = learner.predict(X_train)
        predictions_test = learner.predict(X_test)
        print("\n" + title)
        print("-" * len(title))
        print("Training set risk: {:.4f}".format(zero_one_loss(y_train, predictions_train)))
        print("Testing set risk: {:.4f}\n".format(zero_one_loss(y_test, predictions_test)))
开发者ID:GRAAL-Research,项目名称:MinCq,代码行数:60,代码来源:example.py
示例8: test_losses
def test_losses():
    """Test classification and regression loss functions on a binary problem."""
    y_true, y_pred, _ = make_prediction(binary=True)
    n_samples = y_true.shape[0]
    n_classes = np.size(unique_labels(y_true))

    # Classification
    # --------------
    with warnings.catch_warnings(record=True):
        # zero_one is deprecated; the context swallows the DeprecationWarning.
        assert_equal(zero_one(y_true, y_pred), 11)
    assert_almost_equal(zero_one_loss(y_true, y_pred),
                        11 / float(n_samples), 2)
    assert_equal(zero_one_loss(y_true, y_pred, normalize=False), 11)
    assert_almost_equal(zero_one_loss(y_true, y_true), 0.0, 2)
    assert_almost_equal(hamming_loss(y_true, y_pred),
                        2 * 11. / (n_samples * n_classes), 2)

    assert_equal(accuracy_score(y_true, y_pred),
                 1 - zero_one_loss(y_true, y_pred))

    with warnings.catch_warnings(record=True):
        # zero_one_score is deprecated; same warning-suppression pattern as
        # above (the original passed True positionally).
        assert_equal(zero_one_score(y_true, y_pred),
                     1 - zero_one_loss(y_true, y_pred))

    # Regression
    # ----------
    assert_almost_equal(mean_squared_error(y_true, y_pred),
                        10.999 / n_samples, 2)
    assert_almost_equal(mean_squared_error(y_true, y_true),
                        0.00, 2)

    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    assert_almost_equal(mean_absolute_error(y_true, y_pred),
                        10.999 / n_samples, 2)
    assert_almost_equal(mean_absolute_error(y_true, y_true), 0.00, 2)

    assert_almost_equal(explained_variance_score(y_true, y_pred), 0.16, 2)
    assert_almost_equal(explained_variance_score(y_true, y_true), 1.00, 2)
    assert_equal(explained_variance_score([0, 0, 0], [0, 1, 1]), 0.0)

    assert_almost_equal(r2_score(y_true, y_pred), 0.12, 2)
    assert_almost_equal(r2_score(y_true, y_true), 1.00, 2)
    assert_equal(r2_score([0, 0, 0], [0, 0, 0]), 1.0)
    assert_equal(r2_score([0, 0, 0], [0, 1, 1]), 0.0)
开发者ID:Jim-Holmstroem,项目名称:scikit-learn,代码行数:49,代码来源:test_metrics.py
示例9: experiment_neighbors_k_nearest_neighbors
def experiment_neighbors_k_nearest_neighbors():
    """Plot 10-fold CV error of a distance-weighted k-NN classifier vs. k.

    Uses the module-level `mfcc` feature matrix (features x samples) and
    `labels` vector.
    """
    avgError = []
    x_learners = []
    for k_neighbors in range(1, 20, 1):
        k = 10  # number of CV folds
        skf = StratifiedKFold(labels, n_folds=k)
        averageError = 0.0
        for train_index, test_index in skf:
            # mfcc is features-by-samples, hence the column indexing and the
            # transposes below.
            X_train, X_test = mfcc[:, train_index], mfcc[:, test_index]
            y_train, y_test = labels[train_index], labels[test_index]
            knc = KNeighborsClassifier(n_neighbors=k_neighbors, weights='distance')
            knc.fit(X_train.T, y_train)
            y_pred = knc.predict(X_test.T)
            error = zero_one_loss(y_pred, y_test)
            print(error)
            averageError += (1. / k) * error
        print("Average error: %4.2f%s" % (100 * averageError, '%'))
        avgError.append(averageError)
        x_learners.append(k_neighbors)
    plt.plot(x_learners, avgError)
    plt.ylabel('Average Error (k=10)')
    plt.xlabel('Number of Neighbors')
    plt.title('Error as a function of the number of neighbors taken into consideration')
    plt.show()
开发者ID:michaely23,项目名称:MusicGenreClassification,代码行数:25,代码来源:script.py
示例10: cross_valid
def cross_valid(h, y, ratio_list):
    """Cross-validate the cap probability (ratio) for soft-margin boosting.

    Runs 4-fold CV of ParaBoost over the candidate ratios and returns the
    ratio with the lowest mean test zero-one loss.
    """
    print(" find optimal ratio")
    n_samples = h.shape[0]
    n_folds = 4
    # Integer train-fold size (floor division keeps the Python 2 semantics).
    ntr = n_samples // n_folds
    # A ratio below 1/ntr would cap fewer than one sample; drop those.
    ratio_list = ratio_list[ratio_list >= 1.0 / ntr]
    kf = cv.KFold(n=n_samples, n_folds=n_folds)
    err_tr = np.zeros((n_folds, len(ratio_list)))
    err_te = np.zeros((n_folds, len(ratio_list)))
    k = 0
    for tr_ind, te_ind in kf:
        print("nfold: %d" % (k))
        xtr, ytr, xte, yte = h[tr_ind, :], y[tr_ind], h[te_ind, :], y[te_ind]
        for i, r in enumerate(ratio_list):
            pd = ParaBoost(epsi=0.005, has_dcap=True, ratio=r)
            pd.train(xtr, ytr)
            pred = pd.test_h(xte)
            err_te[k, i] = zero_one_loss(y_true=yte, y_pred=pred)
            err_tr[k, i] = pd.err_tr[-1]
        k += 1
    err_te_avg = np.mean(err_te, axis=0)
    err_tr_avg = np.mean(err_tr, axis=0)
    arg = np.argmin(err_te_avg)
    best_ratio = ratio_list[arg]
    err = err_te_avg[arg]
    return best_ratio
开发者ID:percyqdeng,项目名称:boost,代码行数:29,代码来源:test_boost.py
示例11: apply_dbn
def apply_dbn(files, main_driver=1):
    """Apply a DBN to flag trips that are not from the driver of interest.

    Trains a [len(COL), 10, 2] deep belief network on the driver's training
    data, prints its training accuracy, and returns an (n_trips, 2) array of
    (driver_trip, predicted-label-as-string) rows.
    """
    (X_train, Y_train, weight, X, driver_trip_arr) = \
        get_train_data(files, main_driver)
    a = np.empty(shape=[0, 2])
    net = DBN([len(COL), 10, 2],
              learn_rates=0.3,
              learn_rate_decays=0.9,
              epochs=10,
              verbose=0)
    net.fit(X_train, Y_train)
    Y_dbn = net.predict(X_train)
    # Training accuracy = 1 - zero-one loss.
    print(main_driver, ':', 1 - zero_one_loss(Y_train, Y_dbn))
    Y = net.predict(X)
    for i, y in enumerate(Y):
        driver_trip = driver_trip_arr[i][0]
        prob = str(int(y))
        a = np.append(a, np.array([[driver_trip, prob]]), axis=0)
    print(main_driver, ': ', sum([1 for p in a if p[1] == '1']))
    return a
开发者ID:Mbaroudi,项目名称:junk,代码行数:31,代码来源:run.py
示例12: experiment_learners_random_forest
def experiment_learners_random_forest():
    """Plot 10-fold CV error of a random forest vs. its number of trees.

    Uses module-level `mfcc` (features x samples), `labels` and `maxDepth`.
    """
    avgError = []
    x_learners = []
    for maxLearners in range(10, 150, 20):
        k = 10  # number of CV folds
        skf = StratifiedKFold(labels, n_folds=k)
        averageError = 0.0
        for train_index, test_index in skf:
            # mfcc is features-by-samples, hence the column indexing and the
            # transposes below.
            X_train, X_test = mfcc[:, train_index], mfcc[:, test_index]
            y_train, y_test = labels[train_index], labels[test_index]
            rf = RandomForestClassifier(n_estimators=maxLearners,
                                        max_depth=maxDepth, warm_start=False)
            rf.fit(X_train.T, y_train)
            y_pred = rf.predict(X_test.T)
            error = zero_one_loss(y_pred, y_test)
            print(error)
            averageError += (1. / k) * error
        print("Average error: %4.2f%s" % (100 * averageError, '%'))
        avgError.append(averageError)
        x_learners.append(maxLearners)
    plt.plot(x_learners, avgError)
    plt.ylabel('Average Error (k=10)')
    plt.xlabel('Max Learners')
    plt.title('Error as a function of the number of learners')
    plt.show()
开发者ID:michaely23,项目名称:MusicGenreClassification,代码行数:25,代码来源:script.py
示例13: experiment_estimators_AdaBoostRandomForest
def experiment_estimators_AdaBoostRandomForest():
    """Plot 10-fold CV error of AdaBoost-over-random-forest vs. learning rate.

    Uses module-level `mfcc` (features x samples), `labels`, `maxLearners`
    and `maxDepth`.
    """
    avgError = []
    x_learners = []
    # Base estimator is shared across learning rates; AdaBoostClassifier
    # clones it internally, so reuse is safe.
    rf = RandomForestClassifier(n_estimators=maxLearners, max_depth=maxDepth,
                                warm_start=False)
    for lr in frange(0.01, 1., 0.25):
        k = 10  # number of CV folds
        skf = StratifiedKFold(labels, n_folds=k)
        averageError = 0.0
        for train_index, test_index in skf:
            X_train, X_test = mfcc[:, train_index], mfcc[:, test_index]
            y_train, y_test = labels[train_index], labels[test_index]
            adb = AdaBoostClassifier(base_estimator=rf, n_estimators=100,
                                     learning_rate=lr)
            adb.fit(X_train.T, y_train)
            y_pred = adb.predict(X_test.T)
            error = zero_one_loss(y_pred, y_test)
            print(error)
            averageError += (1. / k) * error
        print("Average error: %4.2f%s" % (100 * averageError, '%'))
        avgError.append(averageError)
        x_learners.append(lr)
    # Graph the errors.
    plt.plot(x_learners, avgError)
    plt.ylabel('Average Error (k=10)')
    plt.xlabel('Learning Rate')
    plt.title('Error as a function of the learning rate')
    plt.show()
开发者ID:michaely23,项目名称:MusicGenreClassification,代码行数:26,代码来源:script.py
示例14: experiment_pca_n_components_random_forest
def experiment_pca_n_components_random_forest():
    """Plot 10-fold CV error of a PCA + random-forest pipeline vs. PCA components.

    Uses module-level `mfcc` (features x samples), `labels`, `maxLearners`
    and `maxDepth`.
    """
    pca = decomposition.PCA()
    rf = RandomForestClassifier(n_estimators=maxLearners, max_depth=maxDepth,
                                warm_start=False)
    pipe = Pipeline(steps=[('pca', pca), ('rf', rf)])
    avgError = []
    x_learners = []
    for k_components in range(10, 100, 10):
        k = 10  # number of CV folds
        skf = StratifiedKFold(labels, n_folds=k)
        averageError = 0.0
        for train_index, test_index in skf:
            X_train, X_test = mfcc[:, train_index], mfcc[:, test_index]
            y_train, y_test = labels[train_index], labels[test_index]
            # GridSearchCV over a single value just fixes pca__n_components
            # while keeping the pipeline's fit/predict interface.
            estimator = GridSearchCV(pipe, dict(pca__n_components=[k_components]))
            estimator.fit(X_train.T, y_train)
            y_pred = estimator.predict(X_test.T)
            error = zero_one_loss(y_pred, y_test)
            print(error)
            averageError += (1. / k) * error
        print("Average error: %4.2f%s" % (100 * averageError, '%'))
        avgError.append(averageError)
        x_learners.append(k_components)
    plt.plot(x_learners, avgError)
    plt.ylabel('Average Error (k=10)')
    plt.xlabel('Number of Components')
    plt.title('Error as a function of the number of components')
    plt.show()
开发者ID:michaely23,项目名称:MusicGenreClassification,代码行数:28,代码来源:script.py
示例15: classify
def classify(self, model, test_y, test_x):
    """Score *model* on the test data.

    Binary mode (self.multi falsy): returns the (recall, specificity,
    accuracy) triple produced by self.score. Multi-class mode: returns the
    accuracy (1 - zero-one loss).
    """
    pred = model.predict(test_x)
    if not self.multi:
        rec, spec, acc = self.score(pred, test_y)
        return rec, spec, acc
    else:
        return 1 - zero_one_loss(test_y, pred)
开发者ID:ZeerakW,项目名称:statsml,代码行数:7,代码来源:fraud.py
示例16: exercise_2
def exercise_2():
    """Plot 10-fold CV error of a random forest vs. number of trees (powers of 2).

    Downloads OpenML dataset 44 and evaluates n_estimators in {1, 2, ..., 128}.
    """
    # Connect to the OpenML API.
    # NOTE(review): hard-coded API key in source — consider moving to config.
    apikey = 'ca2397ea8a2cdd9707ef39d76576e786'
    connector = APIConnector(apikey=apikey)
    dataset = connector.download_dataset(44)
    X, y, attribute_names = dataset.get_dataset(
        target=dataset.default_target_attribute, return_attribute_names=True)
    kf = cross_validation.KFold(len(X), n_folds=10, shuffle=False, random_state=0)
    error = []
    lst = [int(math.pow(2, i)) for i in range(0, 8)]  # 1, 2, 4, ..., 128
    clf = RandomForestClassifier(oob_score=True,
                                 max_features="auto",
                                 random_state=0)
    for i in lst:
        error_mean = []
        for train_index, test_index in kf:
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            clf.set_params(n_estimators=i)
            clf.fit(X_train, y_train)
            error_mean.append(zero_one_loss(y_test, clf.predict(X_test)))
        error.append(np.array(error_mean).mean())
    # Plot mean CV error against the number of trees.
    plt.style.use('ggplot')
    plt.plot(lst, error, '#009999', marker='o')
    plt.xticks(lst)
    plt.show()
开发者ID:rodmendozam,项目名称:Ass3_foundations,代码行数:28,代码来源:random_forest.py
示例17: clf_bias_var
def clf_bias_var(clf, X, y, n_replicas):
    """Bootstrap-estimate AUC, zero-one error, bias and variance of *clf*.

    For each of *n_replicas* bootstrap resamples, fits *clf* on the in-bag
    samples and scores it on the out-of-bag samples; bias/variance are then
    computed (via the project-level bias_var helper) from the accumulated
    out-of-bag predictions.

    Returns (auc_scores, error_scores, bias, var).
    """
    roc_auc_scorer = get_scorer("roc_auc")
    auc_scores = []
    error_scores = []
    # Per-sample count of out-of-bag evaluations and sum of predictions.
    counts = np.zeros(X.shape[0], dtype=np.float64)
    sum_preds = np.zeros(X.shape[0], dtype=np.float64)
    for it in range(n_replicas):
        # Bootstrap: sample indices with replacement for the train set...
        train_indices = np.random.randint(X.shape[0], size=X.shape[0])
        # ...and use the never-sampled (out-of-bag) indices as the test set.
        in_train = np.unique(train_indices)
        mask = np.ones(X.shape[0], dtype=np.bool)
        mask[in_train] = False
        test_indices = np.arange(X.shape[0])[mask]
        clf.fit(X[train_indices], y[train_indices])
        auc_scores.append(roc_auc_scorer(clf, X[test_indices], y[test_indices]))
        error_scores.append(zero_one_loss(y[test_indices],
                                          clf.predict(X[test_indices])))
        preds = clf.predict(X)
        for index in test_indices:
            counts[index] += 1
            sum_preds[index] += preds[index]
    test_mask = (counts > 0)  # samples that were tested at least once
    bias, var = bias_var(y[test_mask], sum_preds[test_mask],
                         counts[test_mask], n_replicas)
    return auc_scores, error_scores, bias, var
开发者ID:lidalei,项目名称:DataMining,代码行数:35,代码来源:random_forests.py
示例18: exercise_1
def exercise_1():
    """Plot 10-fold CV misclassification error of k-NN on blob data for k=1..49."""
    X, y = make_blobs(n_samples=1000, centers=50, n_features=2, random_state=0)
    n_samples = len(X)
    kf = cross_validation.KFold(n_samples, n_folds=10, shuffle=False,
                                random_state=None)
    error_total = np.zeros([49, 1], dtype=float)
    for k in range(1, 50):
        error = []
        clf = KNeighborsClassifier(n_neighbors=k)
        for train_index, test_index in kf:
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            clf.fit(X_train, y_train)
            error.append(zero_one_loss(y_test, clf.predict(X_test)))
        # Mean error over the 10 folds for this k.
        error_total[k - 1, 0] = np.array(error).mean()
    x = np.arange(1, 50, dtype=int)
    plt.style.use('ggplot')
    plt.plot(x, error_total[:, 0], '#009999', marker='o')
    plt.xticks(x, x)
    plt.margins(0.02)
    plt.xlabel('K values')
    plt.ylabel('Missclasification Error')
    plt.show()
开发者ID:palindrome6,项目名称:Data-Mining---Assignment-2,代码行数:33,代码来源:model_selection.py
示例19: run_ordinal_regression
def run_ordinal_regression(X_train, y_train, X_test, y_test, ordinal_regression_model):
    """Fit an ordinal-regression model and print train/test accuracy.

    zero_one_loss with normalize=False returns the *count* of errors, so
    accuracy is (n - errors) / n. Returns the test accuracy.
    """
    print("Running ordinal regression with multiclass labels...")
    ordinal_regression_clf = ordinal_regression_model(alpha=ALPHA, max_iter=MAX_ITER)
    ordinal_regression_clf.fit(X_train, y_train)

    y_pred = ordinal_regression_clf.predict(X_train)
    training_err = metrics.zero_one_loss(y_train, y_pred, normalize=False)
    training_acc = float(len(y_train) - training_err) / len(y_train)
    print("%.4f = Training accuracy for ordinal regression with multiclass labels" %
          training_acc)

    y_pred = ordinal_regression_clf.predict(X_test)
    test_err = metrics.zero_one_loss(y_test, y_pred, normalize=False)
    test_acc = float(len(y_test) - test_err) / len(y_test)
    print("%.4f = Test accuracy for ordinal regression with multiclass labels" %
          test_acc)
    return test_acc
开发者ID:meganlshao,项目名称:Imgur,代码行数:16,代码来源:imgur.py
示例20: compare_manual_vs_model
def compare_manual_vs_model():
    """Compare model predictions against manual (and rule-improved manual) labels.

    Loads saved model predictions and test labels, restricts to the manually
    classified files, applies inferred topology rules to the manual labels,
    and prints multilabel metrics for each of the three prediction sets.
    """
    with open(DATA_FOLDER + "labels_int.p", "r") as f:
        y_dict = pickle.load(f)

    print("Loading test data")
    X_test, y_test, filenames_test = dataset.load_test()
    y_pred = joblib.load("../models/pred_ml_improved.pkl")

    # Keep only the files that were classified manually.
    relevant = []
    for pred, correct, filename in zip(y_pred, y_test, filenames_test):
        if filename in FILES:
            relevant.append((pred, correct, filename, CLASSIFICATIONS[filename]))
    model_predictions, correct, filename, manual_predictions = zip(*relevant)

    manual_predictions = learn.multilabel_binary_y(manual_predictions)
    model_predictions = np.array(model_predictions)
    correct = learn.multilabel_binary_y(correct)

    rules = infer_topology.infer_topology_rules()
    improved_manual = infer_topology.apply_topology_rules(rules, manual_predictions)

    prediction_names = ["MODEL", "MANUAL", "IMPROVED_MANUAL"]
    predictions = [model_predictions, manual_predictions, improved_manual]
    for name, pred in zip(prediction_names, predictions):
        print("\n{}\n--".format(name))
        print("Zero-one classification loss", zero_one_loss(correct, pred))
        print("Hamming loss", hamming_loss(correct, pred))
        # NOTE(review): label_list is not defined in this function — presumed
        # module-level; verify it matches the columns of `correct`.
        print("Precision:", precision_score(correct, pred, average="weighted",
                                            labels=label_list))
        print("Recall   :", recall_score(correct, pred, average="weighted",
                                         labels=label_list))
        print("F1 score :", f1_score(correct, pred, average="weighted",
                                     labels=label_list))
开发者ID:gzuidhof,项目名称:text-mining,代码行数:33,代码来源:manual_classifications.py
注:本文中的sklearn.metrics.zero_one_loss函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论