本文整理汇总了Python中skll.metrics.kappa函数的典型用法代码示例。如果您正苦于以下问题：Python kappa函数的具体用法？Python kappa怎么用？Python kappa使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了kappa函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: metrics_helper
def metrics_helper(human_scores, system_scores):
    """
    Compute basic agreement and distribution metrics for the
    system_scores against the human_scores.

    Parameters
    ----------
    human_scores : array-like
        Numeric human scores (assumed to be aligned element-wise with
        ``system_scores`` -- TODO confirm with callers).
    system_scores : array-like
        Numeric system scores.

    Returns
    -------
    pandas.Series
        Keyed by 'kappa', 'wtkappa', 'exact_agr', 'adj_agr', 'SMD',
        'corr', 'sys_min', 'sys_max', 'sys_mean', 'sys_sd', 'h_min',
        'h_max', 'h_mean', 'h_sd' and 'N'.
    """
    # compute the kappas; the quadratic-weighted kappa is computed on
    # rounded system scores.  np.round is used instead of the built-in
    # round(), which does not accept lists or ndarrays.
    unweighted_kappa = kappa(human_scores, system_scores)
    quadratic_weighted_kappa = kappa(human_scores,
                                     np.round(system_scores),
                                     weights='quadratic')

    # compute the agreement statistics (exact, and within one point)
    human_system_agreement = agreement(human_scores, system_scores)
    human_system_adjacent_agreement = agreement(human_scores,
                                                system_scores,
                                                tolerance=1)

    # compute the pearson correlation after removing
    # any cases where either of the scores are NaNs.
    df = pd.DataFrame({'human': human_scores,
                       'system': system_scores}).dropna(how='any')
    correlations = pearsonr(df['human'], df['system'])[0]

    # compute the min/max/mean/std. dev. for the system and human scores;
    # ddof=1 gives the sample standard deviation
    min_system_score = np.min(system_scores)
    min_human_score = np.min(human_scores)
    max_system_score = np.max(system_scores)
    max_human_score = np.max(human_scores)
    mean_system_score = np.mean(system_scores)
    mean_human_score = np.mean(human_scores)
    system_score_sd = np.std(system_scores, ddof=1)
    human_score_sd = np.std(human_scores, ddof=1)

    # compute standardized mean difference as recommended
    # by Williamson et al (2012): the mean difference divided by the
    # root of the averaged variances
    numerator = mean_system_score - mean_human_score
    denominator = np.sqrt((system_score_sd**2 + human_score_sd**2)/2)
    SMD = numerator/denominator

    # return everything as a series
    return pd.Series({'kappa': unweighted_kappa,
                      'wtkappa': quadratic_weighted_kappa,
                      'exact_agr': human_system_agreement,
                      'adj_agr': human_system_adjacent_agreement,
                      'SMD': SMD,
                      'corr': correlations,
                      'sys_min': min_system_score,
                      'sys_max': max_system_score,
                      'sys_mean': mean_system_score,
                      'sys_sd': system_score_sd,
                      'h_min': min_human_score,
                      'h_max': max_human_score,
                      'h_mean': mean_human_score,
                      'h_sd': human_score_sd,
                      'N': len(system_scores)})
开发者ID:WeilamChung,项目名称:rsmtool,代码行数:59,代码来源:analysis.py
示例2: agreementtest
def agreementtest(path1,path2):
    """
    Compare human labels (path1) with machine labels (path2).

    Returns a tuple ``(crosstable, acc, recall, prec, F1_hand, kappa)``
    where the crosstable has predicted labels as rows and human labels as
    columns, and the remaining values are derived from its '0'/'1' cells
    plus skll's kappa on the aligned label lists.
    """
    # 1. load both label sets
    from utils import loadLabels
    human_labels = loadLabels(path1, 0, 2)
    machine_labels = loadLabels(path2, 0, 2)

    # 2. align them on the keys of the human label set
    y = [human_labels[key] for key in human_labels]
    y_pred = [machine_labels[key] for key in human_labels]
    print("%s %s" % (len(y), len(y_pred)))

    # 3. raw agreement from the cross tabulation
    from pandas import DataFrame
    from pandas import crosstab
    frame = DataFrame({'y_pred' : y_pred,
                       'y_human' : y})
    crosstable = crosstab(frame['y_pred'], frame['y_human'])
    print(crosstable)

    # crosstable[human][predicted]: column is the human label, row the
    # predicted label
    true_pos = crosstable['1']['1']
    true_neg = crosstable['0']['0']
    false_pos = crosstable['0']['1']
    false_neg = crosstable['1']['0']
    acc = float(true_pos + true_neg) / len(y_pred)
    prec = float(true_pos) / (true_pos + false_pos)
    recall = float(true_pos) / (true_pos + false_neg)
    F1_hand = 2 * prec * recall / (prec + recall)

    # 4. chance-corrected agreement via skll
    from skll import metrics
    kappa_score = metrics.kappa(y, y_pred)
    return crosstable, acc, recall, prec, F1_hand, kappa_score
开发者ID:Yaru007,项目名称:Twitter-data-mining-framwork,代码行数:32,代码来源:metric.py
示例3: stats
def stats (list1,list2):
    """
    Print two prediction lists, their float encodings and the kappa
    between them.  list2 is reversed first so that both lists are in a
    comparable order; the untouched list2 is printed last.
    """
    print("Predictions:")
    print(list1)
    flipped = list(reversed(list2))  # COMPARABLE ORDER
    print(flipped)
    print("")

    first_as_float = [class2float(label) for label in list1]
    second_as_float = [class2float(label) for label in flipped]
    print(first_as_float)
    print(second_as_float)
    print("")

    # http://skll.readthedocs.org/en/latest/_modules/skll/metrics.html
    print(kappa(first_as_float, second_as_float))
    print("")
    print(list2)
开发者ID:manexagirrezabal,项目名称:char-rnn,代码行数:16,代码来源:callSampleMod-bidirectional.py
示例4: train_model
def train_model(train, folds):
    """
    Cross-validate a scaler + RBF SVC pipeline with quadratic-weighted
    kappa, report the fold scores through warn(), then refit the pipeline
    on all rows and return it.

    ``train`` must expose ``median_relevance`` (the target) and ``doc_id``
    columns; ``folds`` is converted with int().
    """
    y = train.median_relevance.values
    x = train.drop(["median_relevance", "doc_id"], 1).values

    scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
    svc = SVC(C=10.0, kernel='rbf', degree=3, gamma=0.0, coef0=0.0,
              shrinking=True, probability=False, tol=0.001, cache_size=200,
              class_weight=None, verbose=False, max_iter=-1,
              random_state=None)
    clf = Pipeline([('scl', scaler), ('svm', svc)])

    splitter = cross_validation.StratifiedKFold(y=y,
                                                n_folds=int(folds),
                                                shuffle=True,
                                                random_state=42)
    scores = []
    for train_index, test_index in splitter:
        clf.fit(x[train_index], y[train_index])
        predicted = transform_regression(clf.predict(x[test_index]))
        fold_score = kappa(y[test_index], predicted, weights="quadratic")
        print(fold_score)
        scores.append(fold_score)

    warn("cv scores:")
    warn(scores)
    warn(np.mean(scores))
    warn(np.std(scores))

    # final model is trained on the full data set
    clf.fit(x, y)
    return clf
开发者ID:drsmithization,项目名称:kaggle_public,代码行数:35,代码来源:eval.py
示例5: testing
def testing(file):
    """
    To test and see if the quadratic weighing kappa function is working
    properly.

    Reads a tab-separated file, skips its first line, takes column 6 of
    every remaining row as an integer label and prints the
    quadratic-weighted kappa of the labels against themselves.

    Parameters
    ----------
    file : path of the tab-separated input file.
    """
    labels, estimate = [], []
    # the context manager closes the file even when a row fails to parse
    # (the handle used to be left open)
    with open(file, 'r') as f:
        f.readline()  # discard the first line (presumably a header)
        for row in f:
            label = row.strip().split("\t")[6]
            # random perturbation of the label: scaled by 4 about half
            # of the time
            if random() > 0.5:
                estimate.append(int(4*int(label)*random()))
            else:
                estimate.append(int(int(label)*random()))
            labels.append(int(label))
    # NOTE(review): `estimate` is built but never passed to kappa; the
    # labels are compared with themselves.  Confirm whether
    # kappa(labels, estimate, ...) was intended.
    print(kappa(labels, labels, weights = 'quadratic'))
开发者ID:ChetanVashisht,项目名称:Automatic-Essay-Evaluation,代码行数:17,代码来源:remove.py
示例6: runSVM
def runSVM(self, y_test, y_train, x_train, x_test):
    """
    Fit a LinearSVC on the training split and score it on the test split.

    Returns ``(kappa_score, direction)``: the kappa between y_test and
    the predictions, and the first row of the fitted coefficient matrix
    as a plain list.
    """
    classifier = svm.LinearSVC(class_weight="auto")
    classifier.fit(x_train, y_train)
    direction = classifier.coef_.tolist()[0]
    predictions = classifier.predict(x_test).tolist()
    return kappa(y_test, predictions), direction
开发者ID:eygrr,项目名称:RulesFromAuto-encoders,代码行数:8,代码来源:SVM.py
示例7: print_kappa
def print_kappa(self, method, one_off=False):
# Prints the average kappa between randomly drawn pairs of ids, reported
# separately for pairs whose batch_hash entries are equal ("same") and
# different ("diff"), followed by a 0.999 normal interval around each
# average.
#   method  -- 'standard' calls metrics.kappa with its default arguments;
#              any other value is forwarded as the third positional
#              argument (presumably skll's `weights` -- confirm).
#   one_off -- forwarded as the fourth positional argument when method is
#              not 'standard' (presumably `allow_off_by_one` -- confirm).
# NOTE(review): this listing has lost its indentation, so the nesting
# described in the comments below is presumed from context; confirm it
# against the original file, in particular where checked_pairs.append
# sits.
mean_kappa_same = []
mean_kappa_diff = []
# 50 repetitions; the same 50 appears in the sqrt(50) scale of the
# intervals printed at the end
for i in range(0,50):
checked_pairs = []
checked_pairs_same = []
checked_pairs_diff = []
kappas_same = []
kappas_diff = []
# calculating agreement for pairs from the same batches and different batches
# keeps sampling until both lists hold at least 20 pairs
# NOTE(review): looks like this cannot terminate when fewer than 20
# usable pairs of either kind exist -- confirm
while len(checked_pairs_same) < 20 or len(checked_pairs_diff) < 20:
id1 = random.choice(self.ids)
id2 = random.choice(self.ids)
# sorted so that (a, b) and (b, a) compare as the same pair
pair = sorted([id1, id2])
if pair not in checked_pairs and id1 != id2:
values_first = self.get_rating_values(id1)
values_second = self.get_rating_values(id2)
# skip pairs whose rating lists differ in length or are empty
if len(values_first) != len(values_second) or len(values_first) == 0:
continue
if method == 'standard':
kappa = metrics.kappa(values_first, values_second)
else:
kappa = metrics.kappa(values_first, values_second, method, one_off)
if self.batch_hash[id1] == self.batch_hash[id2]:
kappas_same.append(kappa)
checked_pairs_same.append(pair)
else:
kappas_diff.append(kappa)
checked_pairs_diff.append(pair)
checked_pairs.append(pair)
# one mean per repetition for each group
mean_kappa_same.append(numpy.mean(kappas_same))
mean_kappa_diff.append(numpy.mean(kappas_diff))
print("Kappa same group: " + str(numpy.mean(mean_kappa_same)) + " different groups: " + str(numpy.mean(mean_kappa_diff)))
# interval centred on the mean of the repetition means, with scale std/sqrt(50)
print("Confidence same: " + str(stats.norm.interval(0.999, loc=numpy.mean(mean_kappa_same), scale=numpy.std(mean_kappa_same)/math.sqrt(50))) + " different: " + str(stats.norm.interval(0.999, loc=numpy.mean(mean_kappa_diff), scale=numpy.std(mean_kappa_diff)/math.sqrt(50))))
开发者ID:amalinovskiy,项目名称:translation_rater,代码行数:42,代码来源:analysis.py
示例8: eval
def eval(self):
    """
    Run stratified k-fold cross-validation on ``self.y_train`` and print
    the quadratic-weighted kappa of every fold, then the list of scores,
    their mean and their standard deviation.
    """
    sys.stderr.write('Evaluating\n')
    cv_splits = StratifiedKFold(y=self.y_train,
                                n_folds=self.folds,
                                shuffle=True,
                                random_state=1337)
    scores = []
    for train_index, test_index in cv_splits:
        self.fit(train_index)
        predicted, y_test = self.predict(test_index)
        fold_kappa = kappa(y_test, transform(predicted), weights='quadratic')
        print(fold_kappa)
        scores.append(fold_kappa)

    print(scores)
    print(np.mean(scores))
    print(np.std(scores))
开发者ID:drsmithization,项目名称:kaggle_public,代码行数:13,代码来源:model.py
示例9: evalerror
def evalerror(preds, dtrain):
    """
    Custom evaluation callback: returns the pair ``('kappa', error)``
    where error is one minus the quadratic-weighted kappa between the
    labels of ``dtrain`` and the predictions rounded to whole numbers.
    """
    labels = dtrain.get_label()
    rounded = np.round(preds, 0)
    error = 1.0 - kappa(labels, rounded, weights='quadratic')
    # a (metric_name, result) pair
    return 'kappa', error
开发者ID:Chenrongjing,项目名称:diabetic-retinopathy,代码行数:13,代码来源:cross_validation.py
示例10: kNNClass
def kNNClass(train_idx,test_idx,n_neighbors):
    """
    Fit a uniform-weight k-nearest-neighbours classifier on the rows
    selected by train_idx and evaluate it on the rows selected by
    test_idx.

    Reads the file-level ``input_kmers_counts`` table and the
    ``kmer_colums`` feature columns.

    Parameters
    ----------
    train_idx, test_idx : row labels passed to ``input_kmers_counts.loc``.
    n_neighbors : number of neighbours for the classifier.

    Returns
    -------
    (kapp, cm) : the kappa between true and predicted classes, and the
    confusion matrix object produced by ``caret.confusionMatrix``.
    """
    training_data = input_kmers_counts.loc[train_idx]
    testing_data = input_kmers_counts.loc[test_idx]
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights="uniform")
    clf.fit(training_data[kmer_colums], training_data["class"])
    true_classes = testing_data["class"]
    predicted_classes = clf.predict(testing_data[kmer_colums])

    # kappa needs integer ratings, so map every label to an int code.
    # The map covers labels from BOTH vectors and is not capped at four
    # entries: a class that is predicted but absent from the test rows
    # (or a fifth class) used to raise KeyError.  Unweighted kappa does
    # not depend on which code a label receives.
    all_labels = set(true_classes) | set(predicted_classes)
    class_map = dict((label, code) for code, label in enumerate(all_labels))
    kapp = kappa([class_map[x] for x in true_classes],
                 [class_map[x] for x in predicted_classes])

    cm = caret.confusionMatrix(robjects.FactorVector(predicted_classes),
                               robjects.FactorVector(true_classes))
    return kapp, cm
开发者ID:EdwardBetts,项目名称:metaviro,代码行数:14,代码来源:knn_test.py
示例11: kNNClass
def kNNClass(train_idx,test_idx,n_neighbors,k_mer_subset):
    """
    Fit a uniform-weight k-nearest-neighbours classifier restricted to the
    k-mer columns in k_mer_subset and evaluate it on the test rows.

    Reads the file-level ``normalized_counts`` table.

    Parameters
    ----------
    train_idx, test_idx : row labels passed to ``normalized_counts.loc``.
    n_neighbors : number of neighbours for the classifier.
    k_mer_subset : feature columns to use.

    Returns
    -------
    (kapp, cm) : the kappa between true and predicted classes, and the
    confusion matrix object produced by ``caret.confusionMatrix``.
    """
    # arguments are passed to the logger rather than %-formatted, so a
    # tuple-valued subset no longer breaks the message
    logger.info('computing for %s', k_mer_subset)
    columns = np.append(k_mer_subset, "class")
    training_subset = normalized_counts.loc[train_idx][columns]
    testing_subset = normalized_counts.loc[test_idx][columns]
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights="uniform")
    clf.fit(training_subset[k_mer_subset], training_subset["class"])

    # evaluate on the subset built above; the code previously referred to
    # `testing_data`, a name this function never defines
    true_classes = testing_subset["class"]
    predicted_classes = clf.predict(testing_subset[k_mer_subset])

    # kappa needs integer ratings, so map every label to an int code.
    # The map covers labels from both vectors and is not capped at four
    # entries, so a label seen only in the predictions cannot raise
    # KeyError.  Unweighted kappa does not depend on the code assignment.
    all_labels = set(true_classes) | set(predicted_classes)
    class_map = dict((label, code) for code, label in enumerate(all_labels))
    kapp = kappa([class_map[x] for x in true_classes],
                 [class_map[x] for x in predicted_classes])

    cm = caret.confusionMatrix(robjects.FactorVector(predicted_classes),
                               robjects.FactorVector(true_classes))
    logger.info("Finished for %s with kappa==%f", k_mer_subset, kapp)
    return kapp, cm
开发者ID:EdwardBetts,项目名称:metaviro,代码行数:18,代码来源:knn_feature_subsets.py
示例12: get_average_kappa
def get_average_kappa(arr_act, arr_pred):
    """
    Return the mean of the per-item quadratic kappas: kappa is evaluated
    for every position on the one-element lists [actual] and [predicted],
    and the results are averaged over all positions.

    Both sequences must have the same length.
    """
    assert(len(arr_act) == len(arr_pred))
    total = len(arr_act)
    kappa_sum = sum(met.kappa([arr_act[i]], [arr_pred[i]], 'quadratic')
                    for i in xrange(0, total))
    return float(kappa_sum) / float(total)
开发者ID:vaibhav4595,项目名称:AutoEssayGrading,代码行数:18,代码来源:metrics.py
示例13: accuracy_stats
def accuracy_stats(Ypred, Ytest):
    """
    Score every (prediction, truth) pair from Ypred and Ytest.

    Returns a dict with the keys 'AA', 'AP', 'f1', 'recall' and 'kappa',
    each holding one score per pair, computed on the flattened arrays with
    the truth passed first.
    """
    scorers = (('AA', accuracy_score),
               ('AP', precision_score),
               ('f1', f1_score),
               ('recall', recall_score),
               ('kappa', kappa))
    stats = {}
    for name, _scorer in scorers:
        stats[name] = []

    for ypred, ytest in zip(Ypred, Ytest):
        truth = ytest.ravel()
        guess = ypred.ravel()
        for name, scorer in scorers:
            stats[name].append(scorer(truth, guess))
    return stats
开发者ID:jejjohnson,项目名称:manifold_learning,代码行数:18,代码来源:classification_list.py
示例14: scores
def scores(X,y,y_proba,name="nan",to_plot=False):
# Prints a confusion matrix and several classification metrics, each
# prefixed with `name`, and returns
# [auc, f1, accuracy, precision, recall, kappa_score] as a list.
# NOTE(review): X is passed as the first argument of every metric call
# and y as the second, so X presumably holds the true labels and y the
# predicted labels (not features) -- confirm with callers.
# y_proba is only used for the AUC.
# print(name+' Classifier:\n {}'.format(metrics.classification_report(X,y)))
cm= metrics.confusion_matrix(X,y)
print cm
# NOTE(review): indentation was lost in this listing; it is unclear
# whether auc_compute belongs to the to_plot branch together with plt_cm
# -- confirm against the original file.
if(to_plot):
plt_cm(X,y,[-1,1])
auc_compute(X,y)
auc=roc_auc_score(X,y_proba)
print(name+' Classifier auc: %f' % auc)
accuracy=metrics.accuracy_score(X,y)
print(name+' Classifier accuracy: %f' % (accuracy))
f1=metrics.f1_score(X,y,pos_label=1)
print(name+' Classifier f1: %f' % (f1))
precision=metrics.precision_score(X,y)
print(name+' Classifier precision_score: %f' % (precision))
recall=metrics.recall_score(X,y)
print(name+' Classifier recall_score: %f' % (recall))
kappa_score=kappa(X,y)
print(name+' Classifier kappa_score:%f' % (kappa_score))
# .mean() is applied to each metric value before returning it
return [auc,f1.mean(),accuracy.mean(),precision.mean(),recall.mean(),kappa_score]
开发者ID:Zerowxm,项目名称:kdd-cup2009,代码行数:21,代码来源:utils.py
示例15: train_model
def train_model(train, folds):
    """
    Cross-validate an xgboost regressor with quadratic-weighted kappa and
    report the fold scores, their mean and standard deviation via warn().

    ``train`` must expose ``median_relevance`` (the target) and ``doc_id``
    columns; ``folds`` is converted with int().  Nothing is returned.
    """
    y = train.median_relevance.values
    x = train.drop(["median_relevance", "doc_id"], 1).values

    num_round = 600
    xg_params = {
        "silent": 1,
        "objective": "reg:linear",
        "nthread": 4,
        "bst:max_depth": 10,
        "bst:eta": 0.1,
        "bst:subsample": 0.5
    }

    splitter = cross_validation.StratifiedKFold(y=y,
                                                n_folds=int(folds),
                                                shuffle=True,
                                                random_state=42)
    scores = []
    for train_index, test_index in splitter:
        y_test = y[test_index]
        xg_train = xg.DMatrix(x[train_index], label=y[train_index])
        xg_test = xg.DMatrix(x[test_index], label=y_test)
        # both sets are watched during boosting
        watchlist = [(xg_train, "train"), (xg_test, "test")]
        bst = xg.train(xg_params, xg_train, num_round, watchlist,
                       feval=evalerror)
        predicted = transform_regression(bst.predict(xg_test))
        fold_score = kappa(y_test, predicted, weights="quadratic")
        print(fold_score)
        scores.append(fold_score)

    warn("cv scores:")
    warn(scores)
    warn(np.mean(scores))
    warn(np.std(scores))
开发者ID:drsmithization,项目名称:kaggle_public,代码行数:40,代码来源:eval_xgboost.py
示例16: run
def run(self):
    """
    Worker loop: for each of ``self.ntasks`` iterations, draw a random
    train/test split of the rows of ``self.matrix``, fit a
    distance-weighted KNN classifier, and put one result line on
    ``self.result_queue``.

    Each result line holds the stringified items of ``info_log``, a
    timestamp, the kappa between predictions and targets, and the
    flattened confusion matrix.  Reads the file-level names
    ``n_neighbors``, ``targ`` and ``info_log``.
    """
    for loopcount in range(self.ntasks):
        seed = time.time()

        # random split of the row indices into training and testing
        n_rows = self.matrix.shape[0]
        training = random.sample(range(n_rows),
                                 int(self.trainingratio*n_rows))
        testing = list(set(range(n_rows)).difference(training))
        train_targets = [targ[i] for i in training]
        print("%s %s %s" % (self.matrix[training,:].shape,
                            len(train_targets),
                            self.matrix[testing,:].shape))

        # the classifier is fed dense matrices
        clf = neighbors.KNeighborsClassifier(n_neighbors, weights="distance")
        clf.fit(self.matrix[training,:].todense(), train_targets)
        classes = clf.predict(self.matrix[testing,:].todense())
        test_targets = [targ[i] for i in testing]

        resultsline = [] + info_log
        resultsline.append(seed)
        resultsline.append(kappa(classes, test_targets))
        # confusion matrix flattened row by row
        conf = [cell
                for row in confusion_matrix(classes, test_targets)
                for cell in row]
        resultsline = resultsline + conf
        self.result_queue.put([str(item) for item in resultsline])

    sys.stdout.write("Done with worker for %d tasks: %d loops done\n" % (self.ntasks, loopcount+1))
开发者ID:EdwardBetts,项目名称:metaviro,代码行数:39,代码来源:knn_multi.py
示例17: test_invalid_weighted_kappa
def test_invalid_weighted_kappa():
    """
    Call kappa with the unsupported weights value 'invalid', first with
    allow_off_by_one disabled and then enabled.
    """
    for off_by_one in (False, True):
        kappa([1, 2, 1], [1, 2, 1], weights='invalid',
              allow_off_by_one=off_by_one)
开发者ID:wavelets,项目名称:skll,代码行数:3,代码来源:test_skll.py
示例18: list
# Script fragment: compares the kappa of a KNN classifier on the raw iris
# features with the kappa of the same classifier on LMNN-transformed
# features.  `iris`, `n_neighbors`, `weights` and `LMNN` are defined
# outside this fragment.
X = iris.data[:, :2] # we only take the first two features. We could
# avoid this ugly slicing by using a two-dim dataset
# NOTE(review): X is not used below; the splits are taken from iris.data
# with all of its columns.
y = iris.target
# 100 random row indices out of 150 for training, the remaining ones for testing
training = random.sample(range(150), 100)
testing = list(set(range(150)).difference(training))
X_train = iris.data[training,:]
y_train = [y[i] for i in training]
X_test = iris.data[testing,:]
y_test = [y[i] for i in testing]
# baseline: KNN on the untransformed features
clf3 = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
clf3.fit(X_train, y_train)
print "kappa =", kappa(y_test,clf3.predict(X_test))
# fit LMNN on the training split, then transform both splits with it
metric = LMNN(X_train, y_train)
metric.fit()
new_X_train = metric.transform()
new_X_test = metric.transform(X_test)
# same classifier settings, now on the transformed features
clf4 = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
clf4.fit(new_X_train, y_train)
print "kappa =", kappa(y_test,clf4.predict(new_X_test))
#
## Create color maps
#cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
#cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
开发者ID:EdwardBetts,项目名称:metaviro,代码行数:31,代码来源:lmnn_test1.py
示例19: confusion_matrix
# Script fragment: fits a 15-neighbour KNN classifier on k-mer counts,
# computes kappa, then selects the k-mers with degree > 2 and pivots them
# into a count table.  `indices`, `n_rows`, `training_ratio`,
# `input_kmers_counts`, `count_colums` and `all_nodes_df` are defined
# outside this fragment.
# NOTE(review): both slices have length int(n_rows*training_ratio); the
# test slice is taken from the end of `indices`, so the two sets overlap
# whenever training_ratio > 0.5 -- confirm this is intended.
training_set=indices[0:int(n_rows*training_ratio)]
testing_set=indices[-int(n_rows*training_ratio):]
training_data=input_kmers_counts.loc[training_set]
testing_data=input_kmers_counts.loc[testing_set]
clf = neighbors.KNeighborsClassifier(15, weights="uniform")
clf.fit(training_data[count_colums], training_data["class"])
#print "predicting"
predicted_classes= clf.predict(testing_data[count_colums])
# compute kappa stat
# (the results of this call and of the kappa call below are not stored)
confusion_matrix(testing_data["class"],predicted_classes)
# make a mapping
# maps the classes present in the test rows to integer codes 0..3
# NOTE(review): a predicted class that is absent from the test rows would
# raise KeyError in the lookup below -- confirm it cannot occur.
class_map=dict(zip(set(testing_data["class"]),range(0,4)))
kappa([class_map[x] for x in testing_data["class"]],[class_map[x] for x in predicted_classes])
# fit a KNN on the normalized_counts
# kNNClass(training_set,testing_set,15,count_colums)
# We focus on the ambiguous k-mers, approx 15k; basically all-kmer appear more than once
ambiguous_kmers=all_nodes_df[all_nodes_df["degree"]>2]
# distinct k-mer counts before and after the filter (values are not stored)
len(set(ambiguous_kmers['kmer']))
len(set(all_nodes_df['kmer']))
# We do a PCA on that
# one row per sequence_description, one column per k-mer, missing cells filled with 0
amb_kmers_counts=pandas.pivot_table(ambiguous_kmers,values="degree",index=['sequence_description'],columns=["kmer"],fill_value=0)
kmer_colums=amb_kmers_counts.columns
开发者ID:EdwardBetts,项目名称:metaviro,代码行数:31,代码来源:build_de_bruijn.py
示例20: quadratic_kappa
def quadratic_kappa(true, predicted):
    """Return the quadratic-weighted kappa between true and predicted."""
    score = kappa(true, predicted, weights='quadratic')
    return score
开发者ID:StevenReitsma,项目名称:kaggle-diabetic-retinopathy,代码行数:2,代码来源:net_512_b64_ns.py
注：本文中的skll.metrics.kappa函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论