本文整理汇总了Python中nltk.metrics.recall函数的典型用法代码示例。如果您正苦于以下问题:Python recall函数的具体用法?Python recall怎么用?Python recall使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了recall函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: get_performance
def get_performance(clf_sel, train_features, test_features):
    """Train ``clf_sel`` wrapped in ``SklearnClassifier`` and print its scores.

    On success one comma-separated line is printed: the estimator class,
    the accuracy, then precision and recall for the 'pos' label followed
    by precision and recall for the 'neg' label. Nothing is printed when
    training fails.

    :param clf_sel: estimator instance handed to ``SklearnClassifier``.
    :param train_features: training data passed to ``train``.
    :param test_features: iterable of ``(features, label)`` pairs; it is
        traversed twice (accuracy, then the per-item loop).
    :return: None
    """
    clf = SklearnClassifier(clf_sel)
    try:
        classifier = clf.train(train_features)
    except Exception:
        # Best effort: an estimator that cannot be trained on these features
        # is skipped. Returning here also keeps the pickle step below from
        # touching an unbound ``classifier``.
        return

    if str(clf_sel.__class__) == "<class 'sklearn.naive_bayes.MultinomialNB'>":
        pickle_cls(classifier, 'MultinomialNB')

    clf_acc = nltk.classify.accuracy(classifier, test_features)

    # Index sets per label: gold labels in ref_set, predictions in test_set.
    ref_set = collections.defaultdict(set)
    test_set = collections.defaultdict(set)
    for i, (features, label) in enumerate(test_features):
        ref_set[label].add(i)
        predicted = classifier.classify(features)
        test_set[predicted].add(i)

    pos_precision = precision(ref_set['pos'], test_set['pos'])
    pos_recall = recall(ref_set['pos'], test_set['pos'])
    neg_precision = precision(ref_set['neg'], test_set['neg'])
    neg_recall = recall(ref_set['neg'], test_set['neg'])
    print(
        "{0},{1},{2},{3},{4},{5}".format(clf_sel.__class__, clf_acc, pos_precision, pos_recall, neg_precision,
                                         neg_recall))
开发者ID:koosha,项目名称:twitter-sentiment-analysis-v2,代码行数:33,代码来源:sentiment_analyzer.py
示例2: validate
def validate(self, validation_set):
    """Score ``self.classify`` against labelled data and print the results.

    :param validation_set: iterable of ``(tweet, label)`` pairs.
    :return: tuple ``(accuracy, pos precision, pos recall, pos f-measure,
        neg precision, neg recall, neg f-measure)``; the per-class values
        are whatever ``precision``/``recall``/``f_measure`` return for the
        'positive' and 'negative' labels.
    :raises Exception: if ``self.classifier`` is None.
    """
    if self.classifier is None:
        raise Exception("self.classifier is None")

    reference = defaultdict(set)
    observed = defaultdict(set)
    total = 0
    correct = 0
    for i, (tweet, label) in enumerate(validation_set):
        reference[label].add(i)
        observation = self.classify(tweet)
        observed[observation].add(i)
        total += 1
        if observation == label:
            correct += 1

    # Accuracy is derived from the same predictions as the other metrics.
    # The previous code passed the ``observed`` dict of index sets to
    # ``classify.accuracy``, which expects (featureset, label) pairs.
    acc = float(correct) / total if total else 0

    posp = precision(reference['positive'], observed['positive'])
    posr = recall(reference['positive'], observed['positive'])
    posf = f_measure(reference['positive'], observed['positive'])
    negp = precision(reference['negative'], observed['negative'])
    negr = recall(reference['negative'], observed['negative'])
    negf = f_measure(reference['negative'], observed['negative'])

    # Single-argument print calls behave the same on Python 2 and 3.
    print("accuracy: %s" % (acc,))
    print("pos precision: %s" % (posp,))
    print("pos recall: %s" % (posr,))
    print("pos f-measure: %s" % (posf,))
    print("neg precision: %s" % (negp,))
    print("neg recall: %s" % (negr,))
    print("neg f-measure: %s" % (negf,))
    return (acc, posp, posr, posf, negp, negr, negf)
开发者ID:anov,项目名称:honors,代码行数:27,代码来源:classifier.py
示例3: print_precision_recall
def print_precision_recall(classifier, test_dict):
    """Print precision, recall and F-measure for 'positive' and 'negative'.

    :param classifier: object whose ``classify(feats)`` returns a label.
    :param test_dict: iterable of ``(feats, label)`` pairs.
    :return: None
    """
    refsets = defaultdict(set)
    testsets = defaultdict(set)
    for i, (feats, label) in enumerate(test_dict):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    report = (
        ('pos precision:', precision, 'positive'),
        ('pos recall:', recall, 'positive'),
        ('pos F-measure:', f_measure, 'positive'),
        ('neg precision:', precision, 'negative'),
        ('neg recall:', recall, 'negative'),
        ('neg F-measure:', f_measure, 'negative'),
    )
    for caption, metric, label in report:
        # One pre-formatted argument: same output as the old
        # ``print caption, value`` statement, valid on Python 2 and 3.
        print('%s %s' % (caption, metric(refsets[label], testsets[label])))
开发者ID:gleicon,项目名称:sentiment_analysis,代码行数:13,代码来源:filters.py
示例4: GetEvaluacion
def GetEvaluacion(self):
    '''
    Return the classifier's precision, recall and confusion matrix.

    The nltk functions precision, recall and ConfusionMatrix are applied to
    the test data (``self.DatosTesteo``).

    Returns a tuple (positivos, negativos, matriz) where positivos and
    negativos are themselves (precision, recall) tuples.

    Precision: instances correctly classified into the class / instances
    classified into the class:
        TP / (TP + FP)
    The higher it is, the fewer false positives there are; it is the share
    of the items assigned to this class that really belong to it.

    Recall: instances correctly classified into the class / instances that
    really are in the class:
        TP / (TP + FN)
    The higher it is, the fewer false negatives there are; it is the share
    of the items really in the class that were classified into it.

    Precision and recall are per class, not global: there is one pair for
    the positive class and another for the negative class.

    Worth reading: http://streamhacker.com/2010/05/17/text-classification-sentiment-analysis-precision-recall/
    '''
    clasificador = self.GetClasificador()
    test_data = self.DatosTesteo

    # The nltk functions work on sets, so collect item indices per class.
    actual = {CLASE_POSITIVO: set(), CLASE_NEGATIVO: set()}      # true classes
    predicted = {CLASE_POSITIVO: set(), CLASE_NEGATIVO: set()}   # classifier output

    # Parallel label lists feed the confusion matrix.
    actual_labels = []
    predicted_labels = []

    for index, item in enumerate(test_data):
        true_class = item[1]
        actual[true_class].add(index)
        guess = clasificador.classify(item[0])
        predicted[guess].add(index)
        actual_labels.append(true_class)
        predicted_labels.append(guess)

    def _scores(clase):
        # (precision, recall) for one class.
        return (precision(actual[clase], predicted[clase]),
                recall(actual[clase], predicted[clase]))

    positivos = _scores(CLASE_POSITIVO)
    negativos = _scores(CLASE_NEGATIVO)
    return (positivos, negativos, ConfusionMatrix(actual_labels, predicted_labels))
开发者ID:cristianocca,项目名称:pln_tarea1,代码行数:51,代码来源:procesador.py
示例5: benchmarking
def benchmarking(self, classifier, _test_set, all_f_measure=None, all_precision=None, all_recall=None):
    """Print accuracy, precision, recall and F-measure for ``classifier``.

    Precision, recall and F-measure are computed for the label ``'class'``
    only. The ten most informative features are printed at the end.

    :param classifier: trained classifier exposing ``classify`` and
        ``show_most_informative_features``.
    :param _test_set: iterable of ``(feats, label)`` pairs.
    :param all_f_measure: optional list; this run's F-measure is appended.
    :param all_precision: optional list; this run's precision is appended.
    :param all_recall: optional list; this run's recall is appended.
    :return: None
    """
    from nltk import classify
    accuracy = classify.accuracy(classifier, _test_set)
    print("accuracy:", accuracy)

    from nltk.metrics import precision
    from nltk.metrics import recall
    from nltk.metrics import f_measure

    import collections
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(_test_set):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    prec = precision(refsets['class'], testsets['class'])
    rec = recall(refsets['class'], testsets['class'])
    f1 = f_measure(refsets['class'], testsets['class'])
    print('precision:', prec)
    print('recall:', rec)
    print('F-measure:', f1)

    # The defaults used to be shared ``[]`` objects that silently grew across
    # calls. Only lists supplied by the caller are appended to now.
    if all_f_measure is not None:
        all_f_measure.append(f1)
    if all_precision is not None:
        all_precision.append(prec)
    if all_recall is not None:
        all_recall.append(rec)

    print('========Show top 10 most informative features========')
    classifier.show_most_informative_features(10)
开发者ID:jerrygaoLondon,项目名称:oke-extractor,代码行数:30,代码来源:okeConceptRecogniser.py
示例6: precision_and_recall
def precision_and_recall(classifier, testfeats):
    """Compute per-label precision and recall of ``classifier``.

    :param classifier: object exposing ``classify(feats)`` and ``labels()``.
    :param testfeats: iterable of ``(feats, label)`` pairs.
    :return: ``(prec, rec)``, two dicts keyed by the labels from
        ``classifier.labels()``.
    """
    gold = collections.defaultdict(set)       # label -> indices carrying that label
    guessed = collections.defaultdict(set)    # label -> indices predicted as that label
    for index, (feats, label) in enumerate(testfeats):
        gold[label].add(index)
        guessed[classifier.classify(feats)].add(index)

    known_labels = classifier.labels()
    prec = {}
    rec = {}
    for name in known_labels:
        reference, predicted = gold[name], guessed[name]
        prec[name] = precision(reference, predicted)
        rec[name] = recall(reference, predicted)
    return prec, rec
开发者ID:efrenaguilar95,项目名称:Yelp_Analyzer,代码行数:29,代码来源:classifiers.py
示例7: multi_metrics
def multi_metrics(multi_classifier, test_feats):
    """Score a multi-label classifier.

    :param multi_classifier: object whose ``classify(feat)`` returns a
        collection of labels and which exposes ``labels()``.
    :param test_feats: iterable of ``(feat, labels)`` pairs, ``labels``
        being the gold labels of that item.
    :return: ``(precisions, recalls, avg_md)``: per-label precision and
        recall dicts plus the mean ``metrics.masi_distance`` between gold
        and guessed label sets.
    """
    gold_index = collections.defaultdict(set)
    guess_index = collections.defaultdict(set)
    distances = []

    for position, (feat, labels) in enumerate(test_feats):
        for gold_label in labels:
            gold_index[gold_label].add(position)

        guessed = multi_classifier.classify(feat)
        for guessed_label in guessed:
            guess_index[guessed_label].add(position)

        distances.append(metrics.masi_distance(set(labels), guessed))

    avg_md = sum(distances) / float(len(distances))

    precisions = {}
    recalls = {}
    for label in multi_classifier.labels():
        reference, predicted = gold_index[label], guess_index[label]
        precisions[label] = metrics.precision(reference, predicted)
        recalls[label] = metrics.recall(reference, predicted)
    return precisions, recalls, avg_md
开发者ID:RomanZacharia,项目名称:python_text_processing_w_nltk2_cookbook,代码行数:25,代码来源:classification.py
示例8: calculate
def calculate(classifier, feature_set):
    """Print precision and recall for 'country', 'religion' and 'astronomy'.

    :param classifier: object whose ``classify(feats)`` returns a label.
    :param feature_set: iterable of ``(feats, label)`` pairs.
    :return: None
    """
    gold = collections.defaultdict(set)
    predicted = collections.defaultdict(set)
    print("Calculating refsets for precision and recall")
    for index, (feats, label) in enumerate(feature_set):
        gold[label].add(index)
        predicted[classifier.classify(feats)].add(index)

    # (caption, metric function, label), in print order.
    report = (
        ('country precision:', metrics.precision, 'country'),
        ('country recall:', metrics.recall, 'country'),
        ('religion precision:', metrics.precision, 'religion'),
        ('religion recall:', metrics.recall, 'religion'),
        ('astronomy precision:', metrics.precision, 'astronomy'),
        ('astronomy recall:', metrics.recall, 'astronomy'),
    )
    for caption, metric, label in report:
        print(caption, metric(gold[label], predicted[label]))
开发者ID:Bakuchi,项目名称:naivebayes,代码行数:18,代码来源:Estimator.py
示例9: print_results
def print_results(classifier, featureset, results, name):
    """Print accuracy plus precision/recall for 'B-SNP', 'I-SNP' and 'O'.

    :param classifier: classifier passed to ``accuracy``.
    :param featureset: labelled data passed to ``accuracy``.
    :param results: indexable pair; ``results[0]`` holds the reference index
        sets per tag and ``results[1]`` the predicted ones, in the argument
        order ``precision``/``recall`` receive them.
    :param name: classifier name shown in the report header.
    :return: None
    """
    template = '''
%s classifier results:
Classifier accuracy: %s
B Precision: %s
B Recall: %s
I Precision: %s
I Recall: %s
O Precision: %s
O Recall: %s
'''
    values = (name,
              accuracy(classifier, featureset),
              precision(results[0]['B-SNP'], results[1]['B-SNP']),
              recall(results[0]['B-SNP'], results[1]['B-SNP']),
              precision(results[0]['I-SNP'], results[1]['I-SNP']),
              recall(results[0]['I-SNP'], results[1]['I-SNP']),
              precision(results[0]['O'], results[1]['O']),
              recall(results[0]['O'], results[1]['O']))
    # print() with one argument is valid on Python 2 and 3; the old print
    # statement was Python 2 only.
    print(template % values)
开发者ID:urtonj,项目名称:PA4,代码行数:18,代码来源:Driver.py
示例10: eval_stats
def eval_stats(results):
    '''
    Print recall, precision and f-measure computed from the passed results.

    The expected format for results is a dictionary whose keys=<name of article>
    and values=tuple (<test category>, <reference category>, <scores>), where:
    test=category suggested by classifier, reference=pre-classified gold
    category, scores=can be None or dictionary whose keys=category names and
    values=matching score for this article.

    Output: the ratio of correct matches, every wrong match, one row per
    reference category and a support-weighted "Total" row. Returns None.
    '''
    # Count correct matches and remember the wrong ones.
    correct = 0
    missed = {}
    for article_name, (suggested, real, scores) in results.items():
        if suggested == real:
            correct += 1
        else:
            missed[article_name] = (suggested, real)
    success_ratio = correct / float(len(results))
    print("Ratio: %0.3f" % success_ratio)

    # Print wrong matches
    for name, (suggested, real) in missed.items():
        print("%s\t%s\t%s" % (name, suggested, real))

    # Create sets of references / test classification for evaluation
    cat_ref = defaultdict(set)
    cat_test = defaultdict(set)
    for name, (test_category, ref_category, scores) in results.items():
        cat_ref[ref_category].add(name)     # gold-tagged categories
        cat_test[test_category].add(name)   # suggested categories

    def _defined(value):
        # The metric functions return None when a value is undefined (for
        # example precision of a category that was never suggested). Such
        # a value is counted as 0.0 so the "%0.3f" formatting and the
        # weighted averages below do not fail.
        return 0.0 if value is None else value

    # Precision, recall, f-measure, support (num of reference articles in
    # each category) for each category
    print("\nCategory\tPrecision\tRecall\tF-measure\tSupport")
    measures = {}
    for category in list(cat_ref.keys()):
        reference, suggested_set = cat_ref[category], cat_test[category]
        cat_prec = _defined(metrics.precision(reference, suggested_set))
        cat_rec = _defined(metrics.recall(reference, suggested_set))
        cat_f = _defined(metrics.f_measure(reference, suggested_set))
        cat_support = len(reference)
        measures[category] = (cat_prec, cat_rec, cat_f, cat_support)
        print("%s\t%0.3f\t%0.3f\t%0.3f\t%d" %
              (category, cat_prec, cat_rec, cat_f, cat_support))

    # Corpus-wide figures: each (value, support) pair is handed to
    # weighted_average, so a category weighs in by its number of
    # reference articles.
    per_category = list(measures.values())
    avg_prec = weighted_average([(m[0], m[3]) for m in per_category])
    avg_rec = weighted_average([(m[1], m[3]) for m in per_category])
    avg_f = weighted_average([(m[2], m[3]) for m in per_category])
    total_support = sum(m[3] for m in per_category)
    print("%s\t%0.3f\t%0.3f\t%0.3f\t%d" % ("Total", avg_prec, avg_rec, avg_f, total_support))
开发者ID:campustimes,项目名称:pnlp-final-project,代码行数:56,代码来源:eval_class.py
示例11: evaluate_features
def evaluate_features(feature_extractor, N, only_acc=False):
    """Train a Naive Bayes classifier on ``movie_reviews`` and report on it.

    :param feature_extractor: callable applied to
        ``movie_reviews.sents(fileids=[f])`` for every review file.
    :param N: second argument passed to ``stratifiedSamples`` for the
        negative and the positive feature lists.
    :param only_acc: when true, return the accuracy and print nothing.
    :return: the accuracy if ``only_acc`` is true, else the trained
        classifier.
    """
    from nltk.corpus import movie_reviews
    from nltk.classify import NaiveBayesClassifier as naive
    from nltk.classify.util import accuracy
    from nltk.metrics import precision, recall, f_measure
    from sys import stdout

    negative = movie_reviews.fileids('neg')
    positive = movie_reviews.fileids('pos')
    negfeats = [(feature_extractor(movie_reviews.sents(fileids=[f])),
                 'neg') for f in negative]
    posfeats = [(feature_extractor(movie_reviews.sents(fileids=[f])),
                 'pos') for f in positive]

    negtrain, negtest = stratifiedSamples(negfeats, N)
    postrain, postest = stratifiedSamples(posfeats, N)

    trainfeats = negtrain + postrain
    testfeats = negtest + postest
    classifier = naive.train(trainfeats)
    if only_acc:
        return accuracy(classifier, testfeats)
    print('accuracy: {}'.format(accuracy(classifier, testfeats)))

    # Precision, Recall, F-measure
    from collections import defaultdict
    refsets = defaultdict(set)
    testsets = defaultdict(set)
    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    report = (
        ('pos precision:', precision, 'pos'),
        ('pos recall:', recall, 'pos'),
        ('pos F-measure:', f_measure, 'pos'),
        ('neg precision:', precision, 'neg'),
        ('neg recall:', recall, 'neg'),
        ('neg F-measure:', f_measure, 'neg'),
    )
    for caption, metric, label in report:
        # One pre-formatted argument: same text as the old print statement,
        # valid on Python 2 and 3.
        print('%s %s' % (caption, metric(refsets[label], testsets[label])))

    stdout.flush()
    classifier.show_most_informative_features()
    return classifier
开发者ID:lxmonk,项目名称:nlg12_hw2,代码行数:42,代码来源:hw2.py
示例12: evaluate_features
def evaluate_features(self, feature_select):
    """Train and evaluate a Naive Bayes classifier on the rt-polarity files.

    Reads 'rt-polarity-pos.txt' and 'rt-polarity-neg.txt' from the current
    directory, trains on the first 3/4 of each class, tests on the rest and
    prints accuracy plus per-class precision and recall.

    :param feature_select: callable turning a list of tokens into the
        feature representation given to the classifier.
    :return: None
    """
    def _read_sentences(path):
        # reading pre-labeled input and splitting into lines; the context
        # manager closes the file, which the old code never did
        with open(path, 'r') as handle:
            return re.split(r'\n', handle.read())

    posSentences = _read_sentences('rt-polarity-pos.txt')
    negSentences = _read_sentences('rt-polarity-neg.txt')

    # breaks up the sentences into lists of individual words (as selected by
    # the input mechanism) and pairs each list with 'pos' or 'neg'
    posFeatures = []
    negFeatures = []
    for sentence in posSentences:
        posWords = re.findall(r"[\w']+|[.,!?;]", sentence)
        posFeatures.append([feature_select(posWords), 'pos'])
    for sentence in negSentences:
        negWords = re.findall(r"[\w']+|[.,!?;]", sentence)
        negFeatures.append([feature_select(negWords), 'neg'])

    # selects 3/4 of the features to be used for training and 1/4 to be used for testing
    posCutoff = int(math.floor(len(posFeatures)*3/4))
    negCutoff = int(math.floor(len(negFeatures)*3/4))
    trainFeatures = posFeatures[:posCutoff] + negFeatures[:negCutoff]
    testFeatures = posFeatures[posCutoff:] + negFeatures[negCutoff:]

    # Training Phase:
    classifier = NaiveBayesClassifier.train(trainFeatures)

    # Testing Phase:
    referenceSets = collections.defaultdict(set)
    testSets = collections.defaultdict(set)
    for i, (features, label) in enumerate(testFeatures):
        referenceSets[label].add(i)
        predicted = classifier.classify(features)
        testSets[predicted].add(i)

    # Single-argument print calls: same output as the old Python 2 print
    # statements, and valid on Python 3.
    print('Trained on %d instances, Tested on %d instances' % (len(trainFeatures), len(testFeatures)))
    print('Accuracy: %s' % (nltk.classify.util.accuracy(classifier, testFeatures),))
    print('Positive Precision: %s' % (precision(referenceSets['pos'], testSets['pos']),))
    print('Positive Recall: %s' % (recall(referenceSets['pos'], testSets['pos']),))
    print('Negative Precision: %s' % (precision(referenceSets['neg'], testSets['neg']),))
    print('Negative Recall: %s' % (recall(referenceSets['neg'], testSets['neg']),))
开发者ID:sepidazar,项目名称:Text-Mining-Application,代码行数:42,代码来源:SentimentAnalysis.py
示例13: calcAllClassesRecall
def calcAllClassesRecall(classSet, refsets, testsets):
    """Return the mean recall over the categories in ``classSet``.

    Categories for which ``recall`` returns None are left out of the mean.

    :param classSet: iterable of category keys.
    :param refsets: mapping category -> reference set.
    :param testsets: mapping category -> test set.
    :return: the averaged recall as a float, or 0.0 when no category has a
        defined recall (previously this raised ZeroDivisionError).
    """
    rSum = 0.0
    denominator = 0
    for category in classSet:
        num = recall(refsets[category], testsets[category])
        if num is None:
            continue
        rSum += num
        denominator += 1
    if denominator == 0:
        # Nothing to average: empty classSet or every recall undefined.
        return 0.0
    return rSum / denominator
开发者ID:peeceeprashant,项目名称:SharedTask,代码行数:11,代码来源:explicit_sense_perceptron_predict.py
示例14: main
def main():
    """Build, pickle and evaluate the Naive Bayes tweet classifier.

    Steps: load the tweets, select the best words (stored in the module
    global ``best_words`` and pickled to 'bestwords.pickle'), train on
    tweets[1000:1599000], pickle the classifier to 'NBclassifier_new.pickle',
    then print precision/recall for labels '0' and '4', the accuracy on the
    held-out tweets and the 30 most informative features.
    """
    global best_words

    tweets = get_tweets_from_db()
    tweet_list = tweets[1000:1599000]
    test_list = tweets[:1000] + tweets[1599000:]

    word_scores = create_word_scores()
    best_words = find_best_words(word_scores, 500000)
    # ``with`` closes the file even if pickling fails.
    with open('bestwords.pickle', 'wb') as f:
        pickle.dump(best_words, f)

    training_set = classify.apply_features(best_word_features, tweet_list)
    print("extracted features")

    # train the classifier with the training set
    classifier = NaiveBayesClassifier.train(training_set)
    print("trained classifier")

    # create the pickle file
    with open('NBclassifier_new.pickle', 'wb') as f:
        pickle.dump(classifier, f)
    print("created pickle")

    # test for precision and recall
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    test_set = classify.apply_features(best_word_features, test_list)
    for i, (feats, label) in enumerate(test_set):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    print('neg precision: %s' % (metrics.precision(refsets['0'], testsets['0']),))
    print('neg recall: %s' % (metrics.recall(refsets['0'], testsets['0']),))
    print('pos precision: %s' % (metrics.precision(refsets['4'], testsets['4']),))
    print('pos recall: %s' % (metrics.recall(refsets['4'], testsets['4']),))

    print(classify.accuracy(classifier, test_set))
    # show_most_informative_features prints its own report; wrapping it in
    # print only added a stray "None" line.
    classifier.show_most_informative_features(30)
开发者ID:asdvalenzuela,项目名称:moodmap,代码行数:39,代码来源:buildClassifier.py
示例15: precision_recall
def precision_recall(classifier, testfeats):
    """Return per-label precision and recall dictionaries.

    :param classifier: object exposing ``classify(feats)`` and ``labels()``.
    :param testfeats: iterable of ``(feats, label)`` pairs.
    :return: ``(precisions, recalls)`` keyed by ``classifier.labels()``.
    """
    expected = collections.defaultdict(set)
    predicted = collections.defaultdict(set)
    for position, (feats, gold_label) in enumerate(testfeats):
        expected[gold_label].add(position)
        guess = classifier.classify(feats)
        predicted[guess].add(position)

    precisions, recalls = {}, {}
    for label in classifier.labels():
        ref_ids = expected[label]
        test_ids = predicted[label]
        precisions[label] = metrics.precision(ref_ids, test_ids)
        recalls[label] = metrics.recall(ref_ids, test_ids)
    return precisions, recalls
开发者ID:shingjay,项目名称:dealchan,代码行数:13,代码来源:classification.py
示例16: precision_recall
def precision_recall(classifier, testfeats):
    """Give the precision and recall of a classifier, label by label.

    Precision reflects the lack of false positives, recall the lack of
    false negatives.

    :param classifier: object exposing ``classify(feats)`` and ``labels()``.
    :param testfeats: iterable of ``(feats, label)`` pairs.
    :return: ``(precisions, recalls)`` keyed by ``classifier.labels()``.
    """
    truth = collections.defaultdict(set)
    answers = collections.defaultdict(set)

    for idx, sample in enumerate(testfeats):
        feats, label = sample
        truth[label].add(idx)
        answers[classifier.classify(feats)].add(idx)

    precisions = {}
    recalls = {}
    for name in classifier.labels():
        precisions[name] = precision(truth[name], answers[name])
        recalls[name] = recall(truth[name], answers[name])
    return precisions, recalls
开发者ID:DeamonSpawn,项目名称:UntitledSAProj,代码行数:18,代码来源:classifiers.py
示例17: recall
def recall(self, reference):
    """
    Compute the recall of this aligned sentence against a "gold
    standard" reference.

    :type reference: AlignedSent or Alignment
    :param reference: A "gold standard" reference aligned sentence.
    :rtype: float or None
    """
    hypothesis = self.alignment
    # Only "sure" alignments count, so a missing alignment that was merely
    # marked "possible" is not penalized.
    gold = reference.alignment if isinstance(reference, AlignedSent) else Alignment(reference)
    # Inside a class body this name resolves to the module-level recall
    # function, not to this method.
    return recall(gold, hypothesis)
开发者ID:avadnal,项目名称:CLIR--Project-1,代码行数:21,代码来源:nltk_align.py
示例18: train_classifiers
def train_classifiers(posFeatures,negFeatures):
#selects 3/4 of the features to be used for training and 1/4 to be used for testing
posCutoff = int(math.floor(len(posFeatures)*3/4))
negCutoff = int(math.floor(len(negFeatures)*3/4))
trainFeatures = posFeatures[:posCutoff] + negFeatures[:negCutoff]
testFeatures = posFeatures[posCutoff:] + negFeatures[negCutoff:]
#trains a Naive Bayes Classifier
print ("----------------Naive Bayes Classifier-----------")
classifier = NaiveBayesClassifier.train(trainFeatures)
#initiates referenceSets and testSets
referenceSets = collections.defaultdict(set)
testSets = collections.defaultdict(set)
#puts correctly labeled sentences in referenceSets and the predictively labeled version in testsets
for i, (features, label) in enumerate(testFeatures):
referenceSets[label].add(i)
predicted = classifier.classify(features)
testSets[predicted].add(i)
#prints metrics to show how well the feature selection did
print ('train on %d instances, test on %d instances' % (len(trainFeatures), len(testFeatures)))
print ('Original Naive Bayes Accuracy:', (nltk.classify.util.accuracy(classifier, testFeatures))*100)
print ('pos precision:', precision(referenceSets['pos'], testSets['pos']))
print ('pos recall:', recall(referenceSets['pos'], testSets['pos']))
print ('neg precision:',precision(referenceSets['neg'], testSets['neg']))
print ('neg recall:', recall(referenceSets['neg'], testSets['neg']))
classifier.show_most_informative_features(10)
#Pickle the algorithm for future use
save_classifier = open("pickled_algos/originalnaivebayes.pickle","wb")
pickle.dump(classifier, save_classifier)
save_classifier.close()
MNB_classifier = SklearnClassifier(MultinomialNB())
MNB_classifier.train(trainFeatures)
print("MNB_classifier accuracy percent:", (nltk.classify.accuracy(MNB_classifier, testFeatures))*100)
#Pickle the algorithm for future use
save_classifier = open("pickled_algos/MNB_classifier.pickle","wb")
pickle.dump(MNB_classifier, save_classifier)
save_classifier.close()
BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
BernoulliNB_classifier.train(trainFeatures)
print("BernoulliNB_classifier accuracy percent:", (nltk.classify.accuracy(BernoulliNB_classifier, testFeatures))*100)
#Pickle the algorithm for future use
save_classifier = open("pickled_algos/BernoulliNB_classifier.pickle","wb")
pickle.dump(BernoulliNB_classifier, save_classifier)
save_classifier.close()
LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
LogisticRegression_classifier.train(trainFeatures)
print("LogisticRegression_classifier accuracy percent:", (nltk.classify.accuracy(LogisticRegression_classifier, testFeatures))*100)
#Pickle the algorithm for future use
save_classifier = open("pickled_algos/LogisticRegression_classifier.pickle","wb")
pickle.dump(LogisticRegression_classifier, save_classifier)
save_classifier.close()
LinearSVC_classifier = SklearnClassifier(LinearSVC())
LinearSVC_classifier.train(trainFeatures)
print("LinearSVC_classifier accuracy percent:", (nltk.classify.accuracy(LinearSVC_classifier, testFeatures))*100)
#Pickle the algorithm for future use
save_classifier = open("pickled_algos/LinearSVC_classifier.pickle","wb")
pickle.dump(LinearSVC_classifier, save_classifier)
save_classifier.close()
SGDC_classifier = SklearnClassifier(SGDClassifier())
SGDC_classifier.train(trainFeatures)
print("SGDClassifier accuracy percent:",nltk.classify.accuracy(SGDC_classifier, testFeatures)*100)
#Pickle the algorithm for future use
save_classifier = open("pickled_algos/SGDC_classifier.pickle","wb")
pickle.dump(SGDC_classifier, save_classifier)
save_classifier.close()
Dec_Tree_Classifier = SklearnClassifier(DecisionTreeClassifier())
Dec_Tree_Classifier.train(trainFeatures)
print("DecisionTreeClassifier Accuracy:",(nltk.classify.accuracy(Dec_Tree_Classifier,testFeatures))*100)
#Pickle the algorithm for future use
save_classifier = open("pickled_algos/decision_tree.pickle","wb")
pickle.dump(Dec_Tree_Classifier, save_classifier)
save_classifier.close()
"""
# Grad_Boost_Classifier = SklearnClassifier(GradientBoostingClassifier())
# Grad_Boost_Classifier.train(trainFeatures)
# print("Gradient Boosting Classifier Accuracy:", (nltk.classify.accuracy(Grad_Boost_Classifier,testFeatures))*100)
#.........这里部分代码省略.........
开发者ID:vatsalgit,项目名称:Project_Sentiment_Analysis,代码行数:101,代码来源:train_classifiers.py
示例19: print
# Hold-out split: the first 4000 items of each class train the model, the
# rest are used for the precision / recall / F-measure report below.
trainfeats = negfeats[:4000] + posfeats[:4000]
testfeats = negfeats[4000:] + posfeats[4000:]
print("train on %d instances, test on %d instances" % (len(trainfeats), len(testfeats)))
classifier = NaiveBayesClassifier.train(trainfeats)

# label -> indices with that gold label (refsets) / predicted label (testsets)
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)
for i, (feats, label) in enumerate(testfeats):
    refsets[label].add(i)
    observed = classifier.classify(feats)
    testsets[observed].add(i)

# cross validation 3-fold
feats = negfeats + posfeats
M = len(feats) // 3  # fold size
result = []
for n in range(3):
    val_set = feats[n * M :][:M]
    train_set = feats[(n + 1) * M :] + feats[: n * M]
    # The fold model gets its own name. Rebinding ``classifier`` here made
    # the feature listing at the end describe the last fold's model instead
    # of the hold-out model whose metrics are printed.
    fold_classifier = nltk.NaiveBayesClassifier.train(train_set)
    result.append("{:.4f}".format(round(nltk.classify.accuracy(fold_classifier, val_set) * 100, 4)))
print("cross_validation:", result)

print("pos precision:", precision(refsets["pos"], testsets["pos"]))
print("pos recall:", recall(refsets["pos"], testsets["pos"]))
print("pos F-measure:", f_measure(refsets["pos"], testsets["pos"]))
print("neg precision:", precision(refsets["neg"], testsets["neg"]))
print("neg recall:", recall(refsets["neg"], testsets["neg"]))
print("neg F-measure:", f_measure(refsets["neg"], testsets["neg"]))
classifier.show_most_informative_features()
开发者ID:efrenaguilar95,项目名称:Yelp_Analyzer,代码行数:30,代码来源:nbClassifierV2.py
示例20: enumerate
# Now create the data structure for model evaluation:
# label -> indices with that gold label (refsets) / predicted label (testsets)
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)
for i, (feats, label) in enumerate(testfeats):
    refsets[label].add(i)
    observed = classifier.classify(feats)
    testsets[observed].add(i)

precisions = {}
recalls = {}
for label in classifier.labels():
    precisions[label] = metrics.precision(refsets[label], testsets[label])
    recalls[label] = metrics.recall(refsets[label], testsets[label])

# Let us calculate Precision & Recall and compare with nltk.
# Luckily the data structures are symmetric: c_rp counts the items whose
# gold label is labels[r] and whose predicted label is labels[p].
c_00 = len(refsets[labels[0]].intersection(testsets[labels[0]]))
c_01 = len(refsets[labels[0]].intersection(testsets[labels[1]]))
c_10 = len(refsets[labels[1]].intersection(testsets[labels[0]]))
c_11 = len(refsets[labels[1]].intersection(testsets[labels[1]]))

# Rows are gold labels, columns are predictions. Single-argument print
# calls are valid on Python 2 and 3.
print(' | H | S |')
print('--|-------|-------|')
print('H | %5d | %5d |' % (c_00, c_01))
print('--|-------|-------|')
print('S | %5d | %5d |' % (c_10, c_11))
开发者ID:altonga,项目名称:pydata,代码行数:31,代码来源:spam-02.py
注:本文中的nltk.metrics.recall函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论