This article collects typical usage examples of the Python function nltk.classify.accuracy. If you are unsure what accuracy does, how to call it, or what real-world uses look like, the 20 curated code samples below may help.
The examples are ordered by popularity by default. You can upvote the ones you like or find useful; your feedback helps the system recommend better Python examples.
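For orientation before the examples: nltk.classify.accuracy(classifier, gold) takes a trained classifier and a list of (featureset, label) pairs, classifies each featureset, and returns the fraction of predictions that match the gold labels. Below is a minimal, self-contained sketch of that call pattern; the toy feature sets and labels are invented purely for illustration. (Python 3 syntax; several of the examples further down are older Python 2 code.)

from nltk.classify import NaiveBayesClassifier, accuracy

# Toy labeled feature sets (hypothetical data, for illustration only).
train_set = [({'last_letter': 'a'}, 'female'), ({'last_letter': 'k'}, 'male'),
             ({'last_letter': 'e'}, 'female'), ({'last_letter': 'o'}, 'male')]
test_set = [({'last_letter': 'a'}, 'female'), ({'last_letter': 'n'}, 'male')]

classifier = NaiveBayesClassifier.train(train_set)
# accuracy() classifies every featureset in test_set and returns the
# proportion whose predicted label equals the gold label.
print(accuracy(classifier, test_set))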
Example 1: category_by_movie
def category_by_movie():
    from nltk.corpus import movie_reviews as mr
    from nltk import FreqDist
    from nltk import NaiveBayesClassifier
    from nltk import classify
    from nltk.corpus import names
    from nltk.classify import apply_features
    import random

    documents = [(list(mr.words(f)), c) for c in mr.categories()
                 for f in mr.fileids(c)]
    random.shuffle(documents)

    all_words = FreqDist(w.lower() for w in mr.words())
    word_features = all_words.keys()[:2000]

    def document_features(document):
        document_words = set(document)
        features = {}
        for word in word_features:
            features['contains(%s)' % word] = (word in document_words)
        return features

    #print document_features(mr.words('pos/cv957_8737.txt'))
    #print documents[0]
    features = [(document_features(d), c) for (d, c) in documents]
    train_set, test_set = features[100:], features[:100]
    classifier = NaiveBayesClassifier.train(train_set)
    # Evaluate on the held-out test set; the training-set score overstates performance.
    print classify.accuracy(classifier, test_set)
Developer: brenden17 | Project: infinity | Lines: 30 | Source: category_nltk.py
Example 2: cross_validate
def cross_validate(classifier, training_set, test_set):
    # Assumes, from the source module: from operator import itemgetter;
    # from nltk import classify; and the pre-0.20 scikit-learn cross_validation module.
    chosen_classif = classifier
    best_accuracy = 0.0
    best_train_accuracy = None
    best_classifier = None
    k_fold = cross_validation.KFold(len(training_set), n_folds=10)
    for train_indices, test_indices in k_fold:
        train = itemgetter(*train_indices)(training_set)
        test = itemgetter(*test_indices)(training_set)
        classifier = chosen_classif.train(train)
        print '--------------------------------'
        train_accuracy = classify.accuracy(classifier, train)
        print 'Training set accuracy: ' + str(train_accuracy)
        if len(test_indices) == 1:
            # itemgetter with a single index returns the bare item, not a tuple
            test = (test,)
        accuracy = classify.accuracy(classifier, test)
        if accuracy > best_accuracy:
            best_classifier = classifier
            best_accuracy = accuracy
            best_train_accuracy = train_accuracy
        print 'Cross validation set accuracy: ' + str(accuracy)
        get_fscore(classifier, test)
    print 'Best classifier CV accuracy: ' + str(best_accuracy)
    test_accuracy = classify.accuracy(best_classifier, test_set)
    print 'Best classifier accuracy: ' + str(test_accuracy)
    print 'Best classifier precision recall fscore: '
    print get_fscore(best_classifier, test_set)
    return [test_accuracy, best_train_accuracy, best_classifier]
Developer: jedijulia | Project: nlp-tourism | Lines: 28 | Source: sentiment_analyzer.py
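Note that the sklearn.cross_validation module used above was deprecated in scikit-learn 0.18 and removed in 0.20. On a current scikit-learn, the equivalent fold indices come from sklearn.model_selection.KFold, whose constructor takes n_splits and whose split() method yields the (train, test) index pairs. A minimal sketch against the variable names of Example 2:

from sklearn.model_selection import KFold

# KFold(n_splits=...) replaces KFold(n, n_folds=...); split() only needs
# something with a length to generate index arrays.
k_fold = KFold(n_splits=10)
for train_indices, test_indices in k_fold.split(training_set):
    pass  # same loop body as in cross_validate above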
Example 3: evaluate
def evaluate(train_set, test_set, classifier, name):
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (features, label) in enumerate(test_set):
        refsets[label].add(i)
        observed = classifier.classify(features)
        testsets[observed].add(i)
    # Accuracy on the training and test sets, plus spam/ham false rates derived from recall.
    trainacc = 100 * classify.accuracy(classifier, train_set)
    testacc = 100 * classify.accuracy(classifier, test_set)
    spam_false = 100 - nltk.recall(refsets['spam'], testsets['spam']) * 100
    ham_false = 100 - nltk.recall(refsets['ham'], testsets['ham']) * 100
    return trainacc, testacc, spam_false, ham_false
Developer: Vermeij | Project: Spamfilter | Lines: 13 | Source: evaluate.py
Example 4: main
def main():
    spam = load_dataset('spam', sys.argv[1], True)
    ham = load_dataset('ham', sys.argv[2], True)
    # Note: these slices overlap -- items 1000..11499 appear in both the
    # training and test sets, so the reported accuracies are optimistic.
    training_spam = spam[:11500]
    training_ham = ham[:11500]
    test_spam = spam[1000:]
    test_ham = ham[1000:]
    nbc = NaiveBayesClassifier.train(training_ham + training_spam)
    cPickle.dump(nbc, sys.stdout)
    sys.stderr.writelines(['Spam accuracy: %f\n' % accuracy(nbc, test_spam),
                           'Ham accuracy: %f\n' % accuracy(nbc, test_ham)])
Developer: cscenter | Project: BuzzScore | Lines: 13 | Source: classifier_trainer.py
Example 5: classifier
def classifier(lambda_):
    clf = get_classifier('%f' % lambda_, __train_fs, lambda_)
    logging.debug("Finished training the classifier lambda=%f ..." % lambda_)
    dev_acc = accuracy(clf, __dev_fs)
    logging.debug("classifier lambda=%f accuracy on DEV is: %3.5f",
                  lambda_, dev_acc)
    train_acc = accuracy(clf, __train_fs)
    logging.debug("classifier lambda=%f accuracy on TRAIN is: %3.5f",
                  lambda_, train_acc)
    # clf.show_most_informative_features()
    result = [clf.classify(fs) for fs, label in __dev_fs]
    gold = [label for fs, label in __dev_fs]
    cm = nltk.ConfusionMatrix(gold, result)
    cf_text = cm.pp(sort_by_count=True, show_percents=True, truncate=20)
    return (lambda_, dev_acc, train_acc, cf_text, clf)
Developer: aboSamoor | Project: NLP | Lines: 15 | Source: rami_learning.py
Example 6: main_function
def main_function():
    conn = MySQLdb.connect(host=DATABASES['date_cutoff']['HOST'],
                           user=DATABASES['date_cutoff']['USER'],
                           passwd=DATABASES['date_cutoff']['PASSWORD'],
                           db=DATABASES['date_cutoff']['NAME'])
    # The excerpt referenced an undefined conn_analysis; using conn defined above.
    training_tweets = classify.get_training_tweets(conn)
    training_feature_set = process_tweets(training_tweets)
    config_megam('/opt/packages')
    classifier = MaxentClassifier.train(training_feature_set, algorithm="megam", trace=0)
    error_dict = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    count_dict = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    guess_dict = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    full_matrix = {'+': {'+': 0, '-': 0, 'I': 0, 'O': 0},
                   '-': {'+': 0, '-': 0, 'I': 0, 'O': 0},
                   'I': {'+': 0, '-': 0, 'I': 0, 'O': 0},
                   'O': {'+': 0, '-': 0, 'I': 0, 'O': 0}}
    test_tweets = classify.get_test_tweets(conn)
    test_feature_set = process_tweets(test_tweets)
    classifier.show_most_informative_features(10)
    classifier_accuracy = accuracy(classifier, test_feature_set)
    print "classifier accuracy: " + repr(classifier_accuracy)
Developer: 7andrew7 | Project: vaccine-sentiment | Lines: 28 | Source: max-ent-bigrams.py
Example 7: main_function
def main_function():
    conn = MySQLdb.connect(host="localhost", user="root", passwd="tanzania", db="twitter_analysis")
    hq_conn = MySQLdb.connect(host="localhost", user="root", passwd="tanzania", db="twitter")
    training_tweets = get_test_tweets(conn)
    training_feature_set = process_tweets(training_tweets)
    classifier = DecisionTreeClassifier.train(training_feature_set)
    test_tweets = get_training_tweets(conn)
    test_feature_set = process_tweets(test_tweets)
    classifier_accuracy = accuracy(classifier, test_feature_set)
    alt_full_matrix = {'+': {'+': 0, '-': 0, 'E': 0},
                       '-': {'+': 0, '-': 0, 'E': 0},
                       'E': {'+': 0, '-': 0, 'E': 0}}
    #for f in test_tweets:
    #    f = test_tweets[0]
    #    print f
    #    guess = classifier.classify(process_tweet(f[1]))
    #    print guess
    #    update_tweet_polarity(f[0], guess, conn)
    #    pl = classifier.prob_classify(process_tweet(f[1]))
    #    idx = f[2]
    #    if idx == 'I' or idx == 'O':
    #        idx = 'E'
    #    alt_full_matrix[idx][guess] += 1
    #print alt_full_matrix
    print "classifier accuracy: " + repr(classifier_accuracy)
Developer: 7andrew7 | Project: vaccine-sentiment | Lines: 34 | Source: decision-tree.py
Example 8: main_function
def main_function():
    conn = MySQLdb.connect(
        host=DATABASES["date_cutoff"]["HOST"],
        user=DATABASES["date_cutoff"]["USER"],
        passwd=DATABASES["date_cutoff"]["PASSWORD"],
        db=DATABASES["date_cutoff"]["NAME"],
    )
    training_tweets = get_training_tweets(conn)
    training_feature_set = classify.process_tweets(training_tweets)
    classifier = NaiveBayesClassifier.train(training_feature_set)
    error_dict = {"+": 0, "-": 0, "I": 0, "O": 0}
    count_dict = {"+": 0, "-": 0, "I": 0, "O": 0}
    guess_dict = {"+": 0, "-": 0, "I": 0, "O": 0}
    full_matrix = {
        "+": {"+": 0, "-": 0, "I": 0, "O": 0},
        "-": {"+": 0, "-": 0, "I": 0, "O": 0},
        "I": {"+": 0, "-": 0, "I": 0, "O": 0},
        "O": {"+": 0, "-": 0, "I": 0, "O": 0},
    }
    count_table = {"+": 0, "-": 0, "I": 0, "O": 0}
    test_tweets = get_test_tweets(conn)
    test_feature_set = classify.process_tweets(test_tweets)
    classifier_accuracy = accuracy(classifier, test_feature_set)
    print count_table
    print "classifier accuracy: " + repr(classifier_accuracy)
Developer: khandelwal | Project: vaccine-sentiment | Lines: 33 | Source: bayes-refactored.py
Example 9: demo
def demo():
    def gender_features(word):
        return {"last_letter": word[-1], "penultimate_letter": word[-2]}

    from nltk.classify import accuracy
    from nltk.corpus import names
    import random

    names = [(name, "male") for name in names.words("male.txt")] + [
        (name, "female") for name in names.words("female.txt")
    ]
    random.seed(60221023)
    random.shuffle(names)
    featuresets = [(gender_features(n), g) for (n, g) in names]
    train_set, test_set = featuresets[500:], featuresets[:500]
    print "--- nltk.classify.svm demo ---"
    print "Number of training examples:", len(train_set)
    classifier = SvmClassifier.train(train_set)
    print "Total SVM dimensions:", len(classifier._svmfeatureindex)
    print "Label mapping:", classifier._labelmapping
    print "--- Processing an example instance ---"
    print "Reference instance:", names[0]
    print "NLTK-format features:\n " + str(test_set[0])
    print "SVMlight-format features:\n " + str(
        map_instance_to_svm(test_set[0], classifier._labelmapping, classifier._svmfeatureindex)
    )
    distr = classifier.prob_classify(test_set[0][0])
    print "Instance classification and confidence:", distr.max(), distr.prob(distr.max())
    print "--- Measuring classifier performance ---"
    print "Overall accuracy:", accuracy(classifier, test_set)
Developer: trunghlt | Project: nltk | Lines: 35 | Source: svm.py
Example 10: update_category_by_pos
def update_category_by_pos():
    from nltk.corpus import brown
    from nltk import NaiveBayesClassifier
    from nltk import classify
    from nltk.tag import untag
    from nltk import DecisionTreeClassifier

    def pos_features(sentence, i):
        features = {'suffix(1)': sentence[i][-1:],
                    'suffix(2)': sentence[i][-2:],
                    'suffix(3)': sentence[i][-3:]}
        features['prev-word'] = '<start>' if i == 0 else sentence[i-1]
        return features

    print pos_features(brown.sents()[0], 8)
    tagged_sents = brown.tagged_sents(categories='news')
    featuresets = []
    for tagged_sent in tagged_sents:
        untagged_sent = untag(tagged_sent)
        for i, (word, tag) in enumerate(tagged_sent):
            featuresets.append((pos_features(untagged_sent, i), tag))
    size = int(len(featuresets) * 0.1)
    train_set, test_set = featuresets[size:], featuresets[:size]
    # classifier = NaiveBayesClassifier.train(train_set)
    classifier = DecisionTreeClassifier.train(train_set)
    print 'DecisionTree %f' % classify.accuracy(classifier, test_set)
Developer: brenden17 | Project: infinity | Lines: 30 | Source: category_nltk.py
Example 11: weka
def weka(train_set, test_set, algorithm="svm"):
    from nltk.classify import weka
    print "--- nltk.classify.weka %s ---" % algorithm
    temp_dir = tempfile.mkdtemp()
    classifier = nltk.classify.WekaClassifier.train(temp_dir + "/cred.model", train_set, algorithm)
    print "Overall accuracy:", accuracy(classifier, test_set)
Developer: stanvp | Project: webcredibility | Lines: 7 | Source: classifier.py
Example 12: category_by_pos
def category_by_pos():
    from nltk.corpus import brown
    from nltk import FreqDist
    from nltk import DecisionTreeClassifier
    from nltk import NaiveBayesClassifier
    from nltk import classify

    suffix_fdist = FreqDist()
    for word in brown.words():
        word = word.lower()
        suffix_fdist.inc(word[-1:])
        suffix_fdist.inc(word[-2:])
        suffix_fdist.inc(word[-3:])
    common_suffixes = suffix_fdist.keys()[:100]
    # print common_suffixes

    def pos_features(word):
        features = {}
        for suffix in common_suffixes:
            features['endswith(%s)' % suffix] = word.lower().endswith(suffix)
        return features

    tagged_words = brown.tagged_words(categories='news')
    featuresets = [(pos_features(n), g) for (n, g) in tagged_words]
    size = int(len(featuresets) * 0.1)
    train_set, test_set = featuresets[size:], featuresets[:size]
    # classifier = DecisionTreeClassifier.train(train_set)
    # print 'Decision Tree %f' % classify.accuracy(classifier, test_set)
    classifier = NaiveBayesClassifier.train(train_set)
    print 'NaiveBay %f' % classify.accuracy(classifier, test_set)
Developer: brenden17 | Project: infinity | Lines: 32 | Source: category_nltk.py
Example 13: demo
def demo():
    def gender_features(word):
        return {'last_letter': word[-1], 'penultimate_letter': word[-2]}

    from nltk.classify import accuracy
    from nltk.corpus import names
    import random

    names = ([(name, 'male') for name in names.words('male.txt')] +
             [(name, 'female') for name in names.words('female.txt')])
    random.seed(60221023)
    random.shuffle(names)
    featuresets = [(gender_features(n), g) for (n, g) in names]
    train_set, test_set = featuresets[500:], featuresets[:500]
    print '--- nltk.classify.svm demo ---'
    print 'Number of training examples:', len(train_set)
    classifier = SvmClassifier.train(train_set)
    print 'Total SVM dimensions:', len(classifier._svmfeatureindex)
    print 'Label mapping:', classifier._labelmapping
    print '--- Processing an example instance ---'
    print 'Reference instance:', names[0]
    print 'NLTK-format features:\n ' + str(test_set[0])
    print 'SVMlight-format features:\n ' + str(map_instance_to_svm(test_set[0], classifier._labelmapping, classifier._svmfeatureindex))
    distr = classifier.prob_classify(test_set[0][0])
    print 'Instance classification and confidence:', distr.max(), distr.prob(distr.max())
    print '--- Measuring classifier performance ---'
    print 'Overall accuracy:', accuracy(classifier, test_set)
Developer: approximatelylinear | Project: nltk | Lines: 32 | Source: svm.py
Example 14: benchmarking
def benchmarking(self, classifier, _test_set, all_f_measure=[], all_precision=[], all_recall=[]):
    from nltk import classify
    accuracy = classify.accuracy(classifier, _test_set)
    print("accuracy:", accuracy)

    from nltk.metrics import precision
    from nltk.metrics import recall
    from nltk.metrics import f_measure
    import collections
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(_test_set):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)

    prec = precision(refsets['class'], testsets['class'])
    rec = recall(refsets['class'], testsets['class'])
    f1 = f_measure(refsets['class'], testsets['class'])
    print('precision:', prec)
    print('recall:', rec)
    print('F-measure:', f1)
    all_f_measure.append(f1)
    all_precision.append(prec)
    all_recall.append(rec)
    print('========Show top 10 most informative features========')
    classifier.show_most_informative_features(10)
Developer: jerrygaoLondon | Project: oke-extractor | Lines: 30 | Source: okeConceptRecogniser.py
Example 15: test_raw_mail
def test_raw_mail(org_email):
    features_test = {}
    wordtokens_test = [word_limit.lemmatize(key.lower()) for key in
                       word_tokenize(org_email)]
    for key in wordtokens_test:
        if key not in stpwords:
            features_test[key] = True
    return features_test

# Extract the features (tokenized, stemmed, non-stopword tokens) from all the emails
feature_sets = [(raw_mail(n), g) for (n, g) in mail_shuffle]
# Split the whole feature set into training and test sets
size_feature = int(len(feature_sets) * 0.10)
train_set, test_set = feature_sets[size_feature:], feature_sets[:size_feature]
classifier = NaiveBayesClassifier.train(train_set)
# print(test_set[1:5])
# Print the accuracy of the classifier
print('accuracy of the machine: ', (classify.accuracy(classifier, test_set)) * 100)
# Print the top 50 features
classifier.show_most_informative_features(50)
# Print the spam and ham labels
print('labels:', classifier.labels())
# Classify user-entered email text
while True:
    featset = raw_mail(input("Enter text to classify: "))
    print(classifier.classify(featset))
Developer: Pooshan | Project: Project__spam-and-ham-detection-using-natural-language-processing | Lines: 32 | Source: NLP-spam-ham.py
Example 16: validate
def validate(self, validation_set):
    if self.classifier is None:
        raise Exception("self.classifier is None")
    reference = defaultdict(set)
    observed = defaultdict(set)
    observed['neutral'] = set()
    correct = 0
    for i, (tweet, label) in enumerate(validation_set):
        reference[label].add(i)
        observation = self.classify(tweet)
        observed[observation].add(i)
        if observation == label:
            correct += 1
    # Fraction of validation tweets whose predicted label matches the gold label.
    # (classify.accuracy expects (featureset, label) pairs, so it cannot be fed
    # the observed index sets directly.)
    acc = float(correct) / len(validation_set)
    posp = precision(reference['positive'], observed['positive'])
    posr = recall(reference['positive'], observed['positive'])
    posf = f_measure(reference['positive'], observed['positive'])
    negp = precision(reference['negative'], observed['negative'])
    negr = recall(reference['negative'], observed['negative'])
    negf = f_measure(reference['negative'], observed['negative'])
    print "accuracy: %s" % acc
    print "pos precision: %s" % posp
    print "pos recall: %s" % posr
    print "pos f-measure: %s" % posf
    print "neg precision: %s" % negp
    print "neg recall: %s" % negr
    print "neg f-measure: %s" % negf
    return (acc, posp, posr, posf, negp, negr, negf)
Developer: anov | Project: honors | Lines: 27 | Source: classifier.py
Example 17: cross_validation
def cross_validation(data_set, n_folds=8):
    kf = KFold(len(data_set), n_folds=n_folds)
    best_accuracy = -1
    training_accuracy = 0
    for train, cv in kf:
        classifier = SklearnClassifier(
            Pipeline([('tfidf', TfidfTransformer()),
                      ('nb', LinearSVC(C=1, tol=0.000001))]))
        training_data = data_set[0:cv[0]] + data_set[cv[-1]:]
        cv_data = data_set[cv[0]:cv[-1] + 1]
        classifier.train(training_data)
        accuracy = classify.accuracy(classifier, cv_data)
        if accuracy > best_accuracy:
            best_classifier = classifier
            best_accuracy = accuracy
            training_accuracy = classify.accuracy(classifier, training_data)
    return best_classifier, training_accuracy, best_accuracy
Developer: name3anad | Project: SVMDemo | Lines: 17 | Source: classifier.py
Example 18: wsd_classifier
def wsd_classifier(trainer, word, features, stopwords_list=STOPWORDS, number=300,
                   log=False, distance=3, confusion_matrix=False):
    print "Reading data..."
    global _inst_cache
    if word not in _inst_cache:
        _inst_cache[word] = [(i, i.senses[0]) for i in senseval.instances(word)]
    events = _inst_cache[word][:]
    senses = list(set(l for (i, l) in events))
    instances = [i for (i, l) in events]
    vocab = extract_vocab(instances, stopwords=stopwords_list, n=number)
    print ' Senses: ' + ' '.join(senses)
    # Split the instances into a training and test set
    #if n > len(events): n = len(events)
    n = len(events)
    random.seed(5444522)
    random.shuffle(events)
    training_data = events[:int(0.8 * n)]
    test_data = events[int(0.8 * n):n]
    # Train classifier
    print 'Training classifier...'
    classifier = trainer([(features(i, vocab, distance), label) for (i, label) in training_data])
    # Test classifier
    print 'Testing classifier...'
    acc = accuracy(classifier, [(features(i, vocab, distance), label) for (i, label) in test_data])
    print 'Accuracy: %6.4f' % acc
    if log == True:
        # Write misclassified test items to errors.txt, with the target word upper-cased.
        print 'Writing errors to errors.txt'
        output_error_file = open('errors.txt', 'w')
        errors = []
        for (i, label) in test_data:
            guess = classifier.classify(features(i, vocab, distance))
            if guess != label:
                con = i.context
                position = i.position
                item_number = str(test_data.index((i, label)))
                word_list = [w for (w, tag) in con]
                hard_highlighted = word_list[position].upper()
                word_list_highlighted = word_list[0:position] + [hard_highlighted] + word_list[position+1:]
                sentence = ' '.join(word_list_highlighted)
                errors.append([item_number, sentence, guess, label])
        error_number = len(errors)
        output_error_file.write('There are ' + str(error_number) + ' errors!' + '\n' +
                                '----------------------------' + '\n' + '\n')
        for error in errors:
            output_error_file.write(str(errors.index(error) + 1) + ') ' + 'example number: ' + error[0] + '\n' +
                                    '    sentence: ' + error[1] + '\n' +
                                    '    guess: ' + error[2] + ';  label: ' + error[3] + '\n' + '\n')
        output_error_file.close()
    cm = None
    if confusion_matrix == True:
        gold = [label for (i, label) in test_data]
        derived = [classifier.classify(features(i, vocab, distance)) for (i, label) in test_data]
        cm = nltk.ConfusionMatrix(gold, derived)
        print cm
    return cm
Developer: natviv | Project: WSD_Rationales | Lines: 58 | Source: wsd_code.py
Example 19: testall_accuracy
def testall_accuracy(self, testset=[]):
    # Default test set is the class field (all test files).
    if not testset:
        testset = self.test_feature_set_custom
    print 'Measuring classifier performance...'
    acc = accuracy(self.classifier, testset)
    print 'Overall accuracy:', acc
    return acc
Developer: ananana | Project: authorship | Lines: 9 | Source: authorship.py
Example 20: buildClassifier
def buildClassifier(hamDir, spamDir):
    spamEmails = []
    hamEmails = []
    allEmails = []
    features = []
    # Using glob instead of os.listdir to ignore hidden files
    for email in glob.glob(spamDir + "/*"):
        f = open(email)
        spamEmails.append(f.read())
        f.close()
    for email in glob.glob(hamDir + "/*"):
        f = open(email)
        hamEmails.append(f.read())
        f.close()
    for email in spamEmails:
        allEmails.append((email, 'spam'))
    for email in hamEmails:
        allEmails.append((email, 'ham'))
    # Shuffle so the 70:30 split is representative; without the accuracy check
    # below, shuffling would be unnecessary.
    random.shuffle(allEmails)
    # Build one feature dict per email
    for (email, label) in allEmails:
        features.append((emailFeatures(email), label))
    # 70:30 ratio for training:testing
    print "Using a 70:30 ratio for training:testing, the accuracy is as follows: "
    totalSize = int(len(features) * 0.7)
    trainingEmails, testingEmails = features[:totalSize], features[totalSize:]
    print "training size: %d; testing size: %d" % (len(trainingEmails), len(testingEmails))
    classifier = NaiveBayesClassifier.train(trainingEmails)
    print classify.accuracy(classifier, testingEmails)
    print "Now creating and saving a full size classifier made up of %d emails..." % len(features)
    classifier = NaiveBayesClassifier.train(features)
    saveClassifier(classifier, "full-classifier.pickle")
Developer: enmalik | Project: Spam-Filter | Lines: 44 | Source: classifer.py
Note: the nltk.classify.accuracy examples above were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are drawn from open-source projects contributed by many developers; copyright remains with the original authors, and any redistribution or use should follow each project's license. Please do not repost without permission.