本文整理汇总了Python中nltk.classify.NaiveBayesClassifier类的典型用法代码示例。如果您正苦于以下问题：Python NaiveBayesClassifier类的具体用法？Python NaiveBayesClassifier怎么用？Python NaiveBayesClassifier使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了NaiveBayesClassifier类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: train
def train(test=False):
    """Train a Naive Bayes sentiment classifier on the movie-review corpus.

    Every review in ``movie_reviews`` is converted to a feature set with
    ``word_feats`` and labelled 'neg' or 'pos'.

    Args:
        test: When false (the default) the classifier is trained on every
            review and returned. When true, the last quarter of each class
            is held out, the classifier is trained on the rest, and the
            held-out accuracy plus the most informative features are
            printed.

    Returns:
        The trained classifier when ``test`` is false, otherwise ``None``.
    """
    def labelled(label):
        return [(word_feats(movie_reviews.words(fileids=[f])), label)
                for f in movie_reviews.fileids(label)]

    negfeats = labelled('neg')
    posfeats = labelled('pos')

    if not test:
        return NaiveBayesClassifier.train(negfeats + posfeats)

    # Floor division keeps the cut-offs integral on Python 3 as well; a float
    # cut-off cannot be used as a slice index.
    negcutoff = len(negfeats) * 3 // 4
    poscutoff = len(posfeats) * 3 // 4
    trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]

    # Single-argument print calls produce the same output on Python 2 and 3.
    print('train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats)))
    classifier = NaiveBayesClassifier.train(trainfeats)
    print('accuracy: %s' % nltk.classify.util.accuracy(classifier, testfeats))
    classifier.show_most_informative_features()
开发者ID:jnu,项目名称:texecutions,代码行数:26,代码来源:sentiment.py
示例2: train_and_show_results
def train_and_show_results(pos, neg, pos_bigrams, neg_bigrams, pos_control, neg_control, pos_control_bigrams, neg_control_bigrams):
    """Train Naive Bayes and Maximum Entropy models and print their results.

    Two feature representations are processed in parallel: bag of words
    (``pos`` / ``neg``) and bigrams (``pos_bigrams`` / ``neg_bigrams``).
    The four trained classifiers are stored through ``save_dataset`` and,
    for each of them, accuracy, the most informative features and the
    output of ``print_precision_recall`` are printed.

    Args:
        pos: Labelled positive bag-of-words feature sets.
        neg: Labelled negative bag-of-words feature sets.
        pos_bigrams: Labelled positive bigram feature sets.
        neg_bigrams: Labelled negative bigram feature sets.
        pos_control: Positive bag-of-words test data, or ``None``.
        neg_control: Negative bag-of-words test data, or ``None``.
        pos_control_bigrams: Positive bigram test data, or ``None``.
        neg_control_bigrams: Negative bigram test data, or ``None``.

    If any of the four control arguments is ``None``, the last quarter of
    each training list is held out for testing; otherwise the complete
    lists are used for training and the control lists for testing.
    """
    def show(*parts):
        # Same output as the Python 2 ``print a, b`` statement, on any version.
        print(' '.join(str(part) for part in parts))

    def split(items):
        # Floor division: a float cut-off cannot be used as a slice index.
        cutoff = len(items) * 3 // 4
        return items[:cutoff], items[cutoff:]

    def report(classifier, test_set):
        show('Accuracy:', nltk.classify.util.accuracy(classifier, test_set))
        classifier.show_most_informative_features()
        print_precision_recall(classifier, test_set)

    controls = (pos_control, neg_control, pos_control_bigrams, neg_control_bigrams)
    if any(control is None for control in controls):
        neg_train, neg_test = split(neg)
        pos_train, pos_test = split(pos)
        neg_bigrams_train, neg_bigrams_test = split(neg_bigrams)
        pos_bigrams_train, pos_bigrams_test = split(pos_bigrams)
        test_bag_of_words = neg_test + pos_test
        test_bigrams = neg_bigrams_test + pos_bigrams_test
        train_corpora_bag_of_words = neg_train + pos_train
        train_corpora_bigrams = neg_bigrams_train + pos_bigrams_train
    else:
        test_bag_of_words = neg_control + pos_control
        test_bigrams = neg_control_bigrams + pos_control_bigrams
        train_corpora_bag_of_words = neg + pos
        train_corpora_bigrams = neg_bigrams + pos_bigrams

    show("negative corpus: ", len(neg))
    show("positive corpus: ", len(pos))
    # Each control set is checked on its own: the split branch above is also
    # taken when only some of the control arguments were supplied.
    if neg_control is not None:
        show("negative test corpus: ", len(neg_control))
    if pos_control is not None:
        show("positive test corpus: ", len(pos_control))

    show('bag of words and bigrams - Naive Bayes')
    naive_bayes = NaiveBayesClassifier.train(train_corpora_bag_of_words)
    naive_bayes_bigrams = NaiveBayesClassifier.train(train_corpora_bigrams)
    save_dataset('naive_bayes.dat', naive_bayes)
    save_dataset('naive_bayes_bigrams.dat', naive_bayes_bigrams)

    show('bag of words and bigrams - Maximum Entropy')
    maximum_entropy = nltk.MaxentClassifier.train(train_corpora_bag_of_words, max_iter=2)
    maximum_entropy_bigrams = nltk.MaxentClassifier.train(train_corpora_bigrams, max_iter=2)
    save_dataset('maximum_entropy.dat', maximum_entropy)
    save_dataset('maximum_entropy_bigrams.dat', maximum_entropy_bigrams)

    show('Naive Bayesian results')
    show('bag of words')
    report(naive_bayes, test_bag_of_words)
    show('\nbigrams')
    report(naive_bayes_bigrams, test_bigrams)

    show('Maximum Entropy results')
    show('bag of words')
    report(maximum_entropy, test_bag_of_words)
    show('\nbigrams')
    report(maximum_entropy_bigrams, test_bigrams)
开发者ID:gleicon,项目名称:sentiment_analysis,代码行数:60,代码来源:train_classifier.py
示例3: evaluate_features
def evaluate_features(feature_select):
    """Train on the polarity files and score every sentence of the input file.

    Sentences from ``RT_POLARITY_POS_FILE`` and ``RT_POLARITY_NEG_FILE`` are
    tokenised into words and punctuation marks, passed through
    ``feature_select`` and used to train a Naive Bayes classifier. Each
    sentence of ``RT_INPUT_NEG_FILE`` is then scored. The positive input file
    was disabled in the original code and is still not read.

    Args:
        feature_select: Callable that maps a list of tokens to a feature set.

    Returns:
        A dict with three parallel lists: 'key' holds the sentence index,
        'pos' and 'neg' hold the probability the classifier assigns to the
        respective label.
    """
    # Tokeniser from
    # http://stackoverflow.com/questions/367155/splitting-a-string-into-words-and-punctuation
    token_pattern = re.compile(r"[\w']+|[.,!?;]")

    def load(path, label):
        with open(path, 'r') as sentences:
            return [[feature_select(token_pattern.findall(line.rstrip())), label]
                    for line in sentences]

    trainFeatures = load(RT_POLARITY_POS_FILE, 'pos') + load(RT_POLARITY_NEG_FILE, 'neg')
    testFeatures = load(RT_INPUT_NEG_FILE, 'neg')

    classifier = NaiveBayesClassifier.train(trainFeatures)

    # The reference/test sets of the original were never read, and the test
    # set was keyed by the probability distribution object instead of a
    # label, so that bookkeeping is dropped.
    fileOutput = {'key': [], 'pos': [], 'neg': []}
    for i, (features, _label) in enumerate(testFeatures):
        predicted = classifier.prob_classify(features)
        # Kept from the original: emits two newlines per scored sentence.
        print("\n")
        fileOutput['key'].append(i)
        fileOutput['pos'].append(predicted.prob("pos"))
        fileOutput['neg'].append(predicted.prob("neg"))
    return fileOutput
开发者ID:hmanikfan,项目名称:ABCDEFG,代码行数:60,代码来源:ML_NaiveBayes_ProbabilisticClassifier.py
示例4: classify_and_evaluate
def classify_and_evaluate(reviews, feature_extractor=word_feats):
    """Train and evaluate a Naive Bayes classifier on labelled reviews.

    The reviews are shuffled, converted to feature sets, and split per class
    into three quarters for training and one quarter for testing. The sizes
    of both parts, the test accuracy and the most informative features are
    printed.

    Args:
        reviews: List of mappings with a 'class' entry ('POSITIVE' or
            'NEGATIVE') and a 'text' entry. The list is shuffled in place.
        feature_extractor: Callable that maps a list of tokens to a feature
            set.
    """
    random.shuffle(reviews)

    def featurize(review_class, label):
        # A list is built here instead of using filter(), which returns a
        # lazy iterator without a length on Python 3.
        features = []
        for review in reviews:
            if review['class'] != review_class:
                continue
            # Splitting on a single space yields empty strings for runs of
            # spaces; those are discarded.
            tokens = [token for token in review['text'].split(' ') if token]
            features.append((feature_extractor(tokens), label))
        return features

    pos_features = featurize('POSITIVE', 'pos')
    neg_features = featurize('NEGATIVE', 'neg')

    # divide groups
    pos_offset = len(pos_features) * 3 // 4
    neg_offset = len(neg_features) * 3 // 4
    training = pos_features[:pos_offset] + neg_features[:neg_offset]
    testing = pos_features[pos_offset:] + neg_features[neg_offset:]

    # train classifier
    classifier = NaiveBayesClassifier.train(training)
    print('treinada em %d reviews, testada em %d reviews' % (len(training), len(testing)))
    print('accuracy: %s' % nltk.classify.util.accuracy(classifier, testing))
    classifier.show_most_informative_features()
开发者ID:teago19,项目名称:sentimentAnalysis,代码行数:32,代码来源:classify.py
示例5: evaluate_classifier
def evaluate_classifier(featx):
    """Evaluate a feature extractor with Naive Bayes on the movie reviews.

    Three quarters of each class in ``movie_reviews`` are used for training
    and the remaining quarter for testing. Accuracy, per-label precision and
    recall, and the most informative features are printed.

    Args:
        featx: Callable that maps the words of one review to a feature set.
    """
    def show(*parts):
        # Same output as the Python 2 ``print a, b`` statement, on any version.
        print(' '.join(str(part) for part in parts))

    def labelled(label):
        return [(featx(movie_reviews.words(fileids=[f])), label)
                for f in movie_reviews.fileids(label)]

    negfeats = labelled('neg')
    posfeats = labelled('pos')

    # Floor division: a float cut-off cannot be used as a slice index.
    negcutoff = len(negfeats) * 3 // 4
    poscutoff = len(posfeats) * 3 // 4
    trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]

    classifier = NaiveBayesClassifier.train(trainfeats)

    # Indices of the test items grouped by true label and by predicted label.
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        testsets[classifier.classify(feats)].add(i)

    show('accuracy:', nltk.classify.util.accuracy(classifier, testfeats))
    show('pos precision:', nltk.metrics.precision(refsets['pos'], testsets['pos']))
    show('pos recall:', nltk.metrics.recall(refsets['pos'], testsets['pos']))
    show('neg precision:', nltk.metrics.precision(refsets['neg'], testsets['neg']))
    show('neg recall:', nltk.metrics.recall(refsets['neg'], testsets['neg']))
    classifier.show_most_informative_features()
开发者ID:zhougr1993,项目名称:Bayes_kick_momo_spam,代码行数:28,代码来源:test_sentiment.py
示例6: main
def main():
    """Train and evaluate a Naive Bayes classifier on the conversation corpus.

    Feature sets are obtained from ``get_training_features``; the first three
    quarters are used for training and the rest for testing. The sizes of
    both parts, the test accuracy and the most informative features are
    printed.
    """
    conversations_path = '/Users/nasrallah/Desktop/Insight/courtcast/data/supreme_court_dialogs_corpus_v1.01/supreme.conversations.txt'

    ## Extract the feature sets
    samples = get_training_features(conversations_path)

    ## The samples are not shuffled, so the split follows their original order
    boundary = int(len(samples) * 3 / 4)
    training_part, held_out_part = samples[:boundary], samples[boundary:]

    sizes = (len(training_part), len(held_out_part))
    print('train on %d instances, test on %d instances' % sizes)

    model = NaiveBayesClassifier.train(training_part)
    print('accuracy:', nltk.classify.util.accuracy(model, held_out_part))
    model.show_most_informative_features()
开发者ID:FrankYoshida,项目名称:CourtCast,代码行数:28,代码来源:training.py
示例7: evaluate_classifier
def evaluate_classifier(featx):
    """Train a Naive Bayes classifier on the product reviews and report on it.

    Feature sets are built from ``traincons`` (labelled 'neg') and
    ``trainpros`` (labelled 'pos') after tokenising each text with
    ``nltk.word_tokenize`` and dropping the words found in ``stpwrds``.
    Accuracy, per-label precision and recall, and the most informative
    features are printed.

    Args:
        featx: Callable that maps a list of words to a feature set.

    Returns:
        The trained classifier.
    """
    def show(*parts):
        # Same output as the Python 2 ``print a, b`` statement, on any version.
        print(' '.join(str(part) for part in parts))

    def labelled(texts, label):
        return [(featx([wrd for wrd in nltk.word_tokenize(text) if wrd not in stpwrds]), label)
                for text in texts]

    negfeats = labelled(traincons, 'neg')
    posfeats = labelled(trainpros, 'pos')

    # Floor division: a float cut-off cannot be used as a slice index.
    negcutoff = len(negfeats) * 3 // 4
    poscutoff = len(posfeats) * 3 // 4

    # NOTE(review): training uses ALL examples while testing uses the last
    # quarter of each class, so the test items were seen during training and
    # the reported metrics are optimistic. Kept as in the original because the
    # returned classifier is meant to be trained on everything - confirm.
    trainfeats = negfeats + posfeats
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]

    classifier = NaiveBayesClassifier.train(trainfeats)

    # Indices of the test items grouped by true label and by predicted label.
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        testsets[classifier.classify(feats)].add(i)

    show('accuracy:', nltk.classify.util.accuracy(classifier, testfeats))
    show('pos precision:', nltk.metrics.precision(refsets['pos'], testsets['pos']))
    show('pos recall:', nltk.metrics.recall(refsets['pos'], testsets['pos']))
    show('neg precision:', nltk.metrics.precision(refsets['neg'], testsets['neg']))
    show('neg recall:', nltk.metrics.recall(refsets['neg'], testsets['neg']))
    classifier.show_most_informative_features()
    return classifier
开发者ID:pdk2015,项目名称:productReview,代码行数:35,代码来源:productReview.py
示例8: __init__
def __init__(self):
    """Train the phrase classifier and store it in ``self.classifier``.

    Each line of ``neg_phrases.txt`` is labelled 'suicidal' and each line of
    ``pos_phrases.txt`` is labelled 'alright'. The first 80% of each file is
    used for training and the rest for testing; the sizes of both parts, the
    test accuracy and the most informative features are printed.
    """
    def tagged(path, label):
        # The context manager closes the file; the original left both
        # files open.
        with open(path, "r") as phrases:
            return [(word_feats(phrase.split()), label) for phrase in phrases]

    neg_phrases_tagged = tagged("neg_phrases.txt", 'suicidal')
    pos_phrases_tagged = tagged("pos_phrases.txt", 'alright')

    negcutoff = int(len(neg_phrases_tagged) * .8)
    poscutoff = int(len(pos_phrases_tagged) * .8)
    trainfeats = neg_phrases_tagged[:negcutoff] + pos_phrases_tagged[:poscutoff]
    testfeats = neg_phrases_tagged[negcutoff:] + pos_phrases_tagged[poscutoff:]

    # Single-argument print calls produce the same output on Python 2 and 3.
    print('train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats)))
    self.classifier = NaiveBayesClassifier.train(trainfeats)
    print('accuracy: %s' % nltk.classify.util.accuracy(self.classifier, testfeats))
    self.classifier.show_most_informative_features()
开发者ID:amcnary,项目名称:cs294SuicideDetector,代码行数:25,代码来源:nltk_classify_bag_of_words.py
示例9: naiveBayes
def naiveBayes(features_train, features_test):
    """Train a Naive Bayes classifier and print its evaluation.

    Prints the sizes of both data sets, the test accuracy, the most
    informative features and the values returned by ``precision_recall``.

    Args:
        features_train: Labelled feature sets used for training.
        features_test: Labelled feature sets used for evaluation.
    """
    # Single-argument print calls produce the same output on Python 2 and 3.
    print('train on %d instances, test on %d instances' % (len(features_train), len(features_test)))
    classifier = NaiveBayesClassifier.train(features_train)
    print('accuracy: %s' % nltk.classify.util.accuracy(classifier, features_test))
    classifier.show_most_informative_features()
    precisions, recalls = precision_recall(classifier, features_test)
    # The original printed these two values under the labels "accuracy" and
    # "fitness"; they are labelled as what the variables hold.
    print('precision: %s recall: %s' % (precisions, recalls))
开发者ID:andylikescodes,项目名称:SentimentalAnalysis,代码行数:7,代码来源:Classifiers.py
示例10: classify
def classify(self):
    """Train and evaluate a Naive Bayes classifier on this entity's articles.

    Articles with a negative score are labelled "neg" and those with a
    positive score "pos"; articles whose score is exactly zero are not
    used. Three quarters of each class are used for training and the rest
    for testing. The sizes of both parts, the test accuracy and the most
    informative features are printed.
    """
    articles = Article.objects.filter(entity=self.entity)

    def word_feats(body):
        # One boolean feature per distinct space-separated token.
        return dict.fromkeys(body.split(" "), True)

    negfeats = [(word_feats(a.body), "neg") for a in articles.filter(score__lt=0)]
    posfeats = [(word_feats(a.body), "pos") for a in articles.filter(score__gt=0)]

    # Floor division: a float cut-off cannot be used as a slice index.
    negcutoff = len(negfeats) * 3 // 4
    poscutoff = len(posfeats) * 3 // 4
    trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]

    # Single-argument print calls produce the same output on Python 2 and 3.
    print("train on %d instances, test on %d instances" % (len(trainfeats), len(testfeats)))
    classifier = NaiveBayesClassifier.train(trainfeats)
    print("accuracy: %s" % nltk.classify.util.accuracy(classifier, testfeats))
    classifier.show_most_informative_features()
开发者ID:funkotron,项目名称:sentience,代码行数:25,代码来源:spider.py
示例11: __init_naive_bayes
def __init_naive_bayes(self):
    """
    Build and train the Naive Bayes classifier stored in ``self.classifier``.

    Reads ``tweets_positive.txt``, ``tweets_neutral.txt`` and
    ``tweets_negative.txt`` from the ``sentiment_word_files`` directory
    under ``sys.path[0]``. Every lower-cased, whitespace-separated word that
    passes ``self.__check_word`` becomes one single-feature training example
    labelled 'pos', 'neu' or 'neg' respectively.

    Raises:
        Exception: if reading the files or training the classifier fails.
    """
    def read_words(file_name):
        path = pjoin(sys.path[0], "sentiment_word_files", file_name)
        # codecs.open always opens the underlying file in binary mode when an
        # encoding is given, so the former "U" flag had no effect on the
        # result; it is omitted because Python 3.11+ rejects it. The context
        # manager closes the file (the original wrote ``f.close`` without
        # calling it).
        with codecs.open(path, mode="r", encoding='utf-8') as f:
            return f.read().lower().split()

    def labelled(words, label):
        return [({word.lower(): True}, label) for word in words if self.__check_word(word)]

    try:
        positive = read_words("tweets_positive.txt")
        neutral = read_words("tweets_neutral.txt")
        negative = read_words("tweets_negative.txt")

        posfeats = labelled(positive, 'pos')
        neufeats = labelled(neutral, 'neu')
        negfeats = labelled(negative, 'neg')

        self.classifier = NaiveBayesClassifier.train(posfeats + neufeats + negfeats)
    except Exception:
        # ``except Exception`` instead of a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are not converted.
        raise Exception("Unknown error in SentimentAnalyzer::__init_naive_bayes")
开发者ID:StrongBrain,项目名称:test_twitter,代码行数:35,代码来源:sentiment_analyzer.py
示例12: generate_sentiment_classifier
def generate_sentiment_classifier(corpus, word_feats):
    """Train a Naive Bayes sentiment classifier on a categorised corpus.

    Three quarters of the 'neg' and 'pos' files are used for training and
    the rest for testing. The sizes of both parts, accuracy, per-label
    precision and recall, and the most informative features are printed.

    Args:
        corpus: Corpus reader providing ``fileids(category)`` and
            ``words(fileids=[...])``.
        word_feats: Callable that maps the words of one file to a feature
            set.

    Returns:
        The trained classifier.
    """
    def show(*parts):
        # Same output as the Python 2 ``print a, b`` statement, on any version.
        print(' '.join(str(part) for part in parts))

    def labelled(label):
        return [(word_feats(corpus.words(fileids=[f])), label)
                for f in corpus.fileids(label)]

    negfeats = labelled('neg')
    posfeats = labelled('pos')

    # Floor division: a float cut-off cannot be used as a slice index.
    negcutoff = len(negfeats) * 3 // 4
    poscutoff = len(posfeats) * 3 // 4
    trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
    show('train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats)))

    classifier = NaiveBayesClassifier.train(trainfeats)

    # Indices of the test items grouped by true label and by predicted label.
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        testsets[classifier.classify(feats)].add(i)

    show('accuracy:', nltk.classify.util.accuracy(classifier, testfeats))
    show('pos precision:', nltk.metrics.precision(refsets['pos'], testsets['pos']))
    show('pos recall:', nltk.metrics.recall(refsets['pos'], testsets['pos']))
    show('neg precision:', nltk.metrics.precision(refsets['neg'], testsets['neg']))
    show('neg recall:', nltk.metrics.recall(refsets['neg'], testsets['neg']))
    classifier.show_most_informative_features()
    return classifier
开发者ID:rohankshir,项目名称:football,代码行数:34,代码来源:util.py
示例13: main
def main():
# Flags users whose names look like organisation names: a Naive Bayes
# classifier is trained on org names versus user names and then applied to
# every user that is not yet marked as a likely org.
# NOTE(review): the indentation of this snippet was lost, so the nesting of
# the statements inside the loop below is not recoverable from the text -
# confirm against the original file before re-indenting.
# Names of all orgs (presumably a Django ORM query - verify).
org_names = Org.objects.values_list('name', flat=True)
# Only users not already flagged as likely orgs are considered.
users = User.objects.filter(likely_org=False)
# ``get_name`` is read as an attribute, not called - presumably a property.
user_names = [user.get_name for user in users]
# Exclude the users we know are orgs (exact same name). This mostly gets run the first time and for new users with org names
non_org_user_names = set(user_names) - set(org_names)
# One labelled feature set per name; the two labels are 'org' and 'user'.
org_features = [(word_features(name), 'org') for name in org_names]
user_features = [(word_features(name), 'user') for name in non_org_user_names]
classifier = NaiveBayesClassifier.train(user_features + org_features)
counter = 0
# NOTE(review): ``likely_orgs`` is never used in the visible code.
likely_orgs = []
for user in users:
# Probability distribution over the two labels for this user's name.
prediction = classifier.prob_classify(word_features(user.get_name))
if prediction.max() == 'org':
# Log probability ratio (base 2) of P(org) to P(user), with NORMALIZING_CONST
# added to both terms; ignoring that constant, P(org) == 0.8 and
# P(user) == 0.1 give log2(8.0) = 3.0.
ratio = math.log(((float(prediction.prob('org')) + NORMALIZING_CONST) / (float(prediction.prob('user')) + NORMALIZING_CONST)), 2)
# Save only when the ratio reaches MIN_RATIO, the user is still unflagged and
# the admin classification is not 'user'.
if ratio >= MIN_RATIO and user.likely_org == False and user.admin_classification != 'user':
log.info('User ID %d with name "%s" is probably an org. Saving.' % (user.id, user.get_name))
user.likely_org = True
user.org_probability = ratio
user.save()
# NOTE(review): cannot tell from the flattened text whether this counts saved
# users or every user predicted as 'org' - confirm.
counter += 1
log.info("Processed %d users with org-like names" % counter)
开发者ID:Bartelo,项目名称:openjumo,代码行数:30,代码来源:user_is_org.py
示例14: create_train_classifier
def create_train_classifier():
    """Retrain the Naive Bayes classifier and pickle it with its features.

    The ``.txt`` files found under ``TRAIN_DATASET_LOC`` are read as a
    categorised corpus with the categories 'pos' and 'neg'. The positive
    feature sets, the negative feature sets and the trained classifier are
    pickled into the ``pickles`` directory via ``__write_file``.
    """
    # Single-argument print calls produce the same output on Python 2 and 3.
    print("Recreating training classifier")
    corpus_dir = nltk.data.find(TRAIN_DATASET_LOC)
    # Raw string: "\." in a normal string literal is an invalid escape
    # sequence. The pattern itself is unchanged.
    train_data = nltk.corpus.CategorizedPlaintextCorpusReader(
        corpus_dir, fileids=r'.*\.txt', cat_pattern="(pos|neg)")

    negfeats = [(__word_feats_neg(train_data.words(fileids=[f])), 'neg')
                for f in train_data.fileids('neg')]
    posfeats = [(__word_feats_pos(train_data.words(fileids=[f])), 'pos')
                for f in train_data.fileids('pos')]

    classifier = NaiveBayesClassifier.train(negfeats + posfeats)

    # NOTE(review): cPickle exists on Python 2 only; porting this function to
    # Python 3 also depends on how __write_file handles bytes.
    outputs = (
        ('positive_train.pickle', posfeats),
        ('negative_train.pickle', negfeats),
        ('nbClassifier.pickle', classifier),
    )
    for file_name, payload in outputs:
        __write_file(os.path.join('pickles', file_name), cPickle.dumps(payload))
    print("Done!")
开发者ID:Eman-Naguib,项目名称:TwitterSentiment,代码行数:30,代码来源:recreate_pickles.py
示例15: naivebayes
def naivebayes(trainfeats, testfeats):
    """Train a Naive Bayes classifier and print its baseline results.

    Prints the sizes of both data sets, the test accuracy and the most
    informative features.

    Args:
        trainfeats: Labelled feature sets used for training.
        testfeats: Labelled feature sets used for evaluation.
    """
    classifier = NaiveBayesClassifier.train(trainfeats)
    # Single-argument print calls produce the same output on Python 2 and 3.
    print("NaiveBayes output")
    print('train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats)))
    print('accuracy: %s' % nltk.classify.util.accuracy(classifier, testfeats))
    # show_most_informative_features() prints its table itself and returns
    # None; wrapping it in print only added a stray "None" line.
    classifier.show_most_informative_features()
开发者ID:shachi04,项目名称:PSL_sentiment,代码行数:7,代码来源:baseline.py
示例16: evaluateFeatures
def evaluateFeatures(featureSelect):
    """Train a Naive Bayes classifier on the polarity files and classify a test slice.

    Sentences from ``RT_POLARITY_POS_FILE`` and ``RT_POLARITY_NEG_FILE`` are
    tokenised into words and punctuation marks and passed through
    ``featureSelect``. The classifier is trained on all sentences; the last
    quarter of each class is then classified.

    Args:
        featureSelect: Callable that maps a list of tokens to a feature set.
    """
    tokenPattern = re.compile(r"[\w']+|[.,!?;]")

    def loadFeatures(path, label):
        # Items stay two-element lists, as in the original, so the printed
        # sample below keeps its format.
        with open(path, 'r') as sentences:
            return [[featureSelect(tokenPattern.findall(line.rstrip())), label]
                    for line in sentences]

    posFeatures = loadFeatures(RT_POLARITY_POS_FILE, 'pos')
    negFeatures = loadFeatures(RT_POLARITY_NEG_FILE, 'neg')

    posCutoff = len(posFeatures) * 3 // 4
    negCutoff = len(negFeatures) * 3 // 4

    # NOTE(review): the test slice is part of the training data, so any
    # metric derived from it is optimistic.
    testFeatures = posFeatures[posCutoff:] + negFeatures[negCutoff:]
    trainFeatures = posFeatures + negFeatures

    # Guarded so that empty input files do not raise IndexError.
    if testFeatures:
        print(testFeatures[0])

    classifier = NaiveBayesClassifier.train(trainFeatures)

    # Indices of the test items grouped by true label and by predicted label.
    # NOTE(review): neither mapping is read within the visible part of this
    # function.
    referenceSets = collections.defaultdict(set)
    testSets = collections.defaultdict(set)
    for i, (features, label) in enumerate(testFeatures):
        referenceSets[label].add(i)
        predicted = classifier.classify(features)
        testSets[predicted].add(i)
开发者ID:Sapphirine,项目名称:MyTravelAgent,代码行数:33,代码来源:Sentiment.py
示例17: evaluate_classifier_Naive
def evaluate_classifier_Naive(featx):
    """Evaluate a feature extractor with Naive Bayes on the train/test corpora.

    The classifier is trained on the 'neg' and 'pos' files of ``train`` and
    evaluated on those of ``test``. The 50 most informative features are
    printed.

    Args:
        featx: Callable that maps the words of one file to a feature set.

    Returns:
        A list ``['NaiveBayes', accuracy, pos_precision, pos_recall,
        neg_precision, neg_recall]``.
    """
    def featurize(corpus, label):
        return [(featx(corpus.words(fileids=[fid])), label)
                for fid in corpus.fileids(label)]

    training_set = featurize(train, 'neg') + featurize(train, 'pos')
    held_out_set = featurize(test, 'neg') + featurize(test, 'pos')

    model = NaiveBayesClassifier.train(training_set)

    # Indices of the held-out items grouped by true label and by prediction.
    gold = collections.defaultdict(set)
    guessed = collections.defaultdict(set)
    for idx, sample in enumerate(held_out_set):
        feats, label = sample
        gold[label].add(idx)
        guessed[model.classify(feats)].add(idx)

    scores = [nltk.classify.util.accuracy(model, held_out_set)]
    scores.append(nltk.metrics.precision(gold['pos'], guessed['pos']))
    scores.append(nltk.metrics.recall(gold['pos'], guessed['pos']))
    scores.append(nltk.metrics.precision(gold['neg'], guessed['neg']))
    scores.append(nltk.metrics.recall(gold['neg'], guessed['neg']))

    model.show_most_informative_features(50)
    return ['NaiveBayes'] + scores
开发者ID:Haoychen,项目名称:Movie-Review-Sentiment-Analysis,代码行数:31,代码来源:evaluation.py
示例18: classify
def classify(corpus='Cornell_sentence_polarity', cutoff=1000):
    """Train and evaluate a Naive Bayes classifier on a polarity corpus.

    For every polarity returned by ``get_word_features`` the feature sets
    are shuffled; the first ``cutoff`` go to the training set and the rest
    to the test set. The sizes of both sets, the test accuracy and the most
    informative features are printed.

    Args:
        corpus: Name passed to ``load_corpus``. The original code also listed
            'Cornell_text_polarity' and 'BingLiu_selected_sentences' as
            commented-out alternatives.
        cutoff: Number of shuffled feature sets per polarity used for
            training.
    """
    cases = load_corpus(corpus)
    features = get_word_features(cases)

    train_feats = []
    test_feats = []
    for polarity, feats in features.items():
        # Single-argument print calls produce the same output on Python 2 and 3.
        print('%s number of train: %s' % (polarity, cutoff))
        # Shuffle a copy so the caller's lists keep their order.
        temp_feats = feats[:]
        random.shuffle(temp_feats)
        train_feats += temp_feats[:cutoff]
        test_feats += temp_feats[cutoff:]

    print('train on %d instances, test on %d instances' % (len(train_feats), len(test_feats)))
    classifier = NaiveBayesClassifier.train(train_feats)
    print('accuracy: %s' % nltk.classify.util.accuracy(classifier, test_feats))
    classifier.show_most_informative_features()
开发者ID:yxw,项目名称:hacknlp,代码行数:25,代码来源:bayes_analyzer.py
示例19: classification
def classification(self):
    """Train a Naive Bayes classifier on the training reviews and report on it.

    Prints the accuracy on ``self.test_reviews`` and the most informative
    features.
    """
    # NOTE(review): the training data is wrapped in a FeatStruct before being
    # passed to train(); confirm that this yields the (featureset, label)
    # pairs the classifier expects.
    fstruct = FeatStruct(self.train_reviews)
    classifier = NaiveBayesClassifier.train(fstruct)
    # Single-argument print calls produce the same output on Python 2 and 3.
    print('accuracy: %s' % nltk.classify.util.accuracy(classifier, self.test_reviews))
    classifier.show_most_informative_features()
开发者ID:karthik-chandrasekar,项目名称:SentimentalAnalysis,代码行数:7,代码来源:senti_analysis.py
示例20: main_function
def main_function():
    """Classify all pending tweets with a Naive Bayes classifier.

    The classifier is trained on the tweets returned by
    ``classify.get_training_tweets``. Each tweet returned by
    ``classify.get_tweets_to_classify`` is classified, its polarity is
    written back with ``classify.update_tweet_polarity``, and the number of
    tweets per predicted label is printed at the end.
    """
    settings = DATABASES['ensemble']
    conn = MySQLdb.connect(host=settings['HOST'],
                           user=settings['USER'],
                           passwd=settings['PASSWORD'],
                           db=settings['NAME'])
    # The three counters the original created but never read are dropped.
    count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    try:
        training_tweets = classify.get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)
        classifier = NaiveBayesClassifier.train(training_feature_set)

        for tweet in classify.get_tweets_to_classify(conn):
            text = classify.get_tweet_text(conn, tweet[0])[0][0]
            guess = classifier.classify(classify.process_tweet(text))
            classify.update_tweet_polarity(tweet[0], guess, conn)
            # A label outside the four expected ones is counted instead of
            # raising KeyError.
            count_table[guess] = count_table.get(guess, 0) + 1

        classify.run_sql(conn, classify.Statements.UPDATE_MANUAL_CLASSIFIED)
    finally:
        # The original never closed the connection.
        conn.close()

    # Single-argument print calls produce the same output on Python 2 and 3.
    print(count_table)
    # NOTE(review): ``full_matrix`` is not defined in this function; it must
    # exist at module level or this line raises NameError - confirm.
    print(full_matrix)
开发者ID:7andrew7,项目名称:vaccine-sentiment,代码行数:28,代码来源:full-dataset-bayes.py
注：本文中的nltk.classify.NaiveBayesClassifier类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论