This article collects typical usage examples of the Python class nltk.classify.MaxentClassifier. If you have been wondering what MaxentClassifier does, how to use it, or what real code using it looks like, the curated class examples below should help.
The following 20 code examples of the MaxentClassifier class are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python examples.
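Before the project-specific examples, here is a minimal, self-contained sketch of the train/classify/evaluate cycle that most of the snippets below follow. The tiny featuresets are invented purely for illustration:

from nltk.classify import MaxentClassifier
from nltk.classify.util import accuracy

# Toy training data: (feature_dict, label) pairs.
train_set = [
    ({'contains(good)': True}, 'pos'),
    ({'contains(great)': True}, 'pos'),
    ({'contains(bad)': True}, 'neg'),
    ({'contains(awful)': True}, 'neg'),
]
test_set = [({'contains(good)': True}, 'pos'),
            ({'contains(bad)': True}, 'neg')]

# 'iis' and 'gis' run in pure Python; 'megam' requires an external binary
# configured via nltk.classify.megam.config_megam (see Example 1).
classifier = MaxentClassifier.train(train_set, algorithm='iis', trace=0, max_iter=10)
print(classifier.classify({'contains(good)': True}))  # -> 'pos'
print(accuracy(classifier, test_set))
classifier.show_most_informative_features(5)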
Example 1: main_function
def main_function():
    conn_analysis = MySQLdb.connect(host=DATABASES['date_cutoff']['HOST'],
                                    user=DATABASES['date_cutoff']['USER'],
                                    passwd=DATABASES['date_cutoff']['PASSWORD'],
                                    db=DATABASES['date_cutoff']['NAME'])
    training_tweets = classify.get_training_tweets(conn_analysis)
    training_feature_set = process_tweets(training_tweets)
    config_megam('/opt/packages')
    classifier = MaxentClassifier.train(training_feature_set, algorithm="megam", trace=0)
    error_dict = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    count_dict = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    guess_dict = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    full_matrix = {'+': {'+': 0, '-': 0, 'I': 0, 'O': 0},
                   '-': {'+': 0, '-': 0, 'I': 0, 'O': 0},
                   'I': {'+': 0, '-': 0, 'I': 0, 'O': 0},
                   'O': {'+': 0, '-': 0, 'I': 0, 'O': 0}}
    test_tweets = classify.get_test_tweets(conn_analysis)
    test_feature_set = process_tweets(test_tweets)
    classifier.show_most_informative_features(10)
    classifier_accuracy = accuracy(classifier, test_feature_set)
    print "classifier accuracy: " + repr(classifier_accuracy)
Author: 7andrew7 | Project: vaccine-sentiment | Lines: 28 | Source: max-ent-bigrams.py
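Like most snippets on this page, Example 1 is an excerpt: helpers such as classify, process_tweets, and DATABASES are defined elsewhere in the project. The NLTK-side imports it presumably relies on are:

import MySQLdb
from nltk.classify import MaxentClassifier
from nltk.classify.util import accuracy
from nltk.classify.megam import config_megam  # tells NLTK where the megam binary lives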
Example 2: __maxent_train
def __maxent_train(fs):
    return MaxentClassifier.train(fs,
        algorithm=algorithm,
        gaussian_prior_sigma=gaussian_prior_sigma,
        count_cutoff=count_cutoff,
        min_lldelta=min_lldelta,
        trace=trace)
Author: Sandy4321 | Project: nltk_contrib | Lines: 7 | Source: chunk.py
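The closure reads algorithm, gaussian_prior_sigma, count_cutoff, min_lldelta, and trace from its enclosing scope, which makes it usable as a one-argument classifier_builder callback for NLTK's classifier-based taggers. A hedged sketch of that wiring (train_sents is an assumed list of tagged sentences; compare Example 15 below):

from nltk.tag.sequential import ClassifierBasedPOSTagger

tagger = ClassifierBasedPOSTagger(train=train_sents,
                                  classifier_builder=__maxent_train)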
Example 3: main_function
def main_function():
    conn_analysis = MySQLdb.connect(host=DATABASES['default']['HOST'],
                                    user=DATABASES['default']['USER'],
                                    passwd=DATABASES['default']['PASSWORD'],
                                    db=DATABASES['default']['NAME'])
    training_tweets = classify.get_training_tweets(conn_analysis)
    training_feature_set = classify.process_tweets(training_tweets)
    config_megam('/opt/packages')
    classifier = MaxentClassifier.train(training_feature_set, algorithm="megam", trace=0)
    count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    tweets = classify.get_tweets_to_classify(conn_analysis)
    for tweet in tweets:
        text = classify.get_tweet_text(conn_analysis, tweet[0])[0][0]
        guess = classifier.classify(classify.process_tweet(text))
        update_tweet_polarity(tweet[0], guess, conn_analysis)
        count_table[guess] += 1
    # For the tweets where polarity was determined manually, copy from
    # majority_vote to auto_vote
    fix_manual_tweets(conn_analysis)
    print count_table
Author: 7andrew7 | Project: vaccine-sentiment | Lines: 26 | Source: full-dataset-max-ent.py
Example 4: train
def train(self, d):
    """
    Given a labeled set, train our classifier.
    """
    t = self.__tag_data_set(d)
    self.classifier = MaxentClassifier.train(t)
    logging.info("Training on %s records complete." % len(d))
Author: agness | Project: recipe_nltk | Lines: 7 | Source: nltk_classifier.py
Example 5: _train
def _train(self, algo='iis', trace=0, max_iter=10):
    '''
    Internal method to train and return an NLTK maxent classifier.
    '''
    data = [(p.text, p.quote) for p in train_query]
    train_set = [(get_features(n), g) for (n, g) in data]
    return MaxentClassifier.train(train_set, algorithm=algo, trace=trace, max_iter=max_iter)
Author: bokas | Project: citizen-quotes | Lines: 7 | Source: maxent.py
Example 6: classify_maxent
def classify_maxent(X_train, Y_train, X_test):
    # Pair each training feature dict with its label.
    training_data = []
    for i in range(len(X_train)):
        training_data.append((X_train[i], Y_train[i]))
    clf = MaxentClassifier.train(training_data)
    pred_labels = clf.classify_many(X_test)
    return pred_labels
Author: JoshuaW1990 | Project: SentimentAnalysis | Lines: 9 | Source: SentimentAnalysis.py
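classify_many applies the trained model to every feature dict in X_test and returns the predicted labels in order. A toy invocation, with invented feature dicts:

X_train = [{'price': 'low'}, {'price': 'high'}]
Y_train = ['buy', 'skip']
print(classify_maxent(X_train, Y_train, [{'price': 'low'}]))  # e.g. ['buy']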
Example 7: maxent_train
def maxent_train(self):
    self.classifier_all = MaxentClassifier.train(self.maxent_memes_all, trace=100, max_iter=5)
    #classifier_bottom = MaxentClassifier.train(maxent_memes_bottom, trace=100, max_iter=250)
    #classifier_all = MaxentClassifier.train(maxent_memes_all, trace=100, max_iter=250)
    weights = self.classifier_all.weights()
    # Write one lambda weight per line; 'with' ensures the file is closed.
    with open("lambdas.txt", "w") as f:
        for weight in weights:
            f.write("weight = %f" % weight)
            f.write("\n")
Author: AlexeyMK | Project: DATASS | Lines: 10 | Source: NgramsManager.py
Example 8: main_function
def main_function():
    conn = MySQLdb.connect(host=DATABASES['default']['HOST'],
                           user=DATABASES['default']['USER'],
                           passwd=DATABASES['default']['PASSWORD'],
                           db=DATABASES['default']['NAME'])
    training_tweets = classify.get_training_tweets(conn)
    training_feature_set = classify.process_tweets(training_tweets)
    bayes_classifier = NaiveBayesClassifier.train(training_feature_set)
    count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    test_tweets = classify.get_test_tweets(conn)
    for tweet in test_tweets:
        text = classify.get_tweet_text(conn, tweet[0])[0][0]
        guess = bayes_classifier.classify(classify.process_tweet(text))
        classify.update_tweet_polarity(tweet[0], guess, conn)
        count_table[guess] += 1
    print "Naive Bayes"
    print count_table
    count_table = {'+': 0, '-': 0, 'I': 0, 'O': 0}
    config_megam('/opt/packages')
    max_ent_classifier = MaxentClassifier.train(training_feature_set, algorithm="megam", trace=0)
    for tweet in test_tweets:
        text = classify.get_tweet_text(conn, tweet[0])[0][0]
        guess = max_ent_classifier.classify(classify.process_tweet(text))
        update_tweet_polarity_ensemble(tweet[0], guess, conn)
        count_table[guess] += 1
    print "Maximum Entropy"
    print count_table
    # generate the accuracy matrix
    full_matrix = {'+': {'+': 0, '-': 0, 'I': 0, 'O': 0},
                   '-': {'+': 0, '-': 0, 'I': 0, 'O': 0},
                   'I': {'+': 0, '-': 0, 'I': 0, 'O': 0},
                   'O': {'+': 0, '-': 0, 'I': 0, 'O': 0}}
    for tweet in test_tweets:
        result = classify.run_sql(conn, classify.Statements.CHECK_CONSENSUS % tweet[0])
        guess = result[0][0]
        actual_result = classify.run_sql(conn, classify.Statements.CHECK_MAJORITY % tweet[0])
        actual = actual_result[0][0]
        if guess is not None and actual is not None:
            full_matrix[actual][guess] += 1
    print full_matrix
Author: 7andrew7 | Project: vaccine-sentiment | Lines: 55 | Source: a-ensemble-bayes-max-ent.py
Example 9: axentClassifier
def axentClassifier(features_train, features_test):
    print 'train on %d instances, test on %d instances' % (len(features_train), len(features_test))
    classifier = MaxentClassifier.train(features_train, algorithm='gis')
    print 'accuracy:', nltk.classify.util.accuracy(classifier, features_test)
    precisions, recalls = precision_recall(classifier, features_test)
    print "precision: ", precisions, "recall: ", recalls
# def sklearnMultinomialNB(features_train, features_test):
#     print 'train on %d instances, test on %d instances' % (len(features_train), len(features_test))
#     classifier = SklearnClassifier(MultinomialNB())
#     classifier.train
#     print 'accuracy:', nltk.classify.util.accuracy(classifier, features_test)
Author: andylikescodes | Project: SentimentalAnalysis | Lines: 12 | Source: Classifiers.py
Example 10: run
def run(training):
    """
    Create and train a MaxentClassifier.
    :return: a trained classifier
    """
    print "Training ME Classifier..."
    # feats = label_feat_from_corps(movie_reviews)
    # training, testing = split_label_feats(feats)
    me_classifier = MaxentClassifier.train(training, algorithm='GIS', trace=0, max_iter=10, min_lldelta=0.5)
    print "ME Classifier trained..."
    return save_classifier(me_classifier)
Author: Saher- | Project: SATC | Lines: 12 | Source: Classifier_ME.py
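save_classifier is project-specific and not shown on this page; a minimal stand-in, assuming a plain pickle file, might look like:

import pickle

def save_classifier(classifier, path='me_classifier.pkl'):
    # Persist the trained model so later runs can skip retraining.
    with open(path, 'wb') as out:
        pickle.dump(classifier, out)
    return classifier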
Example 11: trainMaxent
def trainMaxent(featuresets):
    #idx = 2*len(featuresets) / ratio
    #train_set, test_set = featuresets[idx:], featuresets[:idx]
    train_set = featuresets
    algo = MaxentClassifier.ALGORITHMS[1]
    #max_iter=20
    classifier = MaxentClassifier.train(train_set, algo, max_iter=3)
    #print accuracy(classifier, test_set)
    classifier.show_most_informative_features(100)
    #train_set, test_set = featuresets[idx:], featuresets[:idx]
    #classifier.train(train_set, algo, max_iter=20)
    #print accuracy(classifier, test_set)
    #classifier.show_most_informative_features(100)
    return classifier
Author: tkuboi | Project: eDetection_v2_1 | Lines: 14 | Source: classifyFace.py
Example 12: train
def train(cls, training_sequence, **kwargs):
    feature_detector = kwargs.get('feature_detector')
    gaussian_prior_sigma = kwargs.get('gaussian_prior_sigma', 10)
    count_cutoff = kwargs.get('count_cutoff', 1)
    stopping_condition = kwargs.get('stopping_condition', 1e-7)

    def __featurize(tagged_token):
        tag = tagged_token[-1]
        feats = feature_detector(tagged_token)
        return (feats, tag)

    labeled_featuresets = LazyMap(__featurize, training_sequence)
    classifier = MaxentClassifier.train(labeled_featuresets,
                                        algorithm='megam',
                                        gaussian_prior_sigma=gaussian_prior_sigma,
                                        count_cutoff=count_cutoff,
                                        min_lldelta=stopping_condition)
    return cls(classifier._encoding, classifier.weights())
Author: Sandy4321 | Project: nltk_contrib | Lines: 16 | Source: train.py
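LazyMap applies __featurize on demand rather than materializing every featureset up front, which keeps memory flat when training_sequence is large. It behaves like a lazily evaluated map():

from nltk.util import LazyMap

squares = LazyMap(lambda x: x * x, range(5))
print(squares[3])     # computed on access -> 9
print(list(squares))  # [0, 1, 4, 9, 16]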
Example 13: trainCorpus
def trainCorpus():
    if os.path.exists(classifier_fname):
        return LoadClassifier()
    else:
        c = getDealsCorpus()
        hiwords = corpus_high_info_words(c)
        featdet = lambda words: bag_of_words_in_set(words, hiwords)
        train_feats, test_feats = corpus_train_test_feats(c, featdet)
        trainf = lambda train_feats: MaxentClassifier.train(train_feats, algorithm='megam', trace=0, max_iter=10)
        labelset = set(c.categories())
        classifiers = train_binary_classifiers(trainf, train_feats, labelset)
        multi_classifier = MultiBinaryClassifier(*classifiers.items())
        multi_p, multi_r, avg_md = multi_metrics(multi_classifier, test_feats)
        print multi_p['activitiesevents'], multi_r['activitiesevents'], avg_md
        SaveClassifier(multi_classifier)
        return multi_classifier
Author: shingjay | Project: dealchan | Lines: 16 | Source: trainer.py
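The helpers here (corpus_high_info_words, bag_of_words_in_set, train_binary_classifiers, multi_metrics) are project utilities in the style of Jacob Perkins' NLTK cookbook and are not shown on this page. For orientation, the feature detector built from bag_of_words_in_set is roughly:

def bag_of_words_in_set(words, goodwords):
    # Keep only the high-information words, as boolean features.
    return dict((word, True) for word in set(words) & set(goodwords))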
Example 14: train
def train(self, featureset=None):
    """
    Trains the maximum entropy classifier and returns it. If a
    featureset is specified it trains on that; otherwise it trains on
    the model's featureset.
    Pass in a featureset during cross validation.
    Returns the classifier and the training time.
    """
    featureset = featureset or self.featureset()
    # Time how long it takes to train
    start = time.time()
    classifier = MaxentClassifier.train(featureset,
                                        algorithm='megam', trace=1, gaussian_prior_sigma=1)
    delta = time.time() - start
    return classifier, delta
Author: ericvsmith | Project: product-classifier | Lines: 19 | Source: build.py
Example 15: parse
def parse():
    tagger_classes = [nltk.UnigramTagger, nltk.BigramTagger]
    trained_sents, tagged_sents = trainer("WSJ_02-21.pos-chunk", "WSJ_23.pos")
    #tagger = nltk.UnigramTagger(trained_sents)
    print len(trained_sents)
    tagger = ClassifierBasedPOSTagger(train=trained_sents[:10000],
                                      classifier_builder=lambda train_feats:
                                          MaxentClassifier.train(train_feats, trace=0, max_iter=10))
    f = open("WSJ_23.chunk", 'w')
    #print sents
    for sents in tagged_sents:
        (words, tags) = sents[0], sents[1]
        chunks = tagger.tag(tags)
        #print words, chunks
        wtc = zip(words, chunks)
        for tup in wtc:
            f.write("%s\t%s\n" % (tup[0], tup[1][1]))
        f.write("\n")
Author: pratheeksh | Project: NLP | Lines: 20 | Source: chunker.py
Example 16: main_function
def main_function():
    conn = MySQLdb.connect(
        host=DATABASES["date_cutoff"]["HOST"],
        user=DATABASES["date_cutoff"]["USER"],
        passwd=DATABASES["date_cutoff"]["PASSWORD"],
        db=DATABASES["date_cutoff"]["NAME"],
    )
    training_tweets = get_test_tweets(conn)
    # training_feature_set = process_tweets(training_tweets)
    total_word_count = total_words(conn)
    training_feature_set = process_bigrams(conn, "+", total_word_count, best_words)
    training_feature_set += process_bigrams(conn, "-", total_word_count, best_words)
    training_feature_set += process_bigrams(conn, "I", total_word_count, best_words)
    training_feature_set += process_bigrams(conn, "O", total_word_count, best_words)
    print "configuring megam"
    config_megam("/opt/packages")
    print "starting training"
    classifier = MaxentClassifier.train(training_feature_set, algorithm="megam", trace=0)
    print "training complete"
    classifier.show_most_informative_features(40)
    test_tweets = get_training_tweets(conn)
    test_feature_set = process_tweets(test_tweets)
    classifier_accuracy = accuracy(classifier, test_feature_set)
    # full_matrix = {'+':{'+':0, '-':0, 'I':0, 'O':0},
    #                '-':{'+':0, '-':0, 'I':0, 'O':0},
    #                'I':{'+':0, '-':0, 'I':0, 'O':0},
    #                'O':{'+':0, '-':0, 'I':0, 'O':0}}
    # for f in test_tweets:
    #     guess = classifier.classify(process_tweet(f[1]))
    #     full_matrix[f[2]][guess] += 1
    # print full_matrix
    print "classifier accuracy: " + repr(classifier_accuracy)
Author: khandelwal | Project: vaccine-sentiment | Lines: 40 | Source: max-ent-sig-words-bigrams.py
Example 17: _train_mode_for_user
def _train_mode_for_user(self, userid):
    if userid in self._user_classifier:
        print("Already exists; rebuilding.")
        self._user_classifier[userid] = None
    # Only positive and negative ratings matter here; neutral ratings are
    # ignored, so an INNER JOIN is sufficient.
    sql = """ SELECT site_news.news_uuid, user_score.news_user_score as news_score FROM site_news
          INNER JOIN user_score ON site_news.news_uuid = user_score.newsid
          WHERE DATE(site_news.time) < CURRENT_DATE() AND DATE(site_news.time) > DATE_SUB(CURRENT_DATE(),INTERVAL 5 DAY)
          AND user_score.news_user_score != 1 AND user_score.userid=%d; """ % (userid)
    train_items = self.db_conn.query(sql)
    print("Building POS/NEG features")
    pos_feature = []
    neg_feature = []
    for item in train_items:
        news_vector = nlp_master.get_old_vect(item['news_uuid'])
        if item['news_score'] == 0:    # positive rating
            pos_feature.append((self.best_word_features(news_vector, news_vector), 'pos'))
        elif item['news_score'] == 2:  # negative rating
            neg_feature.append((self.best_word_features(news_vector, news_vector), 'neg'))
    print("POS:%d, NEG:%d" % (len(pos_feature), len(neg_feature)))
    if len(pos_feature) <= 3 or len(neg_feature) <= 3:
        print("Too few features; giving up...")
        self._user_classifier[userid] = None
        return
    trainSet = pos_feature + neg_feature
    self._user_classifier[userid] = MaxentClassifier.train(trainSet, max_iter=50)
    print("MaxEnt classifier for %d built!" % (userid))
    # Persist the updated result
    today = datetime.date.today()
    self.dumpfile = "dumpdir/recmaxent_dump.%d_%d" % (today.month, today.day)
    with open(self.dumpfile, 'wb', -1) as fp:
        dump_data = []
        dump_data.append(self._user_classifier)
        pickle.dump(dump_data, fp, -1)
    return
Author: FashtimeDotCom | Project: readmeinfo | Lines: 39 | Source: RecMaxEnt.py
Example 18: __init__
def __init__(self):
    try:
        classifier = None
        if not os.path.exists(classifier_path):
            '''with open('nltk_sentiment_data/polarity_pos.txt', 'rb') as fp:
                pos_lines = fp.readlines()
            pos_feats = [(word_feats(tokenizer.tokenize(p_line)), '1') for p_line in pos_lines]
            with open('nltk_sentiment_data/polarity_neg.txt', 'rb') as fn:
                neg_lines = fn.readlines()
            neg_feats = [(word_feats(tokenizer.tokenize(n_line)), '0') for n_line in neg_lines]'''
            filename = os.path.dirname(os.path.abspath(__file__)) + "/nltk_sentiment_data/sentiment_data_twitter.txt"
            with open(filename, 'rb') as fp:
                lines = fp.readlines()
            feats = [(word_feats(tokenizer.tokenize(line.split(' -> ')[1].strip().lower())), line.split(' -> ')[0])
                     for line in lines if len(line.split(' -> ')) >= 2]
            print "Total : %s" % (len(feats),)
            cutoff = int(len(feats) * 0.1)
            trainfeats, testfeats = feats[cutoff:], feats[:cutoff]
            '''cutoff = int(len(pos_feats) * 0.1)
            trainfeats = pos_feats[cutoff:] + neg_feats[cutoff:]
            testfeats = pos_feats[:cutoff] + neg_feats[:cutoff]'''
            print 'train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats))
            #classifier = NaiveBayesClassifier.train(trainfeats)
            classifier = MaxentClassifier.train(trainfeats, algorithm='iis', trace=0, max_iter=10)
            print 'accuracy:', nltk.classify.util.accuracy(classifier, testfeats)
            classifier.show_most_informative_features()
            with open(classifier_path, "w") as fh:
                cPickle.dump(classifier, fh, 1)
        else:
            with open(classifier_path, "r") as fh:
                classifier = cPickle.load(fh)
        self.classifier = classifier
        logger.info("Initialized SentimentClassifier instance..")
    except Exception, e:
        logger.exception(e)
        raise e
Author: Rahulaswani | Project: TweetSherlock | Lines: 39 | Source: sentimentClassifier.py
Example 19: evaluate_classifier
def evaluate_classifier(featx, collocationFunc):
    negids = movie_reviews.fileids('neg')
    posids = movie_reviews.fileids('pos')
    negfeats = [(featx(movie_reviews.words(fileids=[f]), collocationFunc), 'neg') for f in negids]
    posfeats = [(featx(movie_reviews.words(fileids=[f]), collocationFunc), 'pos') for f in posids]
    lenNegFeats = min(len(negfeats), 400)
    lenPosFeats = min(len(posfeats), 400)
    # lenNegFeats = len(negfeats)
    # lenPosFeats = len(posfeats)
    negcutoff = int(lenNegFeats * 3 / 4)
    poscutoff = int(lenPosFeats * 3 / 4)
    trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
    testfeats = negfeats[negcutoff:lenNegFeats] + posfeats[poscutoff:lenPosFeats]
    classifier = MaxentClassifier.train(trainfeats, algorithm='IIS', max_iter=3)
    print(classifier)
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)
    evaluationMetrics = {}
    classifier.show_most_informative_features()
    evaluationMetrics['model'] = classifier
    evaluationMetrics['trainingData'] = trainfeats
    evaluationMetrics['accuracy'] = nltk.classify.util.accuracy(classifier, testfeats)
    evaluationMetrics['posPrec'] = nltk.precision(refsets['pos'], testsets['pos'])
    evaluationMetrics['posRecall'] = nltk.recall(refsets['pos'], testsets['pos'])
    evaluationMetrics['posF_Score'] = nltk.f_measure(refsets['pos'], testsets['pos'])
    evaluationMetrics['negPrec'] = nltk.precision(refsets['neg'], testsets['neg'])
    evaluationMetrics['negRecall'] = nltk.recall(refsets['neg'], testsets['neg'])
    evaluationMetrics['negF_Score'] = nltk.f_measure(refsets['neg'], testsets['neg'])
    return evaluationMetrics
Author: alokkumary2j | Project: Sentiment-Analysis-Using-Python-NLTK | Lines: 38 | Source: MaxEntSentimentAnalysis.py
Example 20: getClassifier
def getClassifier(tweetfile):
    print "Loading content & preparing text"
    content = prepText(loadFile(tweetfile))
    print "Categorizing contents"
    categorized = prepClassifications(content)
    print "Deriving NGrams"
    NGrammized = collectNGrams(categorized, degreesToUse)
    print "Compiling Results"
    readyToSend = []
    for category in NGrammized.keys():
        readyToSend += NGrammized[category]
    print "Attempting Classification"
    if classMode == 'naive bayes':
        from nltk.classify import NaiveBayesClassifier
        classifier = NaiveBayesClassifier.train(readyToSend)
    elif classMode == 'max ent':
        from nltk.classify import MaxentClassifier
        classifier = MaxentClassifier.train(readyToSend)
    print
    classifier.show_most_informative_features(n=200)
    classifier.show_most_informative_features()
    return classifier
Author: jschlitt84 | Project: TwitterGIStudy | Lines: 24 | Source: TweetMatch.py
Note: The nltk.classify.MaxentClassifier class examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other source-code and documentation platforms. The snippets are selected from open-source projects contributed by their authors, and copyright remains with the original authors; consult each project's license before redistributing or reusing the code. Do not republish without permission.