This article collects typical usage examples of the Python class nltk.classify.scikitlearn.SklearnClassifier. If you have been asking yourself what SklearnClassifier is for, how to use it, or where to find worked examples, the curated class examples below should help.
The article presents 20 code examples of the SklearnClassifier class, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
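Before the project excerpts, a minimal self-contained sketch of the core API may be useful: SklearnClassifier wraps any scikit-learn estimator behind NLTK's classifier interface, converting NLTK-style feature dicts into scikit-learn feature vectors internally. The toy feature dicts and labels below are invented purely for illustration:

from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.naive_bayes import BernoulliNB

# NLTK-style training data: (feature dict, label) pairs (toy data for illustration)
train = [({'contains(good)': True}, 'pos'),
         ({'contains(great)': True}, 'pos'),
         ({'contains(bad)': True}, 'neg'),
         ({'contains(awful)': True}, 'neg')]

# wrap the estimator; train() vectorizes the dicts, fits the model, and returns the classifier
classif = SklearnClassifier(BernoulliNB()).train(train)

print(classif.classify({'contains(good)': True}))        # -> 'pos'
print(classif.classify_many([{'contains(bad)': True}]))  # -> ['neg']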
Example 1: classifier_for_lemma

import gc

from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.ensemble import RandomForestClassifier
# load_training_for_word and trainingdata are helpers defined in the chipa project

def classifier_for_lemma(lemma, filenames):
    # XXX: always doing non-null and Random Forest for initial version
    # RandomForestClassifier needs dense input, so sparse vectors are disabled
    classifier = SklearnClassifier(RandomForestClassifier(), sparse=False)
    print("loading training data for", lemma)
    load_training_for_word(lemma, filenames.bitextfn, filenames.alignfn,
                           filenames.annotatedfn)
    training = trainingdata.trainingdata_for(lemma, nonnull=True)
    print("got {0} instances for {1}".format(len(training), lemma))

    # delete the sentences themselves; we have the instances
    trainingdata.set_examples([], [])
    trainingdata.set_sl_annotated([])
    gc.collect()

    if len(training) > (20 * 1000):
        print("capping to 20k instances to fit in memory")
        training = training[:20 * 1000]

    labels = set(label for (feat, label) in training)
    print("loaded training data for", lemma)
    # need some data and at least two classes to train a classifier
    if (not training) or len(labels) < 2:
        return None
    classifier.train(training)
    return classifier

Author: alexrudnick | Project: chipa | Lines: 25 | Source: annotate_clwsd.py
Example 2: trainClassifiers

# requires: import nltk, pickle; from nltk.classify.scikitlearn import SklearnClassifier;
# from sklearn.svm import LinearSVC; extract_features is defined in the project

def trainClassifiers(tweets):
    # Generate the training set
    training_set = nltk.classify.util.apply_features(extract_features, tweets)
    print("Training set created!")

    # Train and save the Naive Bayes classifier to a file
    NBClassifier = nltk.NaiveBayesClassifier.train(training_set)
    f = open('data/trained_classifiers/NBClassifier.pickle', 'wb')
    pickle.dump(NBClassifier, f, 1)
    f.close()
    print("NBClassifier Classifier Trained")

    # Train linear SVC
    linear_SVC_classifier = SklearnClassifier(LinearSVC())
    linear_SVC_classifier.train(training_set)

    # Train Max Entropy Classifier
    # MaxEntClassifier = nltk.classify.maxent.MaxentClassifier.train(training_set, 'IIS', trace=2,
    #     encoding=None, labels=None, sparse=True, gaussian_prior_sigma=0, max_iter=5)
    # f = open('data/trained_classifiers/MaxEntClassifier.pickle', 'wb')
    # pickle.dump(MaxEntClassifier, f, 1)
    # f.close()
    # print("MaxEntClassifier Classifier Trained")
    # return (training_set, NBClassifier, MaxEntClassifier)
    return (training_set, NBClassifier, linear_SVC_classifier)

Author: quiuquio | Project: Twitter-Sentiment-Analysis | Lines: 26 | Source: main2.py
Example 3: score

def score(trainset, testset, classifier):
    # wrap the scikit-learn estimator in NLTK's adapter
    classifier = SklearnClassifier(classifier)
    # keep feature ordering stable (note: _vectorizer is a private attribute of the wrapper)
    classifier._vectorizer.sort = False
    classifier.train(trainset)
    (test, tag_test) = zip(*testset)
    pred = classifier.classify_many(test)
    return accuracy_score(tag_test, pred)

Author: eleanordong | Project: datamining | Lines: 7 | Source: sentimentexample.py
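A hedged usage sketch for the score helper above; the trainset/testset names and the estimator choices are assumptions for illustration, following the (feature dict, label) pair format used throughout these examples:

from sklearn.naive_bayes import BernoulliNB
from sklearn.svm import LinearSVC

# trainset/testset: lists of (feature dict, label) pairs, e.g. ({'contains(good)': True}, 'pos')
print(score(trainset, testset, BernoulliNB()))  # accuracy of a Bernoulli naive Bayes model
print(score(trainset, testset, LinearSVC()))    # accuracy of a linear SVM on the same split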
Example 4: __init__

class SKClassifier:

    classifier = None

    def __init__(self, cls='SVC'):
        # dict.get falls back to SVC() when cls is not a recognized name
        self.classifier = SklearnClassifier({
            'SVC': SVC(),
            'LogisticRegression': LogisticRegression(),
            'BernoulliNB': BernoulliNB()
        }.get(cls, SVC()))

    def train(self, trainset):
        self.classifier.train(trainset)

    def test(self, tagged, featuresets):
        predict = self.classifier.classify_many(featuresets)
        print(predict)
        return accuracy_score(tagged, predict)

    def classify(self, featureset):
        return self.classifier.classify(featureset)

    def classify_many(self, featuresets):
        return self.classifier.classify_many(featuresets)

Author: Palazor | Project: sentiment | Lines: 26 | Source: SkClassifier.py
Example 5: learn_model

def learn_model(data, target):
    bestwords = best_of_words(data, target)
    # split validation: 90% training, 10% test
    # (sklearn.cross_validation is deprecated; newer scikit-learn uses sklearn.model_selection)
    data_train, data_test, target_train, target_test = cross_validation.train_test_split(
        data, target, test_size=0.1, random_state=43)
    # classifier = BernoulliNB().fit(data_train, target_train)
    train_feature = []
    test_feature = []
    for i in range(len(data_train)):
        d = data_train[i]
        d = jieba.cut(d, cut_all=False)  # jieba segments Chinese text into words
        l = target_train[i]
        # tmp = [bigram(d), l]
        tmp = [dict([(word, True) for word in d if word in bestwords]), l]
        train_feature.append(tmp)
    for i in range(len(data_test)):
        d = data_test[i]
        d = jieba.cut(d, cut_all=False)
        l = target_test[i]
        # tmp = bigram(d)
        tmp = dict([(word, True) for word in d if word in bestwords])
        test_feature.append(tmp)
    classifier = SklearnClassifier(MultinomialNB())
    classifier.train(train_feature)
    predicted = classifier.classify_many(test_feature)
    evaluate_model(target_test, predicted)
    return classifier, bestwords

Author: cysjtu | Project: SentimentAnalysis | Lines: 32 | Source: nlp_machine_v3.py
Example 6: performCrossValidation

def performCrossValidation(featureset, labels, foldsCount, sklearnclassifier, uniqLabels):
    accuracySum = 0.0
    precisionSums = defaultdict(float)
    recallSums = defaultdict(float)
    fscoreSums = defaultdict(float)
    # sklearn.cross_validation is deprecated; newer code uses sklearn.model_selection.StratifiedKFold
    crossValidationIterations = cross_validation.StratifiedKFold(labels, n_folds=foldsCount)
    for train, test in crossValidationIterations:
        trainset = [featureset[i] for i in train]
        testset = [featureset[i] for i in test]
        print("before train")
        classifier = SklearnClassifier(sklearnclassifier).train(trainset)
        true = [label for features, label in testset]
        predicted = classifier.classify_many([features for features, label in testset])
        precisions, recalls, fscores, support = precision_recall_fscore_support(
            true, predicted, pos_label=None, labels=uniqLabels)
        accuracy = accuracy_score(true, predicted)
        accuracySum += accuracy
        for label, value in zip(uniqLabels, precisions):
            precisionSums[label] += value
        for label, value in zip(uniqLabels, recalls):
            recallSums[label] += value
        for label, value in zip(uniqLabels, fscores):
            fscoreSums[label] += value

    print("Average accuracy: {0:.3f}".format(accuracySum / foldsCount))
    measures = {label: (precSum / foldsCount, recallSums[label] / foldsCount, fscoreSums[label] / foldsCount)
                for label, precSum in precisionSums.items()}
    for label, (prec, recall, fscore) in measures.items():
        print("Average precision for {0}: {1:.3f}".format(label, prec))
        print("Average recall for {0}: {1:.3f}".format(label, recall))
        print("Average f score for {0}: {1:.3f}".format(label, fscore))

Author: ekedziora | Project: sentiment | Lines: 32 | Source: utils.py
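A hedged invocation sketch for performCrossValidation; the featureset variable and the fold count are assumptions for illustration. The function expects a list of (feature dict, label) pairs plus a parallel list of labels for stratified folding:

from sklearn.naive_bayes import MultinomialNB

# featureset: list of (feature dict, label) pairs built elsewhere in the project
labels = [label for _, label in featureset]
performCrossValidation(featureset, labels, 5, MultinomialNB(), sorted(set(labels)))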
Example 7: svm

def svm(train_data, preprocessing=True):
    # note: the preprocessing flag is accepted but not used in this excerpt
    training_data = []
    for data in train_data:
        training_data.append(preprocess(data[0], label=data[1]))
    cl = SklearnClassifier(LinearSVC())
    cl.train(training_data)
    return cl

Author: EricSchles | Project: text_classify | Lines: 7 | Source: algorithms.py
Example 8: sentiment_classifier

def sentiment_classifier(debug):
    # trainingfp = open('training.csv', 'rb')
    train = pd.read_csv('training.csv', delimiter=',', quotechar='"', escapechar='\\', header=0)
    num_tweets = train['TweetText'].size
    cleantweets = []
    for i in range(0, num_tweets):
        if debug and ((i + 1) % 1000 == 0):
            print("Tweet %d of %d\n" % (i + 1, num_tweets))
        cleantweets.append((tweet_to_words(train['TweetText'][i]), train['Sentiment'][i]))

    # vectorizer = CountVectorizer(analyzer="word", tokenizer=None, preprocessor=None,
    #                              stop_words=None, max_features=5000)
    # train_data_features = vectorizer.fit_transform([t for (t, _) in cleantweets])
    # feature_labels = [(m, l) for ((f, l), m) in zip(cleantweets, train_data_features)]
    # forest = RandomForestClassifier(n_estimators=sensitivity)
    # forest = forest.fit(train_data_features, train['Sentiment'])

    classif = SklearnClassifier(LinearSVC())
    classif.train(cleantweets)
    return classif

Author: greensam | Project: am221project | Lines: 27 | Source: sentiment.py
Example 9: evaluate

def evaluate(classifier_alo):
    classifier = SklearnClassifier(classifier_alo)  # use the scikit-learn estimator through NLTK's interface
    classifier.train(trainFeatures)                 # train the classifier
    referenceSets = collections.defaultdict(set)
    testSets = collections.defaultdict(set)
    for i, item in enumerate(testFeatures):
        referenceSets[item[1]].add(i)
        predicted = classifier.classify(item[0])
        testSets[predicted].add(i)
    pos_pre = nltk.metrics.precision(referenceSets['pos'], testSets['pos'])
    pos_recall = nltk.metrics.recall(referenceSets['pos'], testSets['pos'])
    neg_pre = nltk.metrics.precision(referenceSets['neg'], testSets['neg'])
    neg_recall = nltk.metrics.recall(referenceSets['neg'], testSets['neg'])
    # F1 = 2 * precision * recall / (precision + recall)
    pos_f1 = 2 * (pos_pre * pos_recall) / (pos_recall + pos_pre)
    neg_f1 = 2 * (neg_pre * neg_recall) / (neg_recall + neg_pre)
    print('{0:.3f} {1:.3f} {2:.3f} {3:.3f} {4:.3f} {5:.3f}'.format(
        pos_pre, pos_recall, neg_pre, neg_recall, pos_f1, neg_f1))

Author: delili | Project: NLP_Comments_Sentiment_Analysis | Lines: 25 | Source: process.py
Example 10: chatBot

class chatBot(object):

    def __init__(self):
        self.posts = nltk.corpus.nps_chat.xml_posts()
        self.categories = ['Emotion', 'ynQuestion', 'yAnswer', 'Continuer',
                           'whQuestion', 'System', 'Accept', 'Clarify', 'Emphasis',
                           'nAnswer', 'Greet', 'Statement', 'Reject', 'Bye', 'Other']
        self.mapper = [0, 2, 6, 3, 11, 5, 8, 1, 8, 3, 10, 11, 13, 13, 13]
        self.responses = {}
        self.featuresets = []
        self.train = []
        self.test = []
        self.testSet = []
        self.testSetClass = []
        self.classif = SklearnClassifier(LinearSVC())
        for i in range(0, 15):
            self.responses[i] = []
        for post in self.posts:
            self.featuresets.append((self.tokenize(post.text),
                                     self.categories.index(post.get('class'))))
            self.temp = self.responses[self.categories.index(post.get('class'))]
            self.temp.append(post.text)

    def tokenize(self, sentence):
        """
        Extracts a set of features from a message.
        """
        features = {}
        tokens = nltk.word_tokenize(sentence)
        for t in tokens:
            features['contains(%s)' % t.lower()] = True
        return features

    def talk(self):
        while 1:
            inp = input("YOU: ")
            features = self.tokenize(inp)
            # classify_many expects a list of featuresets, so wrap the single dict
            pp = int(self.classif.classify_many([features])[0])
            m = self.mapper[pp]
            r = self.responses[m]
            val = randint(0, len(r) - 1)  # randint is inclusive on both ends
            print("BOT: " + r[val])

    def trainSet(self):
        shuffle(self.featuresets)
        size = int(len(self.featuresets) * .1)  # 10% is used for the test set
        self.train = self.featuresets[size:]
        self.test = self.featuresets[:size]
        self.classif.train(self.train)
        self.testSet = []
        self.testSetClass = []
        for i in self.test:
            self.testSet.append(i[0])
            self.testSetClass.append(i[1])
        self.batch = self.classif.classify_many(self.testSet)

    def statistics(self):
        print(classification_report(self.testSetClass, self.batch,
                                    labels=list(set(self.testSetClass)),
                                    target_names=self.categories))

Author: donjuma | Project: NLP_chatBot | Lines: 60 | Source: nps.py
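A hedged usage sketch for the chatBot class above, assuming the NLTK corpora it relies on have been downloaded:

import nltk
nltk.download('nps_chat')  # chat corpus used for training
nltk.download('punkt')     # tokenizer models used by nltk.word_tokenize

bot = chatBot()    # loads and featurizes the NPS chat corpus
bot.trainSet()     # trains the LinearSVC on 90% of the posts, classifies the held-out 10%
bot.statistics()   # prints a per-category classification report
bot.talk()         # interactive loop: classify the input, answer from the matching category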
Example 11: train

def train(cleanedDataCollection, tagPool):
    posSamples = []
    negSamples = []
    featuresets = [(extractFeatures(d, tagPool), c) for (d, c) in cleanedDataCollection]
    for sample in featuresets:
        if sample[1] == "trash":
            negSamples.append(sample)
        else:
            posSamples.append(sample)
    # hold out the first 10 samples of each class for testing
    train_set = negSamples[10:] + posSamples[10:]
    test_set = negSamples[:10] + posSamples[:10]

    # classifier = nltk.NaiveBayesClassifier.train(train_set)
    # print(nltk.classify.accuracy(classifier, test_set))
    # classifier.show_most_informative_features(5)
    # return classifier

    sk_classifier = SklearnClassifier(MultinomialNB())
    sk_classifier.train(train_set)
    print("accuracy is: %s" % accuracy(sk_classifier, test_set))
    precision, recall, fMeasure = precision_recall_fmeasure(sk_classifier, test_set, "useful")
    print("precision is: %s" % precision)
    print("recall is: %s" % recall)
    print("F-measure is: %s" % fMeasure)
    return sk_classifier

Author: iaoshili | Project: NLP_Project | Lines: 30 | Source: InterestingArticleIdentifier.py
Example 12: main3

def main3():
    from nltk.classify.scikitlearn import SklearnClassifier
    from sklearn.svm import LinearSVC
    from sklearn.metrics import classification_report, confusion_matrix
    from matplotlib import pyplot
    import numpy as np

    svm = SklearnClassifier(LinearSVC(loss="hinge"))
    svm.train(trainData)
    print("SVM: ", nltk.classify.accuracy(svm, testData))
    results = svm.classify_many(item[0] for item in testData)
    print(results)

    # getting a full report (t_test_skl is the gold-label list, defined elsewhere in the project;
    # target_names must be the class names, not the full label sequence)
    print(classification_report(t_test_skl, results, labels=sorted(set(t_test_skl)),
                                target_names=sorted(set(t_test_skl))))

    # Compute confusion matrix
    cmm = confusion_matrix([x[1] for x in testData], results)
    print(cmm)
    cmm = np.array(cmm, dtype=float)
    print(cmm.shape)

    # Show confusion matrix in a separate window
    pyplot.imshow(cmm, interpolation='nearest')
    pyplot.show()

Author: listentojohan | Project: cjor | Lines: 32 | Source: Main.py
Example 13: SVM

def SVM(training_set, test_set):
    classifier = SklearnClassifier(LinearSVC())
    print("Training a new SVM classifier")
    classifier.train(training_set)
    accuracy = nltk.classify.accuracy(classifier, test_set)
    print("Accuracy of SVM in training:", accuracy)
    trueLabels = [l for d, l in test_set]
    predictedLabels = classifier.classify_many([d for d, t in test_set])

    def runTrained(test_set, hasTags=False):
        # run the already-trained classifier on a new test set
        if hasTags:
            tagglessTest_set = [data for data, tag in test_set]
            acc = nltk.classify.accuracy(classifier, test_set)
            print("Accuracy:", acc)
            predictions = classifier.classify_many(tagglessTest_set)
            return ([e for e in zip(tagglessTest_set, predictions)], acc)
        else:
            tagglessTest_set = test_set
            predictions = classifier.classify_many(tagglessTest_set)
            return [e for e in zip(tagglessTest_set, predictions)]

    return (runTrained, accuracy, predictedLabels, trueLabels)

Author: dfgerrity | Project: AuthorDetector | Lines: 26 | Source: ClassifierRunner.py
Example 14: clf_score

def clf_score(classifier):
    classifier = SklearnClassifier(classifier)
    classifier.train(train_set)
    predict = classifier.classify_many(test)
    # class probabilities would be available via classifier.prob_classify_many()
    return accuracy_score(tag_test, predict)

Author: wac81 | Project: LSI-for-ChineseDocument | Lines: 7 | Source: store+sentiment+classifier.py
Example 15: get_performance

def get_performance(clf_sel, train_features, test_features):
    ref_set = collections.defaultdict(set)
    test_set = collections.defaultdict(set)
    classification_error = False
    clf = SklearnClassifier(clf_sel)
    try:
        classifier = clf.train(train_features)
    except Exception:
        classification_error = True
        # print(str(clf_sel.__class__), 'NA')

    if not classification_error:
        # pickle the trained model when the estimator is MultinomialNB
        if str(clf_sel.__class__) == "<class 'sklearn.naive_bayes.MultinomialNB'>":
            pickle_cls(classifier, 'MultinomialNB')
        clf_acc = nltk.classify.accuracy(classifier, test_features)
        for i, (features, label) in enumerate(test_features):
            ref_set[label].add(i)
            predicted = classifier.classify(features)
            test_set[predicted].add(i)
        pos_precision = precision(ref_set['pos'], test_set['pos'])
        pos_recall = recall(ref_set['pos'], test_set['pos'])
        neg_precision = precision(ref_set['neg'], test_set['neg'])
        neg_recall = recall(ref_set['neg'], test_set['neg'])
        print("{0},{1},{2},{3},{4},{5}".format(clf_sel.__class__, clf_acc,
                                               pos_precision, pos_recall,
                                               neg_precision, neg_recall))

Author: koosha | Project: twitter-sentiment-analysis-v2 | Lines: 33 | Source: sentiment_analyzer.py
Example 16: validate

def validate(data, params, d):
    stop = stopwords.words("english")
    (rel_dict, Wv, b, L) = params

    print("validating, adding lookup")
    for split in data:
        for tree in split:
            for node in tree.get_nodes():
                node.vec = L[:, node.ind].reshape((d, 1))

    train_feats = []
    val_feats = []

    for tt, split in enumerate(data):
        if tt == 0:
            print("processing train")
        else:
            print("processing val")
        for num_finished, tree in enumerate(split):
            # process validation trees
            forward_prop(None, params, tree, d, labels=False)
            # average the normalized node vectors, skipping the root and stopwords
            ave = zeros((d, 1))
            count = 0
            for ex, node in enumerate(tree.get_nodes()):
                if ex != 0 and node.word not in stop:
                    ave += node.p_norm
                    count += 1
            ave = ave / count
            featvec = ave.flatten()
            curr_feats = {}
            for dim, val in ndenumerate(featvec):
                curr_feats["_" + str(dim)] = val
            if tt == 0:
                train_feats.append((curr_feats, tree.ans))
            else:
                val_feats.append((curr_feats, tree.ans))

    print("training")
    classifier = SklearnClassifier(LogisticRegression(C=10))
    classifier.train(train_feats)

    print("predicting...")
    train_acc = nltk.classify.util.accuracy(classifier, train_feats)
    val_acc = nltk.classify.util.accuracy(classifier, val_feats)
    return train_acc, val_acc

Author: luoq | Project: qanta | Lines: 60 | Source: learn_classifiers.py
Example 17: evaluate

def evaluate(train_qs, test_qs, params, d):
    data = [train_qs, test_qs]
    (W, b, W2, b2, W3, b3, L) = params

    train_feats = []
    test_feats = []

    for tt, split in enumerate(data):
        for qs, ans in split:
            prev_qs = zeros((d, 1))
            prev_sum = zeros((d, 1))
            count = 0.
            history = []
            for dist in qs:
                sent = qs[dist]
                # input is average of all nouns in sentence
                # av = average(L[:, sent], axis=1).reshape((d, 1))
                history += sent
                prev_sum += sum(L[:, sent], axis=1).reshape((d, 1))
                if len(history) == 0:
                    av = zeros((d, 1))
                else:
                    av = prev_sum / len(history)
                # apply non-linearity
                p = relu(W.dot(av) + b)
                p2 = relu(W2.dot(p) + b2)
                p3 = relu(W3.dot(p2) + b3)
                curr_feats = {}
                for dim, val in ndenumerate(p3):
                    curr_feats['__' + str(dim)] = val
                if tt == 0:
                    train_feats.append((curr_feats, ans[0]))
                else:
                    test_feats.append((curr_feats, ans[0]))

    print('total training instances:', len(train_feats))
    print('total testing instances:', len(test_feats))
    random.shuffle(train_feats)

    # can modify this classifier / do grid search on regularization parameter using sklearn
    classifier = SklearnClassifier(LogisticRegression(C=10))
    classifier.train(train_feats)

    print('accuracy train:', nltk.classify.util.accuracy(classifier, train_feats))
    print('accuracy test:', nltk.classify.util.accuracy(classifier, test_feats))
    print('')

    print('dumping classifier')
    # (cPickle in the original Python 2 source)
    pickle.dump(classifier, open('data/deep/classifier', 'wb'),
                protocol=pickle.HIGHEST_PROTOCOL)

Author: jankim | Project: qb | Lines: 60 | Source: learn_classifiers.py
Example 18: score

def score(classifier):
    classifier = SklearnClassifier(classifier)
    classifier.train(trainset)
    # batch_classify was the old NLTK name; classify_many is the current one
    pred = classifier.classify_many(test)
    return accuracy_score(tag_test, pred)

Author: vsooda | Project: Review-Helpfulness-Prediction | Lines: 7 | Source: store_sentiment_classifier.py
Example 19: cross_validate

def cross_validate(data, model=None):
    training_set = nltk.classify.apply_features(preprocess, data)
    # sklearn.cross_validation is deprecated; newer scikit-learn uses sklearn.model_selection.KFold
    cv = cross_validation.KFold(len(training_set), n_folds=10, shuffle=False, random_state=None)
    if model == "svm" or model == "SVM":
        svm = SklearnClassifier(LinearSVC())
        for traincv, testcv in cv:
            # with shuffle=False the fold indices are contiguous, so slicing works;
            # the +1 keeps the fold's last element in the slice
            classifier = svm.train(training_set[traincv[0]:traincv[-1] + 1])
            print('accuracy:', nltk.classify.util.accuracy(
                classifier, training_set[testcv[0]:testcv[-1] + 1]))

Author: EricSchles | Project: text_classify | Lines: 8 | Source: algorithms.py
Example 20: svm

def svm(trainfeats, testfeats):
    y = []
    classif = SklearnClassifier(LinearSVC(C=0.032))
    classif.train(trainfeats)
    print("SVM output")
    print('train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats)))
    y.append(nltk.classify.util.accuracy(classif, testfeats))
    print(y)

Author: shachi04 | Project: PSL_sentiment | Lines: 9 | Source: baseline.py
Note: the nltk.classify.scikitlearn.SklearnClassifier class examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their respective authors; copyright remains with the original authors, and any use or redistribution must follow the corresponding project's license. Do not repost without permission.