• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python scikitlearn.SklearnClassifier类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中nltk.classify.scikitlearn.SklearnClassifier的典型用法代码示例。如果您正苦于以下问题:Python SklearnClassifier类的具体用法?Python SklearnClassifier怎么用?Python SklearnClassifier使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了SklearnClassifier类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: classifier_for_lemma

def classifier_for_lemma(lemma, filenames):
    # XXX: always doing non-null and Random Forest for initial version
    classifier = SklearnClassifier(RandomForestClassifier(), sparse=False)
    print("loading training data for", lemma)
    load_training_for_word(lemma, filenames.bitextfn, filenames.alignfn,
                           filenames.annotatedfn)

    training = trainingdata.trainingdata_for(lemma, nonnull=True)
    print("got {0} instances for {1}".format(len(training), lemma))

    # delete the sentences themselves; we have the instances
    trainingdata.set_examples([], [])
    trainingdata.set_sl_annotated([])
    gc.collect()

    if len(training) > (20 * 1000):
        print("capping to 20k instances to fit in memory")
        training = training[: 20 * 1000]

    labels = set(label for (feat,label) in training)
    print("loaded training data for", lemma)
    if (not training) or len(labels) < 2:
        return None
    classifier.train(training)
    return classifier
开发者ID:alexrudnick,项目名称:chipa,代码行数:25,代码来源:annotate_clwsd.py


示例2: trainClassifiers

def trainClassifiers(tweets):
    # Generate the training set
    training_set = nltk.classify.util.apply_features(extract_features, tweets)
    print("Training set created!")

    # Train and save the Naive Bayes classifier to a file
    NBClassifier = nltk.NaiveBayesClassifier.train(training_set)
    f = open('data/trained_classifiers/NBClassifier.pickle', 'wb')
    pickle.dump(NBClassifier, f, 1)
    f.close()
    print("NBClassifier Classifier Trained")

    #Train linear SVC
    linear_SVC_classifier = SklearnClassifier(LinearSVC())
    linear_SVC_classifier.train(training_set)

    # Train Max Entropy Classifier
    # MaxEntClassifier = nltk.classify.maxent.MaxentClassifier.train(training_set, 'IIS', trace=2, \
    #                        encoding=None, labels=None, sparse=True, gaussian_prior_sigma=0, max_iter = 5)
    # f = open('data/trained_classifiers/MaxEntClassifier.pickle', 'wb')
    # pickle.dump(MaxEntClassifier, f, 1)
    # f.close()
    # print("MaxEntClassifier Classifier Trained")

    # return (training_set, NBClassifier, MaxEntClassifier)
    return (training_set, NBClassifier, linear_SVC_classifier)
开发者ID:quiuquio,项目名称:Twitter-Sentiment-Analysis,代码行数:26,代码来源:main2.py


示例3: score

def score(trainset, testset, classifier):
    classifier = SklearnClassifier(classifier)
    classifier._vectorizer.sort = False
    classifier.train(trainset)
    (test, tag_test) = zip(*testset)
    pred = classifier.classify_many(test)
    return accuracy_score(tag_test, pred)
开发者ID:eleanordong,项目名称:datamining,代码行数:7,代码来源:sentimentexample.py


示例4: __init__

class SKClassifier:

    classifier = None

    def __init__(self, cls='SVC'):
        self.classifier = SklearnClassifier({
            'SVC': SVC(),
            'LogisticRegression': LogisticRegression(),
            'BernoulliNB': BernoulliNB()
        }[cls])
        if not self.classifier:
            self.classifier = SklearnClassifier(SVC())

    def train(self, trainset):
        self.classifier.train(trainset)

    def test(self, tagged, featuresets):
        predict = self.classifier.classify_many(featuresets)
        print predict
        return accuracy_score(tagged, predict)

    def classify(self, featureset):
        return self.classifier.classify(featureset)

    def classify_many(self, featuresets):
        return self.classifier.classify_many(featuresets)
开发者ID:Palazor,项目名称:sentiment,代码行数:26,代码来源:SkClassifier.py


示例5: learn_model

def learn_model(data,target):
    bestwords = best_of_words(data, target)
    # preparing data for split validation. 80% training, 20% test
    data_train,data_test,target_train,target_test = cross_validation.train_test_split(data,target,test_size=0.1,random_state=43)
    #classifier = BernoulliNB().fit(data_train,target_train)
    train_feature=[]
    test_feature=[]
    for i in range(len(data_train)):
        d=data_train[i]
        d=jieba.cut(d, cut_all=False)
        l=target_train[i]
        #tmp=[bigram(d),l]
        tmp = [dict([(word, True) for word in d if word in bestwords]), l]
        train_feature.append(tmp)
        
    for i in range(len(data_test)):
        d=data_test[i]
        d=jieba.cut(d, cut_all=False)
        l=target_test[i]
        #tmp=bigram(d)
        tmp = dict([(word, True) for word in d if word in bestwords])
        test_feature.append(tmp)
    
        
    classifier = SklearnClassifier(MultinomialNB())
    classifier.train(train_feature)
   
    predicted = classifier.classify_many(test_feature)
    
    evaluate_model(target_test,predicted)

    return classifier, bestwords
开发者ID:cysjtu,项目名称:SentimentAnalysis,代码行数:32,代码来源:nlp_machine_v3.py


示例6: performCrossValidation

def performCrossValidation(featureset, labels, foldsCount, sklearnclassifier, uniqLabels):
    accuracySum = 0.0
    precisionSums = defaultdict(float)
    recallSums = defaultdict(float)
    fscoreSums = defaultdict(float)
    crossValidationIterations = cross_validation.StratifiedKFold(labels, n_folds=foldsCount)
    for train, test in crossValidationIterations:
        trainset = [featureset[i] for i in train]
        testset = [featureset[i] for i in test]
        print("before train")
        classifier = SklearnClassifier(sklearnclassifier).train(trainset)

        true = [label for features, label in testset]
        predicted = classifier.classify_many([features for features, label in testset])

        precisions, recalls, fscores, support = precision_recall_fscore_support(true, predicted, pos_label=None, labels=uniqLabels)
        accuracy = accuracy_score(true, predicted)
        accuracySum += accuracy

        for label, value in zip(uniqLabels, precisions):
            precisionSums[label] += value
        for label, value in zip(uniqLabels, recalls):
            recallSums[label] += value
        for label, value in zip(uniqLabels, fscores):
            fscoreSums[label] += value

    print("Average accurancy: {0:.3f}".format(accuracySum/foldsCount))
    measures = {label: (sum/foldsCount, recallSums.get(label)/foldsCount, fscoreSums.get(label)/foldsCount) for label, sum in precisionSums.items()}
    for label, (prec, recall, fscore) in measures.items():
        print("Average precision for {0}: {1:.3f}".format(label, prec))
        print("Average recall for {0}: {1:.3f}".format(label, recall))
        print("Average f score for {0}: {1:.3f}".format(label, fscore))
开发者ID:ekedziora,项目名称:sentiment,代码行数:32,代码来源:utils.py


示例7: svm

def svm(train_data,preprocessing=True):
    training_data = []
    for data in train_data:
        training_data.append(preprocess(data[0],label=data[1]))
    cl = SklearnClassifier(LinearSVC())
    cl.train(training_data)
    return cl
开发者ID:EricSchles,项目名称:text_classify,代码行数:7,代码来源:algorithms.py


示例8: sentiment_classifier

def sentiment_classifier(debug):
	# trainingfp = open('training.csv', 'rb')
	train = pd.read_csv( 'training.csv', delimiter=',', quotechar='"', escapechar='\\',header=0 )
	num_tweets = train['TweetText'].size
	
	cleantweets = []
	for i in xrange(0, num_tweets):
		if debug and ( (i+1)%1000 == 0 ):
			print "Tweet %d of %d\n" % ( i+1, num_tweets )          
		cleantweets.append((tweet_to_words(train['TweetText'][i]), train['Sentiment'][i]))

	# vectorizer = CountVectorizer(analyzer = "word",   \
 #                             tokenizer = None,    \
 #                             preprocessor = None, \
 #                             stop_words = None,   \
 #                             max_features = 5000) 

	# train_data_features = vectorizer.fit_transform([t for (t,_) in cleantweets])
	
	# feature_labels = [(m,l) for ((f,l),m) in zip(cleantweets, train_data_features)]

	# forest = RandomForestClassifier(n_estimators = sensitivity)
	# forest = forest.fit(train_data_features, train['Sentiment'])
	classif = SklearnClassifier(LinearSVC())
	classif.train(cleantweets)

	return (classif)
开发者ID:greensam,项目名称:am221project,代码行数:27,代码来源:sentiment.py


示例9: evaluate

def evaluate(classifier_alo):
    
    classifier = SklearnClassifier(classifier_alo) #在nltk 中使用scikit-learn 的接口
    classifier.train(trainFeatures) #训练分类器
    
    referenceSets = collections.defaultdict(set)
    testSets = collections.defaultdict(set)	
    i = 0
    for item in testFeatures:
        referenceSets[item[1]].add(i)
        predicted = classifier.classify(item[0])
        testSets[predicted].add(i)	
        i += 1
    
    pos_pre = nltk.metrics.precision(referenceSets['pos'], testSets['pos'])
    pos_recall = nltk.metrics.recall(referenceSets['pos'], testSets['pos'])
    neg_pre =  nltk.metrics.precision(referenceSets['neg'], testSets['neg'])
    neg_recall = nltk.metrics.recall(referenceSets['neg'], testSets['neg'])
    
    print (str('{0:.3f}'.format(float(pos_pre))) + "  "
    +str('{0:.3f}'.format(float(pos_recall))) + "  "
    +str('{0:.3f}'.format(float(neg_pre))) + "  "
    +str( '{0:.3f}'.format(float(neg_recall))) + "  "
    +str('{0:.3f}'.format(2*(float(pos_pre)*float(pos_recall)) / (float(pos_recall)+float(pos_pre)))) + "  "
    +str('{0:.3f}'.format(2*(float(neg_pre)*float(neg_recall)) / (float(neg_recall)+float(neg_pre)))))
开发者ID:delili,项目名称:NLP_Comments_Sentiment_Analysis,代码行数:25,代码来源:process.py


示例10: chatBot

class chatBot(object):

    def __init__(self):
        self.posts = nltk.corpus.nps_chat.xml_posts()
        self.categories = ['Emotion', 'ynQuestion', 'yAnswer', 'Continuer',
                'whQuestion', 'System', 'Accept', 'Clarify', 'Emphasis',
                'nAnswer', 'Greet', 'Statement', 'Reject', 'Bye', 'Other']
        self.mapper = [0, 2, 6, 3, 11, 5, 8, 1, 8, 3, 10, 11, 13, 13, 13]
        self.responses = {}
        self.featuresets = []
        self.train = []
        self.test = []
        self.testSet = []
        self.testSetClass = []
        self.classif = SklearnClassifier(LinearSVC())
        for i in range(0, 15):
            self.responses[i] = []
        for post in self.posts:
            self.featuresets.append((self.tokenize(post.text),self.categories.index(post.get('class'))))
            self.temp = self.responses[self.categories.index(post.get('class'))]
            self.temp.append(post.text)

    def tokenize(self, sentence):
        """
            Extracts a set of features from a message.
        """
        features = {}
        tokens = nltk.word_tokenize(sentence)
        for t in tokens:
            features['contains(%s)' % t.lower()] = True
        return features

    def talk(self):
        while 1:
            inp = raw_input("YOU: ")
            features = self.tokenize(inp)
            pp = self.classif.classify_many(features)
            pp = pp[0]
            pp = int(pp)
            m = self.mapper[pp]
            r = self.responses[m]
            val = randint(0, len(r))
            print("BOT: "+r[val])

    def trainSet(self):
        shuffle(self.featuresets)
        size = int(len(self.featuresets) * .1) # 10% is used for the test set
        self.train = self.featuresets[size:]
        self.test = self.featuresets[:size]
        self.classif.train(self.train)

        self.testSet = []
        self.testSetClass = []
        for i in self.test:
            self.testSet.append(i[0])
            self.testSetClass.append(i[1])
        self.batch = self.classif.classify_many(self.testSet)

    def statistics(self):
        print (classification_report(self.testSetClass, self.batch, labels=list(set(self.testSetClass)),target_names=self.categories))
开发者ID:donjuma,项目名称:NLP_chatBot,代码行数:60,代码来源:nps.py


示例11: train

def train(cleanedDataCollection, tagPool):
	posSamples = []
	negSamples = []

	featuresets = [(extractFeatures(d,tagPool), c) for (d,c) in cleanedDataCollection]
	for sample in featuresets:
		if sample[1] == "trash":
			negSamples.append(sample)
		else:
			posSamples.append(sample)

	train_set = negSamples[10:]+posSamples[10:]
	test_set = negSamples[:10]+posSamples[:10]


	# classifier = nltk.NaiveBayesClassifier.train(train_set)
	# print(nltk.classify.accuracy(classifier, test_set))
	# classifier.show_most_informative_features(5) 
	# return classifier

	sk_classifier = SklearnClassifier(MultinomialNB())
	sk_classifier.train(train_set)
	print "accuracy is: %s" % (accuracy(sk_classifier, test_set))

	precision, recall, fMeasure = precision_recall_fmeasure(sk_classifier,  test_set, "useful")

	print "precision is: %s" % (precision)
	print "recall is: %s" % (recall)
	print "F-measure is: %s" % (fMeasure)
	return sk_classifier
开发者ID:iaoshili,项目名称:NLP_Project,代码行数:30,代码来源:InterestingArticleIdentifier.py


示例12: main3

def main3():
    from nltk.classify.scikitlearn import SklearnClassifier
    from sklearn.svm import LinearSVC
    from sklearn.metrics import confusion_matrix
    from matplotlib import pyplot

    svm = SklearnClassifier(LinearSVC(loss="hinge"))
    svm.train(trainData)
    print("SVM: ", nltk.classify.accuracy(svm, testData))
    results = svm.classify_many(item[0] for item in testData)

    print(results)
    from sklearn.metrics import classification_report

    # getting a full report
    print(classification_report(t_test_skl, results, labels=list(set(t_test_skl)), target_names=t_test_skl))

    # Compute confusion matrix
    import numpy as np
    cmm = confusion_matrix([x[1] for x in testData], results)

    print(cmm)
    cmm = np.array(cmm, dtype = np.float)
    print(cmm.shape)

    #f=figure()
    #ax = f.add_subplot(111)
    #show()
    #%pylab inline

    # Show confusion matrix in a separate window
    print(pyplot.imshow(cmm, interpolation='nearest'))
开发者ID:listentojohan,项目名称:cjor,代码行数:32,代码来源:Main.py


示例13: SVM

def SVM(training_set, test_set):
    classifier = SklearnClassifier(LinearSVC())
    print("Training a new SVM classifier")
    classifier.train(training_set)
    print("Accuracy of SVM in training:",nltk.classify.accuracy(classifier, test_set))
#     classifier.show_most_informative_features(5)
    #print("Running new Decision Tree classifier")
    accuracy = nltk.classify.accuracy(classifier, test_set)
    trueLabels = [l for d, l in test_set]
    predictedLabels = classifier.classify_many([d for d,t in test_set])
    #print("Accuracy:",accuracy)
#     classifier.show_most_informative_features(MIF)
    def runTrained(test_set, hasTags=False):
        #print("Running pre-trained Decision Tree classifier")
        if hasTags:
            tagglessTest_set = [data for data, tag in test_set]
            acc = nltk.classify.accuracy(classifier, test_set)
            print("Accuracy:", acc)
            predictions = classifier.classify_many(tagglessTest_set)
            return ([e for e in zip(tagglessTest_set, predictions)], acc)
        else:
            tagglessTest_set = test_set         
        predictions = classifier.classify_many(tagglessTest_set)
        #print("Predicted Labels:",predictions)
        return [e for e in zip(tagglessTest_set, predictions)]
    return (runTrained, accuracy, predictedLabels, trueLabels) 
开发者ID:dfgerrity,项目名称:AuthorDetector,代码行数:26,代码来源:ClassifierRunner.py


示例14: clf_score

def clf_score(classifier):
    classifier = SklearnClassifier(classifier)
    classifier.train(train_set)
    # nltk.classify.scikitlearn(BernoulliNB())
    predict = classifier.classify_many(test)
    # classifier.prob_classify_many()
    return accuracy_score(tag_test, predict)
开发者ID:wac81,项目名称:LSI-for-ChineseDocument,代码行数:7,代码来源:store+sentiment+classifier.py


示例15: get_performance

def get_performance(clf_sel, train_features, test_features):
    ref_set = collections.defaultdict(set)
    test_set = collections.defaultdict(set)
    classification_error = False

    clf = SklearnClassifier(clf_sel)
    try:
        classifier = clf.train(train_features)
    except:
        classification_error = True
        # print (str(clf_sel.__class__),'NA')

    if str(clf_sel.__class__) == "<class 'sklearn.naive_bayes.MultinomialNB'>":
        pickle_cls(classifier, 'MultinomialNB')

    # print(str(clf_sel), 'accuracy:'(nltk.classify.accuracy(classifier, test_features)) * 100)

    if not classification_error:
        clf_acc = nltk.classify.accuracy(classifier, test_features)

        for i, (features, label) in enumerate(test_features):
            ref_set[label].add(i)
            predicted = classifier.classify(features)
            test_set[predicted].add(i)

        pos_precision = precision(ref_set['pos'], test_set['pos'])
        pos_recall = recall(ref_set['pos'], test_set['pos'])
        neg_precision = precision(ref_set['neg'], test_set['neg'])
        neg_recall = recall(ref_set['neg'], test_set['neg'])

        print(
            "{0},{1},{2},{3},{4},{5}".format(clf_sel.__class__, clf_acc, pos_precision, pos_recall, neg_precision,
                                             neg_recall))
开发者ID:koosha,项目名称:twitter-sentiment-analysis-v2,代码行数:33,代码来源:sentiment_analyzer.py


示例16: validate

def validate(data, params, d):

    stop = stopwords.words("english")

    (rel_dict, Wv, b, L) = params

    print "validating, adding lookup"
    for split in data:
        for tree in split:
            for node in tree.get_nodes():
                node.vec = L[:, node.ind].reshape((d, 1))

    train_feats = []
    val_feats = []

    for tt, split in enumerate(data):

        if tt == 0:
            print "processing train"

        else:
            print "processing val"

        for num_finished, tree in enumerate(split):

            # process validation trees
            forward_prop(None, params, tree, d, labels=False)

            ave = zeros((d, 1))
            words = zeros((d, 1))
            count = 0
            wcount = 0
            word_list = []
            for ex, node in enumerate(tree.get_nodes()):

                if ex != 0 and node.word not in stop:
                    ave += node.p_norm
                    count += 1

            ave = ave / count
            featvec = ave.flatten()

            curr_feats = {}
            for dim, val in ndenumerate(featvec):
                curr_feats["_" + str(dim)] = val

            if tt == 0:
                train_feats.append((curr_feats, tree.ans))

            else:
                val_feats.append((curr_feats, tree.ans))

    print "training"
    classifier = SklearnClassifier(LogisticRegression(C=10))
    classifier.train(train_feats)

    print "predicting..."
    train_acc = nltk.classify.util.accuracy(classifier, train_feats)
    val_acc = nltk.classify.util.accuracy(classifier, val_feats)
    return train_acc, val_acc
开发者ID:luoq,项目名称:qanta,代码行数:60,代码来源:learn_classifiers.py


示例17: evaluate

def evaluate(train_qs, test_qs, params, d):

    data = [train_qs, test_qs]
    (W, b, W2, b2, W3, b3, L) = params

    train_feats = []
    test_feats = []

    for tt, split in enumerate(data):

        for qs, ans in split:

            prev_qs = zeros((d, 1))
            prev_sum = zeros((d, 1))
            count = 0.
            history = []

            for dist in qs:

                sent = qs[dist]

                # input is average of all nouns in sentence
                # av = average(L[:, sent], axis=1).reshape((d, 1))
                history += sent
                prev_sum += sum(L[:, sent], axis=1).reshape((d, 1))
                if len(history) == 0:
                    av = zeros((d, 1))
                else:
                    av = prev_sum / len(history)

                # apply non-linearity
                p = relu(W.dot(av) + b)
                p2 = relu(W2.dot(p) + b2)
                p3 = relu(W3.dot(p2) + b3)

                curr_feats = {}
                for dim, val in ndenumerate(p3):
                    curr_feats['__' + str(dim)] = val

                if tt == 0:
                    train_feats.append( (curr_feats, ans[0]) )

                else:
                    test_feats.append( (curr_feats, ans[0]) )

    print 'total training instances:', len(train_feats)
    print 'total testing instances:', len(test_feats)
    random.shuffle(train_feats)

    # can modify this classifier / do grid search on regularization parameter using sklearn
    classifier = SklearnClassifier(LogisticRegression(C=10))
    classifier.train(train_feats)

    print 'accuracy train:', nltk.classify.util.accuracy(classifier, train_feats)
    print 'accuracy test:', nltk.classify.util.accuracy(classifier, test_feats)
    print ''

    print 'dumping classifier'
    cPickle.dump(classifier, open('data/deep/classifier', 'wb'),
                 protocol=cPickle.HIGHEST_PROTOCOL)
开发者ID:jankim,项目名称:qb,代码行数:60,代码来源:learn_classifiers.py


示例18: score

def score(classifier):
    classifier = SklearnClassifier(classifier)
    classifier.train(trainset)

    # pred = classifier.batch_classify(test)
    pred = classifier.classify_many(test)
    return accuracy_score(tag_test, pred)
开发者ID:vsooda,项目名称:Review-Helpfulness-Prediction,代码行数:7,代码来源:store_sentiment_classifier.py


示例19: cross_validate

def cross_validate(data,model=None):
    training_set = nltk.classify.apply_features(preprocess,data)
    cv = cross_validation.KFold(len(training_set), n_folds=10, shuffle=False, random_state=None)
    if model == "svm" or model=="SVM":
        svm = SklearnClassifier(LinearSVC())
        for traincv, testcv in cv:
            classifier = svm.train(training_set[traincv[0]:traincv[len(traincv)-1]])
            print 'accuracy:', nltk.classify.util.accuracy(classifier, training_set[testcv[0]:testcv[len(testcv)-1]])
开发者ID:EricSchles,项目名称:text_classify,代码行数:8,代码来源:algorithms.py


示例20: svm

def svm(trainfeats, testfeats):
	y = []
	accuracy = []
	classif = SklearnClassifier(LinearSVC(C=0.032))
	classif.train(trainfeats)
	print "SVM output"
	print 'train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats))
	y.append( nltk.classify.util.accuracy(classif, testfeats))
	print y
开发者ID:shachi04,项目名称:PSL_sentiment,代码行数:9,代码来源:baseline.py



注:本文中的nltk.classify.scikitlearn.SklearnClassifier类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python util.accuracy函数代码示例发布时间:2022-05-27
下一篇:
Python classify.NaiveBayesClassifier类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap