本文整理汇总了Python中sklearn.naive_bayes.GaussianNB类的典型用法代码示例。如果您正苦于以下问题:Python GaussianNB类的具体用法?Python GaussianNB怎么用?Python GaussianNB使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了GaussianNB类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: selectKBest
def selectKBest(previous_result, data, n_top=10):
    """Score nested top-k feature subsets (k = 1..n_top) with GaussianNB.

    previous_result: list of (feature_name, score...) tuples sorted by
        univariate score; the entries at position 4 and 5 (originally
        'restricted_stock_deferred' and 'director_fees') are removed first.
    data: unused here; kept for interface compatibility — the function
        rebuilds its data from the module-level my_dataset instead.
    n_top: number of nested subsets to evaluate (default 10, the value
        that was previously hard-coded).

    Returns a list of (k, score[0], score[1], score[2]) tuples, one per
    subset size k.
    """
    # remove 'restricted_stock_deferred' and 'director_fees'
    previous_result.pop(4)
    # second pop(4): the list shifted left, so this removes the next entry
    previous_result.pop(4)
    result = []
    for k in range(n_top):
        # Feature list = target label 'poi' plus the k+1 best features.
        feature_list = ['poi']
        for n in range(k + 1):
            feature_list.append(previous_result[n][0])
        data = featureFormat(my_dataset, feature_list, sort_keys=True, remove_all_zeroes=False)
        labels, features = targetFeatureSplit(data)
        features = [abs(x) for x in features]
        from sklearn.cross_validation import StratifiedShuffleSplit
        cv = StratifiedShuffleSplit(labels, 1000, random_state=42)
        # Accumulate train/test rows over all 1000 stratified splits.
        features_train = []
        features_test = []
        labels_train = []
        labels_test = []
        for train_idx, test_idx in cv:
            for ii in train_idx:
                features_train.append(features[ii])
                labels_train.append(labels[ii])
            for jj in test_idx:
                features_test.append(features[jj])
                labels_test.append(labels[jj])
        from sklearn.naive_bayes import GaussianNB
        clf = GaussianNB()
        clf.fit(features_train, labels_train)
        predictions = clf.predict(features_test)
        # NOTE(review): score_func is module-level — presumably returns a
        # (accuracy, precision, recall)-style triple; confirm against caller.
        score = score_func(labels_test, predictions)
        result.append((k + 1, score[0], score[1], score[2]))
    return result
开发者ID:yielder,项目名称:identifying-fraud-from-enron-email,代码行数:35,代码来源:poi_id.py
示例2: test_gnb_sample_weight
def test_gnb_sample_weight():
    """Test whether sample weights are properly used in GNB. """
    # X, y and rng are module-level test fixtures — defined elsewhere in the
    # test module, not in this function.
    # Sample weights all being 1 should not change results
    sw = np.ones(6)
    clf = GaussianNB().fit(X, y)
    clf_sw = GaussianNB().fit(X, y, sw)
    assert_array_almost_equal(clf.theta_, clf_sw.theta_)
    assert_array_almost_equal(clf.sigma_, clf_sw.sigma_)
    # Fitting twice with half sample-weights should result
    # in same result as fitting once with full weights
    sw = rng.rand(y.shape[0])
    clf1 = GaussianNB().fit(X, y, sample_weight=sw)
    clf2 = GaussianNB().partial_fit(X, y, classes=[1, 2], sample_weight=sw / 2)
    clf2.partial_fit(X, y, sample_weight=sw / 2)
    assert_array_almost_equal(clf1.theta_, clf2.theta_)
    assert_array_almost_equal(clf1.sigma_, clf2.sigma_)
    # Check that duplicate entries and correspondingly increased sample
    # weights yield the same result
    ind = rng.randint(0, X.shape[0], 20)
    # bincount turns the sampled indices into per-row duplication counts
    sample_weight = np.bincount(ind, minlength=X.shape[0])
    clf_dupl = GaussianNB().fit(X[ind], y[ind])
    clf_sw = GaussianNB().fit(X, y, sample_weight)
    assert_array_almost_equal(clf_dupl.theta_, clf_sw.theta_)
    assert_array_almost_equal(clf_dupl.sigma_, clf_sw.sigma_)
开发者ID:daidan,项目名称:MLearning,代码行数:30,代码来源:test_naive_bayes.py
示例3: scikitNBClassfier
def scikitNBClassfier(self):
    """Train sklearn's GaussianNB on the processed bag-of-words data.

    Loads the preprocessed documents and labels, builds a bag-of-words
    matrix via the helper Bayesian class, fits GaussianNB, vectorizes one
    hand-written test sentence, then re-predicts the whole training set
    and prints the number of mislabeled points (resubstitution error).
    """
    dataMat, labels = self.loadProcessedData()
    bayesian = Bayesian()
    myVocabList = bayesian.createVocabList(dataMat)
    ## build the bag-of-words matrix (one row per document)
    trainMat = []
    for postinDoc in dataMat:
        trainMat.append(bayesian.setOfWords2Vec(myVocabList, postinDoc))
    from sklearn.naive_bayes import GaussianNB
    gnb = GaussianNB()
    X = array(trainMat)
    y = labels
    # Hand-written military-themed Chinese sentence used as a smoke-test input.
    testText = "美国军队的军舰今天访问了巴西港口城市,并首次展示了核潜艇攻击能力,飞机,监听。他们表演了足球。"
    testEntry = self.testEntryProcess(testText)
    # NOTE(review): Bayesian is re-instantiated although the earlier instance
    # is still in scope — presumably stateless; confirm.
    bayesian = Bayesian()
    thisDoc = array(bayesian.setOfWords2Vec(myVocabList, testEntry))
    ## fit and predict
    # NOTE(review): this single-document prediction is overwritten by the
    # next assignment and never reported.
    y_pred = gnb.fit(X, y).predict(thisDoc)
    # Class label names (military / sports); not referenced again in this method.
    clabels = ['军事', '体育']
    y_pred = gnb.fit(X, y).predict(X)
    print("Number of mislabeled points : %d" % (labels != y_pred).sum())
示例4: categorize
def categorize(train_data, test_data, train_class, n_features):
    """Classify test_data with a Gaussian Naive Bayes model fit on train_data.

    train_data: training feature matrix.
    test_data: feature matrix to predict.
    train_class: labels aligned with train_data.
    n_features: unused; kept for interface compatibility (it fed the
        feature-selection code that used to live, commented out, in this body).

    Returns the predicted class labels for test_data.

    Note: the original body also constructed LogisticRegression, SVC,
    DecisionTree, RandomForest and AdaBoost models that were never fit or
    used; only the GaussianNB path had any effect, so the dead
    constructions and commented-out experiments were removed.
    """
    gnb = GaussianNB()
    gnb.fit(train_data, train_class)
    return gnb.predict(test_data)
开发者ID:sibrajas,项目名称:data-python,代码行数:25,代码来源:numpyreadallalgo.py
示例5: performNB
def performNB(trainingScores, trainingResults, testScores):
    # Train a Gaussian NB model on the per-mark score columns and return the
    # predicted probability of the positive class for every test row.
    # trainingScores/testScores: dict mapping mark name -> list of scores,
    # one per example; trainingResults: labels aligned with the training rows.
    print "->Gaussian NB"
    X = []
    for currMark in trainingScores:
        pass  # body intentionally empty: grabs an arbitrary mark name, which leaks out of the loop
    # Allocate one (initially empty) feature row per training example.
    for idx in range(0, len(trainingScores[currMark])):
        X.append([])
    for currMark in trainingScores:
        if "Asym" in currMark:
            continue  # marks containing "Asym" are excluded as features
        print currMark,
        for idx in range(0, len(trainingScores[currMark])):
            X[idx].append(trainingScores[currMark][idx])
    X_test = []
    # currMark here is whatever key the loop above ended on.
    for idx in range(0, len(testScores[currMark])):
        X_test.append([])
    for currMark in trainingScores:
        if "Asym" in currMark:
            continue
        for idx in range(0, len(testScores[currMark])):
            X_test[idx].append(testScores[currMark][idx])
    gnb = GaussianNB()
    gnb.fit(X, np.array(trainingResults))
    # Column 1 of predict_proba = probability of the positive class.
    y_pred = gnb.predict_proba(X_test)[:, 1]
    print "->Gaussian NB"
    return y_pred
开发者ID:gersteinlab,项目名称:MatchedFilter,代码行数:29,代码来源:validationDifferentCelltypeModel.py
示例6: NBAccuracy
def NBAccuracy(features_train, labels_train, features_test, labels_test):
    """ compute the accuracy of your Naive Bayes classifier """
    ### import the sklearn module for GaussianNB
    from sklearn.naive_bayes import GaussianNB
    ### create classifier
    clf = GaussianNB()
    t0 = time()
    ### fit the classifier on the training features and labels
    clf.fit(features_train, labels_train)
    print "training time:", round(time()-t0, 3), "s"
    ### use the trained classifier to predict labels for the test features
    import numpy as np  # NOTE(review): imported but never used in this function
    t1 = time()
    pred = clf.predict(features_test)
    print "predicting time:", round(time()-t1, 3), "s"
    ### calculate and return the accuracy on the test data
    ### this is slightly different than the example,
    ### where we just print the accuracy
    ### you might need to import an sklearn module
    # clf.score computes mean accuracy directly, so `pred` above is only
    # used for the timing measurement.
    accuracy = clf.score(features_test, labels_test)
    return accuracy
开发者ID:dixu-ca,项目名称:ud120-projects,代码行数:25,代码来源:nb_author_id.py
示例7: classify
def classify(features_train, labels_train):
    """Fit a Gaussian Naive Bayes model on the training data.

    Returns the trained classifier so the caller can predict/score with it.
    """
    clf = GaussianNB()
    clf.fit(features_train, labels_train)
    return clf
开发者ID:anup5889,项目名称:IntroToMachineLearning,代码行数:7,代码来源:GaussianNB.py
示例8: naive_bayes
def naive_bayes(features, labels):
    """Fit GaussianNB and print its 10-fold cross-validated P/R/F scores."""
    model = GaussianNB()
    model.fit(features, labels)
    fold_scores = cross_validation.cross_val_score(
        model, features, labels, cv=10, score_func=metrics.precision_recall_fscore_support
    )
    # Average the per-fold scores and round for display.
    mean_scores = numpy.around(numpy.mean(fold_scores, axis=0), 2)
    print_table("Naive Bayes", mean_scores)
开发者ID:pelluch,项目名称:data-mining,代码行数:7,代码来源:main.py
示例9: test_gnb_prior
def test_gnb_prior():
    """Check that GaussianNB estimates class priors correctly and normalized."""
    model = GaussianNB().fit(X, y)
    # The fixture has two classes with three samples each -> priors 3/6, 3/6.
    assert_array_almost_equal(np.array([3, 3]) / 6.0, model.class_prior_, 8)
    model.fit(X1, y1)
    # Whatever the class balance, the learned priors must sum to 1.
    assert_array_almost_equal(model.class_prior_.sum(), 1)
开发者ID:arvindchari88,项目名称:newGitTest,代码行数:7,代码来源:test_naive_bayes.py
示例10: nb_names
def nb_names():
    """Fit a GaussianNB model to separate person names from ordinary words."""
    #generate list of tuple names
    engine = create_engine('sqlite:///names.db')
    DBSession = sessionmaker(bind=engine)
    session = DBSession()
    db_names = names.Names.getAllNames(session)
    names_list = [(x,'name') for x in db_names]
    words_list = generate_words()
    # Down-sample the names so both classes contribute equally many examples.
    sample_names = [names_list[i] for i in sorted(random.sample(xrange(len(names_list)), len(words_list)))]
    data = sample_names + words_list
    shuffled_data = np.random.permutation(data)
    strings = []
    classification = []
    for item in shuffled_data:
        strings.append([item[0]])
        classification.append(str(item[1]))
    X = np.array(strings)
    Y = np.array(classification)
    print X,Y
    # NOTE(review): X holds raw strings here, but GaussianNB expects numeric
    # features — fit() will likely raise unless the strings are vectorized
    # upstream; verify against how this function is actually called.
    clf = GaussianNB()
    clf.fit(X, Y)
开发者ID:mwcurry,项目名称:tracker,代码行数:25,代码来源:parser.py
示例11: CruiseAlgorithm
class CruiseAlgorithm(object):
# cruise algorithm is used to classify the cruise phase vs noncruise phase, it uses the differential change in data stream as the input matrix
def __init__(self, testing=False):
self.core = GaussianNB()
self.scaler = RobustScaler()
self.X_prev = None
self.testing = testing
def fit(self,X,Y): # Y should be the label of cruise or not
X = self.prepare(X)
self.core.fit(X,Y.ravel())
def predict(self, X):
if self.testing:
X_t = self.prepare(X)
else:
if self.X_prev:
X_t = X - self.X_prev
else:
X_t = X
self.X_prev = X
print repr(X_t)
prediction_result = self.core.predict(X_t)
return np.asmatrix(prediction_result)
def prepare(self,X):
a = np.zeros((X.shape[0],X.shape[1]))
for i in xrange(X.shape[0]-1):
a[i+1,:] = X[i+1] - X[i]
return a
开发者ID:RPI-WCL,项目名称:pilots,代码行数:29,代码来源:custom.py
示例12: trainNB
def trainNB():
    """Train a GaussianNB classifier from the dummy training CSV.

    Reads the hard-coded CSV file, keeps the columns listed in the
    module-level activeFeatureIndex as features and the last column as the
    integer class label, then returns the fitted classifier.
    """
    featureVector = []
    classVector = []
    headerLine = True
    #training
    # `with` guarantees the file is closed even if a parse error raises
    # (the original open()/close() pair leaked the handle on exceptions).
    with open(r'C:\Python34\alchemyapi_python\TrainingDataDummy.csv') as train:
        for line in train:
            if(headerLine):
                headerLine = False  # skip the CSV header row
            else:
                temp = line.split(",")
                x = [float(temp[i]) for i in activeFeatureIndex]
                featureVector.append(x)
                # class label is the last comma-separated field
                classVector.append(int(line.split(",")[-1].rstrip("\n")))
    fVector = np.array(featureVector)
    cVector = np.array(classVector)
    print(fVector.shape)
    print(cVector.shape)
    clf = GaussianNB()
    clf.fit(fVector, cVector)
    return clf
开发者ID:gangchill,项目名称:forked_project,代码行数:35,代码来源:nbTest.py
示例13: univariateFeatureSelection
def univariateFeatureSelection(f_list, my_dataset):
    """Score each feature on its own with GaussianNB and rank the features.

    For every feature in f_list: sanitize missing values, build a
    ('poi', feature) dataset, evaluate GaussianNB over 1000 stratified
    shuffle splits, and collect the resulting scores. Returns a list of
    (feature, score[0], score[1], score[2]) tuples sorted descending by
    the last score component.
    """
    result = []
    for feature in f_list:
        # Replace 'NaN' with 0
        for name in my_dataset:
            data_point = my_dataset[name]
            if not data_point[feature]:
                data_point[feature] = 0
            elif data_point[feature] == 'NaN':
                data_point[feature] =0
        data = featureFormat(my_dataset, ['poi',feature], sort_keys = True, remove_all_zeroes = False)
        labels, features = targetFeatureSplit(data)
        features = [abs(x) for x in features]
        from sklearn.cross_validation import StratifiedShuffleSplit
        cv = StratifiedShuffleSplit(labels, 1000, random_state = 42)
        # Accumulate train/test rows across all 1000 splits.
        features_train = []
        features_test = []
        labels_train = []
        labels_test = []
        for train_idx, test_idx in cv:
            for ii in train_idx:
                features_train.append( features[ii] )
                labels_train.append( labels[ii] )
            for jj in test_idx:
                features_test.append( features[jj] )
                labels_test.append( labels[jj] )
        from sklearn.naive_bayes import GaussianNB
        clf = GaussianNB()
        clf.fit(features_train, labels_train)
        predictions = clf.predict(features_test)
        # NOTE(review): score_func is module-level — presumably an
        # (accuracy, precision, recall)-style triple; confirm.
        score = score_func(labels_test,predictions)
        result.append((feature,score[0],score[1],score[2]))
    result = sorted(result, reverse=True, key=lambda x: x[3])
    return result
开发者ID:yielder,项目名称:identifying-fraud-from-enron-email,代码行数:35,代码来源:poi_id.py
示例14: NBAccuracy
def NBAccuracy(features_train, labels_train, features_test, labels_test):
#Import sklearn modules for GaussianNB
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
#Create classifer
classifer = GaussianNB();
#Timing fit algorithm
t0 = time();
#Fit classier on the training features
classifer.fit(features_train, labels_train);
print "Training Time: ", round(time() - t0, 3), "s";
GaussianNB();
#Timing prediction algorithm
t0=time();
#Use trained classifer to predict labels for test features
pred = classifer.predict(features_test);
print "Prediction Time: ", round(time() - t0, 3), "s";
#Calculate accuracy from features_test with answer in labels_test
accuracy = accuracy_score(pred, labels_test);
return accuracy;
开发者ID:avasilescu,项目名称:ud120-projects,代码行数:31,代码来源:nb_author_id.py
示例15: __init__
class GaussianNBClassifier:
    # Thin wrapper around sklearn's GaussianNB exposing separate
    # build / train / validate / test steps.
    def __init__(self):
        """
        This is the constructor responsible for initializing the classifier
        """
        self.outputHeader = "#gnb"  # tag used to label this classifier's output
        self.clf = None             # underlying GaussianNB; created in buildModel()
    def buildModel(self):
        """
        This builds the model of the Gaussian NB classifier
        """
        self.clf = GaussianNB()
    def trainGaussianNB(self,X, Y):
        """
        Training the Gaussian NB Classifier

        Requires buildModel() to have been called first (self.clf is None
        until then).
        """
        self.clf.fit(X, Y)
    def validateGaussianNB(self,X, Y):
        """
        Validate the Gaussian NB Classifier

        Predicts X and prints the accuracy against the true labels Y.
        """
        YPred = self.clf.predict(X)
        print accuracy_score(Y, YPred)
    def testGaussianNB(self,X, Y):
        """
        Test the Gaussian NB Classifier

        Identical in behavior to validateGaussianNB: prints accuracy on (X, Y).
        """
        YPred = self.clf.predict(X)
        print accuracy_score(Y, YPred)
示例16: gnbmodel
def gnbmodel(d,X_2,y_2,X_3,y_3,X_test,y_test):
    """Evaluate GaussianNB and score the unlabeled pool X_3.

    Prints the 5-fold CV accuracy on (X_2, y_2) and the accuracy on
    (X_test, y_test) prefixed with the tag string d, then returns the
    per-row probability of class 1 for X_3 as the 'chance' column of a
    DataFrame copy. y_3 is accepted but not used in this function.
    """
    X_3_copy = X_3.copy(deep=True)
    X_3_copy['chance']=0
    index = 0
    ########## 5-fold cross-validation ###########################
    scores = cross_val_score(GaussianNB(), X_2, y_2, cv=5, scoring='accuracy')
    score_mean =scores.mean()
    print(d+'5折交互检验:'+str(score_mean))
    #################################################
    gnb = GaussianNB().fit(X_2,y_2)
    ################ predict the held-out test set ################
    answer_gnb = gnb.predict(X_test)
    accuracy = metrics.accuracy_score(y_test,answer_gnb)
    print(d+'预测:'+str(accuracy))
    ###############################################
    # Write P(class=1) for each row of X_3 into the last ('chance') column.
    chance = gnb.predict_proba(X_3)[:,1]
    for c in chance:
        X_3_copy.iloc[index,len(X_3_copy.columns)-1]=c
        index += 1
    chance_que = X_3_copy.iloc[:,len(X_3_copy.columns)-1]
    return chance_que
示例17: __init__
class PatternBasedDiagnosis:
    """
    Pattern-based diagnosis backed by a Gaussian Naive Bayes model.

    NOTE(review): the original docstrings said "Decision Tree", but the
    implementation fits sklearn's GaussianNB — documentation updated to
    match the code.
    """
    __slots__ = [
        "model"
    ]
    def __init__(self):
        # model is assigned lazily in train(); __slots__ reserves the attribute
        pass
    def train(self, data, labels):
        """
        Fit the Gaussian NB model on the training data
        :param data: training feature matrix
        :param labels: class labels aligned with data
        :return: None
        """
        print('Training Data: %s' % (data))
        print('Training Labels: %s' % (labels))
        self.model = GaussianNB()
        self.model = self.model.fit(data, labels)
    def eval(self, obs):
        """Print the model's prediction for the observation(s) obs."""
        # print('Testing Result: %s; %s' % (self.model.predict(obs), self.model.predict_proba(obs)))
        print('Testing Result: %s' % self.model.predict(obs))
开发者ID:mkdmkk,项目名称:infaas,代码行数:27,代码来源:nb.py
示例18: getGaussianPred
def getGaussianPred(featureMatrix, labels, testSet, testSet_docIndex):
    """
    All input arguments are return of getTrainTestData()
    :param featureMatrix: training feature rows
    :param labels: training labels aligned with featureMatrix
    :param testSet: feature rows to classify
    :param testSet_docIndex: 2-D array whose row i is (docid, index) for testSet row i
    :return docIndexPred: dict{docid: [index1, index2, ...], ...}
             key is docid
             value is all cognates' index predicted positive
    """
    model = GaussianNB()
    model.fit(featureMatrix, labels)
    predictions = model.predict(testSet)
    docIndexPred = dict()
    for row, predicted in enumerate(predictions):
        if not predicted:
            continue  # only positive predictions are collected
        docid = testSet_docIndex[row, 0]
        index = testSet_docIndex[row, 1]
        # group cognate indices under their document id
        docIndexPred.setdefault(docid, []).append(index)
    return docIndexPred
开发者ID:spacegoing,项目名称:ALTA2015Contest,代码行数:28,代码来源:trainModel.py
示例19: test_gnb_priors
def test_gnb_priors():
    """Test whether the class prior override is properly used"""
    model = GaussianNB(priors=np.array([0.3, 0.7])).fit(X, y)
    # Probabilities must reflect the overridden priors, not the data balance.
    expected = np.array([[0.825303662161683,
                          0.174696337838317]])
    assert_array_almost_equal(model.predict_proba([[-0.1, -0.1]]), expected, 8)
    assert_array_equal(model.class_prior_, np.array([0.3, 0.7]))
开发者ID:Lavanya-Basavaraju,项目名称:scikit-learn,代码行数:7,代码来源:test_naive_bayes.py
示例20: GaussianColorClassifier
class GaussianColorClassifier(ContourClassifier):
    '''
    A contour classifier which classifies a contour
    based on it's mean color in BGR, HSV, and LAB colorspaces,
    using a Gaussian classifier for these features.

    For more usage info, see class ContourClassifier
    '''
    # Feature names: B,G,R then H,S,V then L,A,B. The two 'B' entries are
    # distinct channels (BGR blue vs. LAB b*), hence the duplicate label.
    FEATURES = ['B', 'G', 'R', 'H', 'S', 'V', 'L', 'A', 'B']
    def __init__(self, classes, **kwargs):
        super(GaussianColorClassifier, self).__init__(classes, **kwargs)
        self.classifier = GaussianNB()
    def get_features(self, img, mask):
        """Return the 9-element mean-color feature vector for the masked region."""
        mean = cv2.mean(img, mask)
        # Wrap the BGR mean as a 1x1 image so cvtColor can convert it.
        mean = np.array([[mean[:3]]], dtype=np.uint8)
        mean_hsv = cv2.cvtColor(mean, cv2.COLOR_BGR2HSV)
        mean_lab = cv2.cvtColor(mean, cv2.COLOR_BGR2LAB)
        features = np.hstack((mean.flatten(), mean_hsv.flatten(), mean_lab.flatten()))
        return features
    def classify_features(self, features):
        """Predict class labels for the given feature rows."""
        return self.classifier.predict(features)
    def feature_probabilities(self, features):
        """Return per-class probabilities for the given feature rows."""
        return self.classifier.predict_proba(features)
    def train(self, features, classes):
        """Fit the underlying GaussianNB on labeled feature rows."""
        self.classifier.fit(features, classes)
开发者ID:uf-mil,项目名称:software-common,代码行数:30,代码来源:color_classifier.py
注:本文中的sklearn.naive_bayes.GaussianNB类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论