This article collects typical usage examples of the Python class sklearn.linear_model.RidgeClassifier. If you are wondering what RidgeClassifier is for, how to use it, or want to see it in working code, the curated class examples below may help.
The following presents 20 code examples of the RidgeClassifier class, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
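Before the collected snippets, here is a minimal, self-contained sketch of the basic RidgeClassifier workflow. It is not taken from any of the projects below; the digits dataset and the alpha value are purely illustrative.

# A minimal sketch of the typical RidgeClassifier fit/predict cycle.
from sklearn.datasets import load_digits
from sklearn.linear_model import RidgeClassifier
from sklearn.model_selection import train_test_split

X, y = load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = RidgeClassifier(alpha=1.0)   # L2-regularized least-squares classifier
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))   # mean accuracy on held-out data
# RidgeClassifier has no predict_proba; use decision_function for confidence scores
scores = clf.decision_function(X_test)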
Example 1: train_and_predict_m8
def train_and_predict_m8(train, test, labels):
    ## Apply basic concatenation + stemming
    trainData, testData = stemmer_clean(train, test, stemmerEnableM7, stemmer_type='porter')

    ## TF-IDF transform with sub-linear TF and stop-word removal
    tfv = TfidfVectorizer(min_df=5, max_features=None, strip_accents='unicode', analyzer='word',
                          token_pattern=r'\w{1,}', ngram_range=(1, 5), smooth_idf=1,
                          sublinear_tf=1, stop_words=ML_STOP_WORDS)
    tfv.fit(trainData)
    X = tfv.transform(trainData)
    X_test = tfv.transform(testData)

    ## Create the classifier
    print("Fitting Ridge Classifier...")
    # class_weight='auto' and the normalize= argument were removed in newer
    # scikit-learn; use class_weight='balanced' and a scaling step in a Pipeline there
    clf = RidgeClassifier(class_weight='auto', alpha=1, normalize=True)

    ## Create a parameter grid to search for best parameters for everything in the pipeline
    param_grid = {'alpha': [0.1, 0.3, 1, 3, 10], 'normalize': [True, False]}

    ## Predict model with best parameters optimized for quadratic_weighted_kappa
    if gridSearch:
        model = perform_grid_search(clf, param_grid, X, labels)
        pred = model.predict(X_test)
    else:
        clf.fit(X, labels)
        pred = clf.predict(X_test)
    return pred
Author: sathishrvijay | Project: Kaggle-CrowdFlowerSRR | Lines: 25 | Source file: classifier.py
Example 2: retrain_models
def retrain_models(username):
    train_x, train_y, body_x, body_y, head_x, head_y = model_retriever.retrieve_data_db(username)

    b_train_x = []
    b_train_y = numpy.concatenate([body_y, train_y])
    for msg in (body_x + train_x):
        b_train_x.append(extract_body_features(msg))
    body_vec = TfidfVectorizer(norm="l2")
    b_train_x = body_vec.fit_transform(b_train_x)

    h_train_x = []
    h_train_y = numpy.concatenate([head_y, train_y])
    for msg in (head_x + train_x):
        h_train_x.append(extract_header_features(msg))
    head_vec = DictVectorizer()
    h_train_x = head_vec.fit_transform(h_train_x)

    # loss='l2' is the old spelling of loss='squared_hinge' in current scikit-learn
    body_model = LinearSVC(loss='l2', penalty="l2", dual=False, tol=1e-3)
    head_model = RidgeClassifier(tol=1e-2, solver="lsqr")
    body_model.fit(b_train_x, b_train_y)
    head_model.fit(h_train_x, h_train_y)
    print("Finished training models for " + username + "...")

    store_models(username, body_vec, body_model, head_vec, head_model)
Author: dylanrhodes | Project: sigma | Lines: 30 | Source file: offline_updater.py
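At inference time the two stored vectorizer/model pairs would be applied in tandem. A hedged sketch, reusing the example's own helper names; new_msg is a placeholder, not defined in the original:

# Illustrative inference step for the stored body/head models; new_msg is a
# placeholder, and extract_*_features are the project's own helpers
b = body_vec.transform([extract_body_features(new_msg)])
h = head_vec.transform([extract_header_features(new_msg)])
print(body_model.predict(b)[0], head_model.predict(h)[0])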
Example 3: run
def run(input_train, input_test, output_name):
    """
    Takes a file path as input, a file path as output, and produces a sorted csv of
    item IDs for Kaggle submission
    -------
    input_train : 'full path of the training file'
    input_test : 'full path of the testing file'
    output_name : 'full path of the output file'
    """
    data = pd.read_table(input_train)
    test = pd.read_table(input_test)
    testItemIds = test.itemid
    response = data.is_blocked
    dummies = sparse.csc_matrix(pd.get_dummies(data.subcategory))
    pretestdummies = pd.get_dummies(test.subcategory)
    testdummies = sparse.csc_matrix(pretestdummies.drop(['Растения', 'Товары для компьютера'], axis=1))
    words = np.array(data.description, str)
    testwords = np.array(test.description, str)
    del data, test
    vect = text.CountVectorizer(decode_error=u'ignore', strip_accents='unicode', ngram_range=(1, 2))
    corpus = np.concatenate((words, testwords))
    vect.fit(corpus)
    counts = vect.transform(words)
    features = sparse.hstack((dummies, counts))
    clf = RidgeClassifier()
    clf.fit(features, response)
    testcounts = vect.transform(testwords)
    testFeatures = sparse.hstack((testdummies, testcounts))
    # RidgeClassifier has no predict_proba (the original called it here, which
    # raises); decision_function gives signed distances that sort items the same way
    predicted_scores = clf.decision_function(testFeatures)
    f = open(output_name, 'w')
    f.write("id\n")
    for pred_score, item_id in sorted(zip(predicted_scores, testItemIds), reverse=True):
        f.write("%d\n" % (item_id))
    f.close()
Author: eyedvabny | Project: CDIPS-WS-2014 | Lines: 35 | Source file: ridge_wordbag.py
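RidgeClassifier exposing no predict_proba is a detail several of these examples trip over. A hedged sketch of one common workaround, not part of the original repo: squash decision_function scores through a logistic sigmoid. For calibrated probabilities, sklearn.calibration.CalibratedClassifierCV is the more principled route.

# Illustrative workaround: map RidgeClassifier decision_function scores
# to pseudo-probabilities with a logistic sigmoid (monotone, not calibrated).
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import RidgeClassifier

X, y = make_classification(n_samples=200, random_state=0)
clf = RidgeClassifier().fit(X, y)
d = clf.decision_function(X)              # signed distance to the hyperplane
pseudo_proba = 1.0 / (1.0 + np.exp(-d))   # in (0, 1), preserves the ranking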
Example 4: validate
def validate(input_train, rows=True, test=0.25):
    """
    Takes file as input and returns classification report, average precision, and
    AUC for a bigram model. By default, loads all rows of a dataset, trains on .75,
    and tests on .25.
    ----
    input_train : 'full path of the file you are loading'
    rows : True - loads all rows; insert an int for a specific number of rows
    test : float proportion of dataset used for testing
    """
    if rows == True:
        data = pd.read_table(input_train)
    else:
        data = pd.read_table(input_train, nrows=rows)
    response = data.is_blocked
    dummies = sparse.csc_matrix(pd.get_dummies(data.subcategory))
    words = np.array(data.description, str)
    del data
    vect = text.CountVectorizer(decode_error=u'ignore', strip_accents='unicode', ngram_range=(1, 2))
    counts = vect.fit_transform(words)
    features = sparse.hstack((dummies, counts))
    features_train, features_test, target_train, target_test = train_test_split(features, response, test_size=test)
    clf = RidgeClassifier()
    clf.fit(features_train, target_train)
    prediction = clf.predict(features_test)
    return (classification_report(target_test, prediction),
            average_precision_score(target_test, prediction),
            roc_auc_score(target_test, prediction))
Author: eyedvabny | Project: CDIPS-WS-2014 | Lines: 26 | Source file: ridge_wordbag.py
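One caveat in validate: roc_auc_score is computed on hard 0/1 predictions. A hedged variant, not in the original repo, feeds the ranking metric the classifier's continuous scores instead:

# Sketch: ranking metrics such as ROC AUC are better fed continuous scores;
# clf, features_test and target_test are the objects built inside validate() above
scores = clf.decision_function(features_test)
print(roc_auc_score(target_test, scores))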
Example 5: test_default_configuration_classify
def test_default_configuration_classify(self):
    for i in range(2):
        X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits',
                                                       make_sparse=False)
        configuration_space = ExtraTreesPreprocessor.get_hyperparameter_search_space()
        default = configuration_space.get_default_configuration()
        preprocessor = ExtraTreesPreprocessor(random_state=1,
                                              **{hp_name: default[hp_name]
                                                 for hp_name in default})
        preprocessor.fit(X_train, Y_train)
        X_train_trans = preprocessor.transform(X_train)
        X_test_trans = preprocessor.transform(X_test)

        # fit a classifier on top
        classifier = RidgeClassifier()
        predictor = classifier.fit(X_train_trans, Y_train)
        predictions = predictor.predict(X_test_trans)
        accuracy = sklearn.metrics.accuracy_score(predictions, Y_test)
        self.assertAlmostEqual(accuracy, 0.87310261080752882, places=2)
Author: dongzhixiang | Project: paramsklearn | Lines: 19 | Source file: test_extra_trees.py
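The preprocess-then-classify pattern in this test maps naturally onto an sklearn Pipeline. A sketch under stated assumptions: SelectFromModel wrapping an ExtraTreesClassifier stands in for the auto-sklearn ExtraTreesPreprocessor wrapper, which is not a plain sklearn class.

# Sketch of the same pattern as a Pipeline; the feature-selection step is an
# assumed stand-in for the ExtraTreesPreprocessor used in the test above.
from sklearn.datasets import load_digits
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import RidgeClassifier
from sklearn.pipeline import make_pipeline

X, y = load_digits(return_X_y=True)
pipe = make_pipeline(
    SelectFromModel(ExtraTreesClassifier(n_estimators=100, random_state=1)),
    RidgeClassifier(),
)
pipe.fit(X, y)
print(pipe.score(X, y))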
Example 6: test_default_configuration_classify
def test_default_configuration_classify(self):
    for i in range(5):
        X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits',
                                                       make_sparse=False)
        configuration_space = KernelPCA.get_hyperparameter_search_space()
        default = configuration_space.get_default_configuration()
        preprocessor = KernelPCA(random_state=1,
                                 **{hp_name: default[hp_name] for hp_name in
                                    default if default[hp_name] is not None})
        preprocessor.fit(X_train, Y_train)
        X_train_trans = preprocessor.transform(X_train)
        X_test_trans = preprocessor.transform(X_test)

        # fit a classifier on top
        classifier = RidgeClassifier()
        predictor = classifier.fit(X_train_trans, Y_train)
        predictions = predictor.predict(X_test_trans)
        accuracy = sklearn.metrics.accuracy_score(predictions, Y_test)
        self.assertAlmostEqual(accuracy, 0.096539162112932606)
Author: Allen1203 | Project: auto-sklearn | Lines: 19 | Source file: test_kernel_pca.py
Example 7: test_default_configuration_classify
def test_default_configuration_classify(self):
    for i in range(2):
        X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits',
                                                       make_sparse=True)
        configuration_space = TruncatedSVD.get_hyperparameter_search_space()
        default = configuration_space.get_default_configuration()
        preprocessor = TruncatedSVD(random_state=1,
                                    **{hp_name: default[hp_name]
                                       for hp_name in default
                                       if default[hp_name] is not None})
        preprocessor.fit(X_train, Y_train)
        X_train_trans = preprocessor.transform(X_train)
        X_test_trans = preprocessor.transform(X_test)

        # fit a classifier on top
        classifier = RidgeClassifier()
        predictor = classifier.fit(X_train_trans, Y_train)
        predictions = predictor.predict(X_test_trans)
        accuracy = sklearn.metrics.accuracy_score(predictions, Y_test)
        self.assertAlmostEqual(accuracy, 0.44201578627808136, places=2)
Author: dongzhixiang | Project: paramsklearn | Lines: 21 | Source file: test_truncatedSVD.py
Example 8: get_optimal_blend_weigth
def get_optimal_blend_weigth(exp_, best_param_,
                             folder, fname, model_fname):
    clf = RidgeClassifier()
    X_test, y_test = exp_.get_test_data()
    clf.set_params(**best_param_)
    clf.fit(X_test, y_test)

    # dump2csv optimal linear weight
    names = np.append(np.array(['intercept'], dtype='S100'), X_test.columns.values)
    coefs = np.append(clf.intercept_, clf.coef_).astype(np.float64)
    optimal_linear_weight = pd.DataFrame(coefs.reshape(1, len(coefs)), columns=names)
    optimal_linear_weight.to_csv(os.path.join(Config.get_string('data.path'),
                                              folder,
                                              fname), index=False)

    # dump2cpkle for ridge model (cPickle is the Python 2 name; use pickle on Python 3)
    model_fname = os.path.join(Config.get_string('data.path'), folder, model_fname)
    with gzip.open(model_fname, 'wb') as gf:
        cPickle.dump(clf, gf, cPickle.HIGHEST_PROTOCOL)

    return True
Author: Quasi-quant2010 | Project: Stacking | Lines: 21 | Source file: run_ridge_grid_search.py
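A matching loader for the gzipped pickle this example writes might look like the following sketch; the Python 3 pickle spelling is used and the file path is a placeholder, not the project's actual path.

# Illustrative loader for the gzip-compressed model written above
import gzip
import pickle

with gzip.open('data/ridge_model.pkl.gz', 'rb') as gf:   # placeholder path
    clf = pickle.load(gf)
print(clf.coef_.shape)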
Example 9: Predict
def Predict():
    print('\nThere are %d new deals' % n_test)

    # Using the KNN classifier
    clf_KNN = KNeighborsClassifier(n_neighbors=3)  # KNN does not work well even when k has been tuned
    #clf_KNN = KNeighborsClassifier(n_neighbors=7)
    #clf_KNN = KNeighborsClassifier(n_neighbors=11)
    clf_KNN.fit(Corpus_train, Y_train)
    Y_pred_KNN = clf_KNN.predict(Corpus_test)
    print_rate(Y_test, Y_pred_KNN, n_test, 'KNNClassifier')

    # Using the SVM classifier
    clf_SVM = svm.SVC()
    clf_SVM.fit(Corpus_train, Y_train)
    Y_pred_SVM = clf_SVM.predict(Corpus_test)
    print_rate(Y_test, Y_pred_SVM, n_test, 'SVMClassifier')

    # Using the Ridge classifier
    clf_RC = RidgeClassifier(tol=0.01, solver="lsqr")
    #clf_RC = RidgeClassifier(tol=0.1, solver="lsqr")
    clf_RC.fit(Corpus_train, Y_train)
    Y_pred_RC = clf_RC.predict(Corpus_test)
    print_rate(Y_test, Y_pred_RC, n_test, 'RidgeClassifier')

    # won't consider Random Forests or Decision Trees because they perform poorly on highly sparse data

    # Using the Multinomial Naive Bayes classifier
    # I expect this MNB classifier to do the best since it is designed for occurrence-count features
    #clf_MNB = MultinomialNB(alpha=0.01)  # smoothing parameter = 0.01 is worse than 0.1
    clf_MNB = MultinomialNB(alpha=0.1)
    #clf_MNB = MultinomialNB(alpha=0.3)  # a large smoothing rate does not benefit the model
    #clf_MNB = MultinomialNB(alpha=0.2)  # alpha = 0.05 can also generate the best outcome
    clf_MNB.fit(Corpus_train, Y_train)
    Y_pred_MNB = clf_MNB.predict(Corpus_test)
    print_rate(Y_test, Y_pred_MNB, n_test, 'MultinomialNBClassifier')
Author: albingrace | Project: QianWan | Lines: 36 | Source file: test_3.py
Example 10: get_classifier
def get_classifier(classifier):
    if classifier["name"] == 'linear-ridge':
        c = RidgeClassifier()
    elif classifier["name"] == 'SVC':
        c = SVC()
    elif classifier["name"] == "l2-SVC":
        c = L2KernelClassifier()
    elif classifier["name"] == "fredholm":
        c = L2FredholmClassifier()
    elif classifier["name"] == "TSVM":
        c = SVMLight()
    elif classifier["name"] == "Lap-RLSC":
        c = LapRLSC()
    elif classifier["name"] == "fred_kernel_appr":
        c = FredholmKernelApprClassifier()
    else:
        raise NameError('Not existing classifier: ' + classifier["name"] + '.')
    c.set_params(**classifier["params"])
    return c
Author: queqichao | Project: FredholmLearning | Lines: 19 | Source file: classifier_help.py
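A call might look like the following sketch; the dict layout ({"name": ..., "params": ...}) is inferred from the function body, and the alpha value and data names are illustrative.

# Hypothetical usage; the dict layout mirrors what get_classifier reads
clf = get_classifier({"name": "linear-ridge", "params": {"alpha": 0.5}})
clf.fit(X_train, y_train)  # X_train / y_train are placeholders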
Example 11: KFold
# N: number of training examples; K: number of models in level 0
# X: feature matrix; y: result array; z_k: prediction result array for k's model

# Set up 10-fold cross validation (old scikit-learn API; current versions
# use KFold(n_splits=10) together with kf.split(X))
fold_num = 10
kf = KFold(n_samples, k=fold_num, indices=True)

# set number of neighbors for kNN
n_neighb = 13

# Brute-force implementation
clf_bNB = BernoulliNB(alpha=.01)
clf_mNB = MultinomialNB(alpha=.01)
clf_kNN = KNeighborsClassifier(n_neighbors=n_neighb)
clf_ridge = RidgeClassifier(tol=1e-1)
clf_SGD = SGDClassifier(alpha=.0001, n_iter=50, penalty="l2")
clf_lSVC = LinearSVC(loss='l2', penalty='l2', C=1000, dual=False, tol=1e-3)
clf_SVC = SVC(C=1024, kernel='rbf', degree=3, gamma=0.001, probability=True)

###############################################################################
# Stacking
#
# initialize empty y and z
print('X_den shape: ', X_den.shape)
print('y shape: ', y.shape)
n_categories = len(set(y))
z = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=float)
Author: YuanhaoSun | Project: PPLearn | Lines: 31 | Source file: 20_ensemble_stacking_prob.py
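The KFold(n_samples, k=..., indices=True) call above is the pre-0.18 scikit-learn API. A modern equivalent, as a small sketch on placeholder data:

# Modern equivalent of the deprecated KFold constructor used above
import numpy as np
from sklearn.model_selection import KFold

X = np.arange(40).reshape(20, 2)  # placeholder feature matrix
kf = KFold(n_splits=10)
for train_index, test_index in kf.split(X):
    pass  # fit the level-0 models on X[train_index], predict X[test_index]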
Example 12: main
def main():
    startCol = 0
    endCol = 50  # max = 1775

    train = csv_io.read_data("../Data/train.csv")
    target = [x[0] for x in train][1:3000]
    targetTest = [x[0] for x in train][3001:]
    trainTest = [x[startCol + 1:endCol + 1] for x in train][3001:]

    test = csv_io.read_data("../Data/test.csv")
    test = [x[startCol:endCol] for x in test]
    train = [x[startCol + 1:endCol + 1] for x in train][1:3000]

    fo = open("knn_stats.txt", "a+")

    rf = RidgeClassifier(alpha=0.01, fit_intercept=True, normalize=False, copy_X=True, tol=0.001)
    rf.fit(train, target)
    prob = rf.predict(trainTest)  # changed from test

    result = 100
    probSum = 0
    for i in range(0, len(prob)):
        probX = prob[i]  # [1]
        if probX > 0.7:
            probX = 0.7
        if probX < 0.3:
            probX = 0.3
        print(i, probSum, probX, target[i])
        print(target[i] * log(probX), (1 - target[i]) * log(1 - probX))
        probSum += targetTest[i] * log(probX) + (1 - targetTest[i]) * log(1 - probX)
        #print(probSum)

    #print(len(prob))
    #print("C: ", 10**C, " gamma: ", 2**g)
    print(-probSum / len(prob))
    if -probSum / len(prob) < result:
        result = -probSum / len(prob)

    predicted_probs = rf.predict(test)  # was test
    predicted_probs = ["%f" % x for x in predicted_probs]
    csv_io.write_delimited_file("../Submissions/knn.csv", predicted_probs)
    print("Generated Data!!")

    #fo.write(str(5) + str(5) + str(5))
    fo.close()

    #csv_io.write_delimited_file("../Submissions/rf_benchmark_test2.csv", predicted_probs)
    #predicted_probs = rf.predict_proba(train)  # changed from test
    #predicted_probs = ["%f" % x[1] for x in predicted_probs]
    #predicted_probs = rf.predict(train)  # changed from test
    #predicted_probs = ["%f" % x for x in predicted_probs]
    #csv_io.write_delimited_file("../Submissions/rf_benchmark_train2.csv", predicted_probs)

    var = input("Enter to terminate.")  # raw_input in the original Python 2 source
Author: mb16 | Project: Kaggle | Lines: 64 | Source file: ridge.py
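The hand-rolled clipped log-likelihood in main() is the negative log-loss. A hedged sketch of the same computation with scikit-learn, on placeholder data standing in for targetTest and prob:

# Sketch of the same clipped log-loss via scikit-learn; arrays are placeholders
import numpy as np
from sklearn.metrics import log_loss

y_true = np.array([0, 1, 1, 0])            # stands in for targetTest
y_score = np.array([0.2, 0.9, 0.6, 0.4])   # stands in for prob
print(log_loss(y_true, np.clip(y_score, 0.3, 0.7)))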
Example 13: KFold
# Notation:
# N: number of training examples; K: number of models in level 0
# X: feature matrix; y: result array; z_k: prediction result array for k's model

# Set up 10-fold cross validation (old scikit-learn API)
fold_num = 10
kf = KFold(n_samples, k=fold_num, indices=True)

# set number of neighbors for kNN
n_neighb = 19

# Brute-force implementation
clf_mNB = MultinomialNB(alpha=.01)
clf_kNN = KNeighborsClassifier(n_neighbors=n_neighb)
clf_ridge = RidgeClassifier(tol=1e-1)
clf_lSVC = LinearSVC(loss='l2', penalty='l2', C=0.5, dual=False, tol=1e-3)
clf_SVC = SVC(C=32, gamma=0.0625)
# clf_SGD = SGDClassifier(alpha=.0001, n_iter=50, penalty="l2")

# empty ndarrays for prediction results z_kn
z_mNB = np.array([], dtype=np.int32)
z_kNN = np.array([], dtype=np.int32)
z_ridge = np.array([], dtype=np.int32)
z_lSVC = np.array([], dtype=np.int32)
z_SVC = np.array([], dtype=np.int32)

###############################################################################
# Stacking
#
Author: YuanhaoSun | Project: PPLearn | Lines: 31 | Source file: 05_Test_stacking_pred.py
Example 14: KFold
# N: number of training examples; K: number of models in level 0
# X: feature matrix; y: result array; z_k: prediction result array for k's model

# Set up 10-fold cross validation (old scikit-learn API)
fold_num = 10
kf = KFold(n_samples, k=fold_num, indices=True)

# set number of neighbors for kNN
n_neighb = 19

# Brute-force implementation
clf_bNB = BernoulliNB(alpha=.01)
clf_mNB = MultinomialNB(alpha=.01)
clf_kNN = KNeighborsClassifier(n_neighbors=n_neighb)
clf_ridge = RidgeClassifier(tol=1e-1)
clf_lSVC = LinearSVC(loss='l2', penalty='l2', C=0.5, dual=False, tol=1e-3)
clf_SVC = SVC(C=32, gamma=0.0625, probability=True)
# clf_SGD = SGDClassifier(alpha=.0001, n_iter=50, penalty="l2")

###############################################################################
# Stacking
#
# initialize empty y and z
n_categories = len(set(y))
z = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=float)
# z = np.zeros( (n_samples, n_categories) , dtype=float)

# Test for 10 rounds using the results from 10 fold cross validations
for i, (train_index, test_index) in enumerate(kf):
Author: YuanhaoSun | Project: PPLearn | Lines: 31 | Source file: 04_01_Ensemble_Stacking_Prob_1010.py
Example 15: train_test_split
#!/usr/bin/env python
"""
Ridge regression for Avito
"""
__author__ = "deniederhut"
__license__ = "GPL"

import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import classification_report
# sklearn.cross_validation is the pre-0.18 module; use sklearn.model_selection today
from sklearn.cross_validation import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score

data = pd.read_table('/Users/dillonniederhut/Desktop/avito_train.tsv', nrows=100000)
# replace with the file path to your training data
features = pd.get_dummies(data.subcategory)
features_train, features_test, target_train, target_test = \
    train_test_split(features, data.is_blocked, test_size=0.25)
ridge = RidgeClassifier()
ridge.fit(features_train, target_train)
prediction = np.round(ridge.predict(features_test))

print(classification_report(target_test, prediction))
print(average_precision_score(target_test, prediction))
print(roc_auc_score(target_test, prediction))
Author: eyedvabny | Project: CDIPS-WS-2014 | Lines: 27 | Source file: ridge_benchmark.py
Example 16: open
if __name__ == "__main__":
    # pickle files must be opened in binary mode ('rb'; the original used 'r')
    filehandler = open(features_evaluation.SELECTED_FEATURES_CORPUS_CHI2, 'rb')
    corpus = pickle.load(filehandler)
    dataset = Dataset(corpus=corpus)
    X = dataset.get_train_x()
    y = dataset.get_train_y()
    scores_dict = defaultdict(list)

    clf1 = LogisticRegression(C=0.05, random_state=1, class_weight='balanced')
    clf2 = RandomForestClassifier(random_state=1)
    clf3 = svm.SVC(C=0.35, class_weight='balanced')
    clf4 = RidgeClassifier(alpha=2.5)
    clf5 = AdaBoostClassifier(n_estimators=150)
    eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('svm', clf3),
                                        ('rc', clf4), ('ab', clf5)],
                            voting='hard')

    for clf, label in zip([clf1, clf2, clf3, clf4, clf5, eclf],
                          ['Logistic Regression', 'Random Forest', 'SVM',
                           'Ridge Classifier', 'Ada boost', 'Ensemble']):
        scores = cross_val_score(clf, X.toarray(), y, cv=5, scoring='f1_macro')
        scores_dict[label].append(scores.mean())
        print("f1_macro: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))

    X, y = dataset.get_resampled_train_X_y(kind='regular')
    clf1.fit(X.toarray(), y)
    clf2.fit(X.toarray(), y)
Author: TvrtkoSternak | Project: TAR-project | Lines: 30 | Source file: depression_classifier_ensamble.py
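Note that this ensemble must use voting='hard': soft voting averages predict_proba outputs, which RidgeClassifier does not provide. A quick illustrative check:

# RidgeClassifier has no predict_proba, so soft voting would fail here
from sklearn.linear_model import RidgeClassifier
print(hasattr(RidgeClassifier(), 'predict_proba'))  # False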
Example 17: RidgeClassifier
# .ix is removed in modern pandas; use .loc there
non_lemons = non_lemons.ix[random.sample(non_lemons.index, 6684)]
train = lemons.append(non_lemons)

#X = train.drop(['RefId','IsBadBuy','VehYear','Make','Model','Trim','SubModel','WheelTypeID','BYRNO'], axis=1)
#y = pd.Series(train['IsBadBuy']).values
target = pd.Series(train['IsBadBuy']).values
data = train.drop(['RefId','IsBadBuy','VehYear','Make','Model','Trim','SubModel','WheelTypeID','BYRNO'], axis=1)
x_train, x_test, y_train, y_test = cross_validation.train_test_split(data, target, test_size=.3)

# Subset the data so we have a more even data set
# (note: X and y below refer to the commented-out definitions above in the original source)
model = RidgeClassifier()
clf = model.fit(X, y)
Ridg_Class = clf.predict(X)
clf.score(X, y)
metrics.confusion_matrix(y, clf.predict(X))
print(metrics.classification_report(y, clf.predict(X)))

# GradientBoostingClassifier
from sklearn.ensemble import *
model = GradientBoostingClassifier()

# Train
clf = model.fit(x_train, y_train)
Author: AkiraKane | Project: GA_Data_Science | Lines: 31 | Source file: lemons+11+2+2013.py
Example 18: KNeighborsClassifier
# Using the KNN classifier
clf_KNN = KNeighborsClassifier(n_neighbors=3)  # KNN does not work well even when k has been tuned
#clf_KNN = KNeighborsClassifier(n_neighbors=7)
#clf_KNN = KNeighborsClassifier(n_neighbors=11)
clf_KNN.fit(Corpus_train, Y_train)
Y_pred_KNN = clf_KNN.predict(Corpus_test)
print_rate(Y_test, Y_pred_KNN, n_test, 'KNNClassifier')

# Using the SVM classifier
clf_SVM = svm.SVC()
clf_SVM.fit(Corpus_train, Y_train)
Y_pred_SVM = clf_SVM.predict(Corpus_test)
print_rate(Y_test, Y_pred_SVM, n_test, 'SVMClassifier')

# Using the Ridge classifier
clf_RC = RidgeClassifier(tol=0.01, solver="lsqr")
#clf_RC = RidgeClassifier(tol=0.1, solver="lsqr")
clf_RC.fit(Corpus_train, Y_train)
Y_pred_RC = clf_RC.predict(Corpus_test)
print_rate(Y_test, Y_pred_RC, n_test, 'RidgeClassifier')

# won't consider Random Forests or Decision Trees because they perform poorly on highly sparse data

# Using the Multinomial Naive Bayes classifier
# I expect this MNB classifier to do the best since it is designed for occurrence-count features
#clf_MNB = MultinomialNB(alpha=0.01)  # smoothing parameter = 0.01 is worse than 0.1
clf_MNB = MultinomialNB(alpha=0.1)
#clf_MNB = MultinomialNB(alpha=0.3)  # a large smoothing rate does not benefit the model
#clf_MNB = MultinomialNB(alpha=0.2)  # alpha = 0.05 can also generate the best outcome
clf_MNB.fit(Corpus_train, Y_train)
Author: albingrace | Project: QianWan | Lines: 31 | Source file: task3_final.py
Example 19: Eval
def Eval(XTrain, YTrain, XTest, YTest, clf, return_predicted_labels=False):
    """
    Inputs:
        XTrain - N by D matrix of training data vectors
        YTrain - N by 1 matrix of training class labels
        XTest - M by D matrix of testing data vectors
        YTest - M by 1 matrix of testing class labels
        clf - the classifier: either a string naming one of the models below
              or an sklearn classifier instance with the methods .fit and .predict
    Outputs:
        A tuple containing (in the following order):
            Accuracy
            Overall Precision
            Overall Recall
            Overall F1 score
            Avg. Precision per class
            Avg. Recall per class
            Avg. F1 score per class
            Precision per class
            Recall per class
            F1 score per class
            (if return_predicted_labels)
            predicted class labels for each row in XTest
    """
    if type(clf) == str:
        if 'ridge' in clf.lower():
            clf = RidgeClassifier(tol=1e-2, solver="lsqr")
        elif "perceptron" in clf.lower():
            clf = Perceptron(n_iter=50)  # n_iter is the old name; newer sklearn uses max_iter
        elif "passive aggressive" in clf.lower() or 'passive-aggressive' in clf.lower():
            clf = PassiveAggressiveClassifier(n_iter=50)
        elif 'linsvm' in clf.lower() or 'linearsvm' in clf.lower() or 'linearsvc' in clf.lower():
            clf = LinearSVC()
        elif 'svm' in clf.lower() or 'svc' in clf.lower():
            clf = SVC()
        elif 'sgd' in clf.lower():
            clf = SGDClassifier()

    clf.fit(XTrain, YTrain)
    YPred = clf.predict(XTest)

    accuracy = sklearn.metrics.accuracy_score(YTest, YPred)
    (overall_precision, overall_recall, overall_f1, support) = sklearn.metrics.precision_recall_fscore_support(YTest, YPred, average='micro')
    (precision_per_class, recall_per_class, f1_per_class, support_per_class) = sklearn.metrics.precision_recall_fscore_support(YTest, YPred)
    avg_precision_per_class = np.mean(precision_per_class)
    avg_recall_per_class = np.mean(recall_per_class)
    avg_f1_per_class = np.mean(f1_per_class)
    del clf

    if return_predicted_labels:
        return (accuracy, overall_precision, overall_recall, overall_f1,
                avg_precision_per_class, avg_recall_per_class, avg_f1_per_class,
                precision_per_class, recall_per_class, f1_per_class, YPred)
    else:
        return (accuracy, overall_precision, overall_recall, overall_f1,
                avg_precision_per_class, avg_recall_per_class, avg_f1_per_class,
                precision_per_class, recall_per_class, f1_per_class)
Author: nmonath | Project: NLPProject | Lines: 58 | Source file: SupervisedLearning.py
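A call might look like the sketch below; the synthetic data is illustrative, and 'ridge' selects the RidgeClassifier branch of the string dispatch above.

# Illustrative call to Eval on synthetic data
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=300, n_classes=3, n_informative=4, random_state=0)
XTrain, XTest, YTrain, YTest = train_test_split(X, y, random_state=0)
results = Eval(XTrain, YTrain, XTest, YTest, 'ridge')
print('accuracy: %.3f' % results[0])  # accuracy is the first element of the tuple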
Example 20: len
data = [i for i in csv.reader(open(train_file, 'r'))]  # file() is Python 2 only; open() works in both
data = data[1:]  # remove header
random.shuffle(data)

X = np.array([i[1:] for i in data]).astype(float)
Y = np.array([i[0] for i in data]).astype(int)

train_cutoff = len(data) * 3 // 4  # integer division, so the result can be used as an index

X_train = X[:train_cutoff]
Y_train = Y[:train_cutoff]
X_test = X[train_cutoff:]
Y_test = Y[train_cutoff:]

classifier = RidgeClassifier(normalize=True, alpha=1)
classifier = classifier.fit(X_train, Y_train)

print('Training error : %s' % (classifier.fit(X_train, Y_train).score(X_train, Y_train)))

Y_predict = classifier.predict(X_test)

equal = 0
for i in range(len(Y_predict)):  # xrange in the original Python 2 source
    if Y_predict[i] == Y_test[i]:
        equal += 1

print('Accuracy = %s' % (float(equal) / len(Y_predict)))
Author: log0 | Project: digit_recognizer | Lines: 27 | Source file: ridge.py
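The manual counting loop above is equivalent to scikit-learn's accuracy_score; a one-line sketch reusing the arrays from the example:

# Equivalent to the counting loop above
from sklearn.metrics import accuracy_score
print('Accuracy = %s' % accuracy_score(Y_test, Y_predict))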
Note: the sklearn.linear_model.RidgeClassifier class examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other source-code and documentation platforms. The snippets are selected from open-source projects contributed by various developers; copyright in the source code remains with the original authors, and distribution and use are subject to each project's license. Do not reproduce without permission.