本文整理汇总了Python中sklearn.datasets.load_svmlight_files函数的典型用法代码示例。如果您正苦于以下问题:Python load_svmlight_files函数的具体用法?Python load_svmlight_files怎么用?Python load_svmlight_files使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了load_svmlight_files函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_load_svmlight_files
def test_load_svmlight_files():
    """Check that load_svmlight_files applies ``dtype`` to every loaded file.

    The same data file is passed several times in a single call, so the
    returned matrices and label vectors must agree with each other.
    """
    # Two copies of the file, requested as float32.
    loaded = load_svmlight_files([datafile, datafile], dtype=np.float32)
    X_train, y_train, X_test, y_test = loaded
    assert_array_equal(X_train.toarray(), X_test.toarray())
    assert_array_equal(y_train, y_test)
    for matrix in (X_train, X_test):
        assert_equal(matrix.dtype, np.float32)

    # Three copies, requested as float64: all dtypes must match the request.
    X1, y1, X2, y2, X3, y3 = load_svmlight_files(
        [datafile, datafile, datafile], dtype=np.float64)
    dtype_pairs = (
        (X1.dtype, X2.dtype),
        (X2.dtype, X3.dtype),
        (X3.dtype, np.float64),
    )
    for actual, expected in dtype_pairs:
        assert_equal(actual, expected)
开发者ID:kkuunnddaann,项目名称:scikit-learn,代码行数:11,代码来源:test_svmlight_format.py
示例2: classify_test
def _write_predictions(filename, predictions):
    """Write one prediction per line to ``filename``, closing the file even on error."""
    with open(filename, 'w') as fout:
        for y in predictions:
            fout.write(str(y) + '\n')


def classify_test(feature_list=None, classifiers=None, root_path='./'):
    """Fit every classifier on every feature set and report timings and error rates.

    For each name in ``feature_list`` the files ``./feature/<name>_train.txt``
    and ``./feature/<name>_test.txt`` (relative to the current working
    directory, not to ``root_path``) are loaded with ``load_svmlight_files``.
    Each classifier is fitted on each training set; its predictions on the
    training and test sets are written, one per line, to
    ``<root_path>/results/<feature>_<classifier>_train.txt`` and
    ``<root_path>/results/<feature>_<classifier>_test.txt``.

    This is a generator: no work is done until it is iterated.

    Args:
        feature_list: iterable of feature-set names; ``None`` means none.
        classifiers: iterable of estimators exposing ``fit`` and ``predict``;
            ``None`` means none.
        root_path: directory under which the ``results`` directory is
            created (missing parent directories are created as well).

    Yields:
        ``(combine_name, feature_name, clf_name, info)`` where ``info`` is a
        dict with the keys ``training_time``, ``training_error``,
        ``test_time`` and ``test_error``.

    Raises:
        AssertionError: if ``<root_path>/results`` exists but is not a
            directory.
    """
    # Local import so the block does not depend on how the file imports os.
    import os

    # None replaces the former mutable [] defaults; behaviour is unchanged.
    feature_list = [] if feature_list is None else feature_list
    classifiers = [] if classifiers is None else classifiers

    # load data set
    datasets = []
    for name in feature_list:
        logging.log(logging.DEBUG, 'loading data: %s ...' % name)
        filenames = tuple(['./feature/%s_%s' % (name, tag) for tag in ['train.txt', 'test.txt']])
        X_train, y_train, X_test, y_test = load_svmlight_files(filenames)
        datasets.append((name, X_train, y_train, X_test, y_test))

    # make directory to store results
    result_path = os.path.join(root_path, 'results')
    if os.path.exists(result_path):
        assert os.path.isdir(result_path), 'data must be a directory!'
    else:
        # os.makedirs instead of shelling out to `mkdir`: no shell quoting
        # problems, portable, and failures raise instead of being ignored.
        os.makedirs(result_path)

    for clf in classifiers:
        for feature in datasets:
            clf_name = clf.__class__.__name__
            feature_name, X_train, y_train, X_test, y_test = feature
            combine_name = feature_name + '_' + clf_name
            info = {}
            logging.log(logging.DEBUG, 'classification test: %s ...' % combine_name)

            logging.log(logging.DEBUG, 'training...')
            t0 = time()
            clf.fit(X_train, y_train)
            t1 = time()
            info['training_time'] = t1 - t0

            logging.log(logging.DEBUG, 'testing on training...')
            pred_y = clf.predict(X_train)
            training_acc = accuracy_score(y_train, pred_y)
            logging.log(logging.DEBUG, 'error rate on training set: %f' % (1.0 - training_acc))
            info['training_error'] = 1.0 - training_acc
            _write_predictions(os.path.join(result_path, combine_name + '_train.txt'), pred_y)

            logging.log(logging.DEBUG, 'testing...')
            t0 = time()
            pred_y = clf.predict(X_test)
            t1 = time()
            info['test_time'] = t1 - t0
            test_acc = accuracy_score(y_test, pred_y)
            logging.log(logging.DEBUG, 'error rate on test set: %f' % (1.0 - test_acc))
            info['test_error'] = 1.0 - test_acc
            _write_predictions(os.path.join(result_path, combine_name + '_test.txt'), pred_y)

            yield combine_name, feature_name, clf_name, info
开发者ID:defaultstr,项目名称:movie_review,代码行数:55,代码来源:base.py
示例3: pCoverX
def pCoverX(featureFamily):
    """Compute posteriors, entropies and accuracies for each matching feature file.

    Changes the working directory to the hard-coded ``train`` directory and
    globs ``<featureFamily>*.gz`` there.  For every match, the same-named
    gzipped svmlight files under ``train``, ``test`` and ``validation`` are
    loaded, samples labelled 31 are dropped, and the project helpers
    ``featureSelection``, ``pXoverC``, ``prior``, ``posterior``, ``entropy``
    and ``checkAccuracy`` are applied to the three splits.

    Args:
        featureFamily: file-name prefix used to select the ``.gz`` files.

    Returns:
        ``(train_post_array, test_post_array, val_post_array,
        train_entropy_array, test_entropy_array, val_entropy_array,
        data_df)``.  The six lists hold one entry per file; ``data_df`` has
        one row ``[file, train_acc, test_acc, val_acc]`` per file.
    """
    os.chdir("C:\\Users\\Vaibhav\\Desktop\\dir_data\\dir_data\\train")
    path = "C:\\Users\\Vaibhav\\Desktop\\dir_data\\dir_data\\"
    n_guass = 2
    train_post_array = []
    test_post_array = []
    val_post_array = []
    train_entropy_array = []
    test_entropy_array = []
    val_entropy_array = []
    # Accuracy rows are collected in a list and turned into a DataFrame once
    # at the end; DataFrame.append no longer exists in pandas 2.x.
    acc_rows = []
    fileType = featureFamily + '*.gz'
    for fname in glob.glob(fileType):
        print(fname)
        # Context managers make sure the three gzip handles are closed.
        with gzip.open(path + "train\\" + fname) as f_train, \
                gzip.open(path + "test\\" + fname) as f_test, \
                gzip.open(path + "validation\\" + fname) as f_val:
            X_train, y_train, X_test, y_test, X_val, y_val = load_svmlight_files(
                (f_train, f_test, f_val))

        # Drop samples labelled 31; X must be filtered before y is overwritten.
        X_train = X_train[y_train != 31]
        X_test = X_test[y_test != 31]
        X_val = X_val[y_val != 31]
        y_train = y_train[y_train != 31]
        y_test = y_test[y_test != 31]
        y_val = y_val[y_val != 31]

        # Feature selection (delegated to featureSelection with tech='LinearSVC').
        X_train_new, X_test_new, X_val_new = featureSelection(
            X_train, X_test, X_val, y_train, log=True, tech='LinearSVC')

        # Mixture of Gaussians (delegated to pXoverC).
        train_prob, test_prob, val_prob = pXoverC(
            X_train_new, y_train, X_test_new, y_test, X_val_new, y_val, n_guass)

        # Prior from the training labels, then posterior / entropy / accuracy
        # for each split in the order train, test, validation.
        prr = prior(y_train)
        splits = (
            (train_prob, y_train, train_post_array, train_entropy_array),
            (test_prob, y_test, test_post_array, test_entropy_array),
            (val_prob, y_val, val_post_array, val_entropy_array),
        )
        row = [fname]
        for prob, labels, post_out, entropy_out in splits:
            post = posterior(prob, prr)
            post_entropy = entropy(post)
            post_out.append(post)
            entropy_out.append(post_entropy)
            # checkAccuracy also returns a second value that is not used here.
            acc, _ = checkAccuracy(post, labels)
            row.append(acc)
        acc_rows.append(row)

    data_df = pd.DataFrame(acc_rows)
    return (train_post_array, test_post_array, val_post_array,
            train_entropy_array, test_entropy_array, val_entropy_array, data_df)
开发者ID:sahuvaibhav,项目名称:Capstone,代码行数:53,代码来源:test4.py
示例4: test_load_zero_based_auto
def test_load_zero_based_auto():
    """Check ``zero_based="auto"`` for a single file and for a batch of files.

    The first sample uses feature indices 1..3 and is expected to load with
    3 columns on its own.  Loaded together with a sample that uses index 0,
    both matrices are expected to have 4 columns.
    """
    # Bytes literals: BytesIO rejects str on Python 3 (b"" is a plain str on
    # Python 2, so nothing changes there).
    data1 = b"-1 1:1 2:2 3:3\n"
    data2 = b"-1 0:0 1:1\n"

    f1 = BytesIO(data1)
    X, y = load_svmlight_file(f1, zero_based="auto")
    assert_equal(X.shape, (1, 3))

    f1 = BytesIO(data1)
    f2 = BytesIO(data2)
    X1, y1, X2, y2 = load_svmlight_files([f1, f2], zero_based="auto")
    assert_equal(X1.shape, (1, 4))
    assert_equal(X2.shape, (1, 4))
开发者ID:kkuunnddaann,项目名称:scikit-learn,代码行数:13,代码来源:test_svmlight_format.py
示例5: test_load_with_qid
def test_load_with_qid():
    """Check loading of svmlight data that carries ``qid`` attributes.

    With ``query_id=False`` only ``X`` and ``y`` are returned; with
    ``query_id=True`` both loader functions are expected to return
    ``(X, y, qid)`` with the same contents.
    """
    # load svmfile with qid attribute
    # Bytes literal: BytesIO rejects str on Python 3 (no change on Python 2).
    data = b"""
3 qid:1 1:0.53 2:0.12
2 qid:1 1:0.13 2:0.1
7 qid:2 1:0.87 2:0.12"""
    expected_X = [[0.53, 0.12], [0.13, 0.1], [0.87, 0.12]]

    X, y = load_svmlight_file(BytesIO(data), query_id=False)
    assert_array_equal(y, [3, 2, 7])
    assert_array_equal(X.todense(), expected_X)

    res1 = load_svmlight_files([BytesIO(data)], query_id=True)
    res2 = load_svmlight_file(BytesIO(data), query_id=True)
    for X, y, qid in (res1, res2):
        assert_array_equal(y, [3, 2, 7])
        assert_array_equal(qid, [1, 1, 2])
        assert_array_equal(X.todense(), expected_X)
开发者ID:kkuunnddaann,项目名称:scikit-learn,代码行数:15,代码来源:test_svmlight_format.py
示例6: select_feature
def select_feature(trainfilename, testfilename):
    """Write 20 chi-square-reduced versions of a train/test svmlight pair.

    Both files are loaded with ``multilabel=True``.  For ``i`` in 1..20 the
    ``i * (n_features // 20)`` best features according to ``chi2`` are kept
    (the selector is fitted on the training data only) and the reduced
    matrices are written to ``<trainfilename>_<k>`` and
    ``<testfilename>_<k>`` with one-based feature indices.

    Args:
        trainfilename: path of the training file in svmlight format.
        testfilename: path of the test file in svmlight format.
    """
    X_train, y_train, X_test, y_test = load_svmlight_files(
        (trainfilename, testfilename), multilabel=True)
    feature_num = X_train.shape[1]
    # Floor division keeps k an integer on Python 3 as well; SelectKBest
    # does not accept a float k.
    step = feature_num // 20
    for i in range(1, 21):
        select_num = step * i
        # Single-argument print() emits the same text on Python 2 and 3.
        print("selecting %s features" % select_num)
        selector = SelectKBest(chi2, k=select_num)
        X_train_new = selector.fit_transform(X_train, y_train)
        X_test_new = selector.transform(X_test)
        suffix = '_' + str(select_num)
        dump_svmlight_file(X_train_new, y_train, trainfilename + suffix, zero_based=False)
        dump_svmlight_file(X_test_new, y_test, testfilename + suffix, zero_based=False)
开发者ID:junjiek,项目名称:cmu-exp,代码行数:17,代码来源:chi_square_max_multilabel.py
示例7: select_feature_multilabel
def select_feature_multilabel(trainfilename, testfilename):
    """Write 20 reduced versions of a multilabel train/test svmlight pair.

    Feature scores come from the project helper ``randomValues`` and are
    computed once on the training data.  For ``i`` in 1..20 the
    ``i * (n_features // 20)`` best-scoring features are kept and the
    reduced matrices are written to ``<trainfilename>_<k>`` and
    ``<testfilename>_<k>`` with one-based feature indices.

    Args:
        trainfilename: path of the training file in svmlight format.
        testfilename: path of the test file in svmlight format.
    """
    X_train, y_train, X_test, y_test = load_svmlight_files(
        (trainfilename, testfilename), multilabel=True)
    feature_num = X_train.shape[1]
    randval = randomValues(X_train, y_train)
    # Placeholder p-values; the original's follow-up reshape discarded its
    # result, so the array keeps this shape.
    p = np.ones((feature_num, 1), int)

    def returnIG(X, y):
        # Score function for SelectKBest: ignores its arguments and returns
        # the scores / p-values computed above.
        return randval, p

    # Floor division keeps k an integer on Python 3 as well.
    step = feature_num // 20
    for i in range(1, 21):
        select_num = step * i
        # Single-argument print() emits the same text on Python 2 and 3.
        print("selecting %s features" % select_num)
        selector = SelectKBest(returnIG, k=select_num)
        X_train_new = selector.fit_transform(X_train, y_train)
        X_test_new = selector.transform(X_test)
        suffix = '_' + str(select_num)
        dump_svmlight_file(X_train_new, y_train, trainfilename + suffix, zero_based=False)
        dump_svmlight_file(X_test_new, y_test, testfilename + suffix, zero_based=False)
开发者ID:junjiek,项目名称:cmu-exp,代码行数:20,代码来源:random_selection_ml.py
示例8: select_feature
def select_feature(trainfilename, testfilename):
    """Write 20 information-gain-reduced versions of a train/test svmlight pair.

    Feature scores come from the project helper ``information_gain`` and are
    computed once on the training data.  For ``i`` in 1..20 the
    ``i * (n_features // 20)`` best-scoring features are kept and the
    reduced matrices are written to ``<trainfilename>_<k>`` and
    ``<testfilename>_<k>`` with one-based feature indices.

    Args:
        trainfilename: path of the training file in svmlight format.
        testfilename: path of the test file in svmlight format.
    """
    X_train, y_train, X_test, y_test = load_svmlight_files((trainfilename, testfilename))
    feature_num = X_train.shape[1]
    ig = information_gain(X_train, y_train)
    ig = ig.reshape(feature_num,)
    # Placeholder p-values; the original's follow-up reshape discarded its
    # result, so the array keeps this shape.
    p = np.ones((1, feature_num), int)

    def returnIG(X, y):
        # Score function for SelectKBest: ignores its arguments and returns
        # the scores / p-values computed above.
        return ig, p

    # Floor division keeps k an integer on Python 3 as well.
    step = feature_num // 20
    for i in range(1, 21):
        select_num = step * i
        # Single-argument print() emits the same text on Python 2 and 3.
        print("selecting %s features" % select_num)
        selector = SelectKBest(returnIG, k=select_num)
        X_train_new = selector.fit_transform(X_train, y_train)
        X_test_new = selector.transform(X_test)
        suffix = '_' + str(select_num)
        sklearn.datasets.dump_svmlight_file(
            X_train_new, y_train, trainfilename + suffix, zero_based=False)
        sklearn.datasets.dump_svmlight_file(
            X_test_new, y_test, testfilename + suffix, zero_based=False)
开发者ID:junjiek,项目名称:cmu-exp,代码行数:21,代码来源:ig_selection.py
示例9: run_nblcr
def run_nblcr(train, test, outfn, grams='123', clf=LogisticRegression(class_weight="auto")):
    """Build ratio-weighted n-gram features, fit ``clf`` and predict on ``test``.

    Training rows are split by ``label`` (1 vs. 0), tokenised with the
    project helper ``tokenize`` and counted with ``build_dict``;
    ``compute_ratio`` turns the two count tables into a dictionary and a
    ratio.  ``generate_svmlight_file`` then writes ``<outfn>-train.txt`` and
    ``<outfn>-test.txt``, which are loaded back and used to fit and predict.

    Args:
        train: table with ``label`` and ``text`` columns, iterated with
            ``iterrows()``.
        test: table passed to ``generate_svmlight_file`` for the test file.
        outfn: prefix of the two svmlight files that are written.
        grams: string of digits; each digit is one n-gram order.
        clf: estimator to fit.  NOTE(review): the default instance is created
            once at definition time and is shared (and refitted) by every
            call that does not pass ``clf``.

    Returns:
        ``(y_pred, y_prob)``: predicted labels and a two-column array of
        class scores for the test set.
    """
    f_train = outfn + '-train.txt'
    f_test = outfn + '-test.txt'
    ngram = [int(i) for i in grams]

    ptrain = []
    ntrain = []
    for _, row in train.iterrows():
        if row['label'] == 1:
            ptrain.append(tokenize(row['text'], ngram))
        elif row['label'] == 0:
            ntrain.append(tokenize(row['text'], ngram))

    pos_counts = build_dict(ptrain, ngram)
    neg_counts = build_dict(ntrain, ngram)
    dic, r = compute_ratio(pos_counts, neg_counts)

    generate_svmlight_file(train, dic, r, ngram, f_train)
    generate_svmlight_file(test, dic, r, ngram, f_test)
    X_train, y_train, X_test, _ = load_svmlight_files((f_train, f_test))

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    try:
        y_prob = clf.predict_proba(X_test)
    except Exception:
        # `except Exception` instead of a bare `except`, so KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        # for svm with probability output
        # NOTE(review): the estimator is not refitted after probability=True
        # is set, and the hard predictions are used as the positive-class
        # score - confirm this fallback is intended.
        clf.set_params(probability=True)
        y_prob_pos = clf.predict(X_test)
        y_prob_neg = np.ones(X_test.shape[0]) - y_prob_pos
        y_prob = np.column_stack((y_prob_neg, y_prob_pos))
    return y_pred, y_prob
开发者ID:bluedrone,项目名称:psb-adr,代码行数:37,代码来源:maxent_nblcr.py
示例10: load_amazon
def load_amazon(source_name, target_name, data_folder=None, verbose=False):
    """
    Load the amazon sentiment datasets from svmlight format files

    inputs:
        source_name : name of the source dataset
        target_name : name of the target dataset
        data_folder : path to the folder containing the files
                      (defaults to 'data/'; a trailing separator is optional)
        verbose     : if True, print the three file paths before loading

    outputs:
        xs : training source data matrix
        ys : training source label vector
        xt : training target data matrix
        yt : training target label vector
        xtest : testing target data matrix
        ytest : testing target label vector
    """
    # Local import so the block does not depend on the file importing os.
    import os

    if data_folder is None:
        data_folder = 'data/'

    # os.path.join yields the same paths as plain concatenation when the
    # folder ends with a separator, and also works when it does not.
    source_file = os.path.join(data_folder, source_name + '_train.svmlight')
    target_file = os.path.join(data_folder, target_name + '_train.svmlight')
    test_file = os.path.join(data_folder, target_name + '_test.svmlight')

    if verbose:
        print('source file:', source_file)
        print('target file:', target_file)
        print('test file: ', test_file)

    xs, ys, xt, yt, xtest, ytest = load_svmlight_files([source_file, target_file, test_file])

    # Convert sparse matrices to numpy 2D array
    xs, xt, xtest = (X.toarray() for X in (xs, xt, xtest))

    # Convert {-1,1} labels to {0,1} labels
    ys, yt, ytest = (np.array((y + 1) / 2, dtype=int) for y in (ys, yt, ytest))

    return xs, ys, xt, yt, xtest, ytest
开发者ID:GRAAL-Research,项目名称:domain_adversarial_neural_network,代码行数:37,代码来源:experiments_amazon.py
示例11: LDA
probs.append(score_i)
return probs
# Command-line interface: three optional flags, all defaulting to None.
parser = argparse.ArgumentParser()
#parser.add_argument( "train_file" )
# NOTE(review): -p and -t carry the same help text - presumably a copy/paste
# leftover; confirm what --predict_file is meant to describe.
parser.add_argument( "-p", "--predict", help = "if is to make predictions in a test file", default = None )
parser.add_argument( "-t", "--predict_file", help = "if is to make predictions in a test file", default = None )
parser.add_argument( "-c", "--cross_validation", help = "if have make cross-validation", default = None )
args = parser.parse_args()
# Estimator used by the rest of the script.
classifier = LDA(n_components=2)
#classifier = RandomForestClassifier()
# Load the four training views (url, title, body, all) in one call; each file
# yields a feature matrix followed by its label vector.
X_url, y, X_title, y_t, X_body, y_b, X_a, y_a = load_svmlight_files(("url_train.txt", "title_train.txt", "body_train.txt", "all_train.txt"))
# Feature matrices keyed by view name.
X = {"url":X_url, "title": X_title, "body": X_body, "all": X_a}
if(args.predict):
print "Predicting"
T_url, t, T_title, y_t, T_body, y_b, T_a, y_a = load_svmlight_files(("url_test.txt", "title_test.txt", "body_test.txt", "all_test.txt"))
T = {"url": T_url, "title": T_title, "body": T_body, "all": T_a}
probs = predict(classifier, X, y, T, t)
f = open("sub_31-08_01h15.txt","w")
f.write("label\n")
for p in probs:
line = "%f\n" % p
f.write(line)
f.close()
elif(args.cross_validation):
开发者ID:rloliveirajr,项目名称:kaggle_stumbleupon,代码行数:31,代码来源:train.py
示例12: svm_skin
def svm_skin(X_train, y_train, X_test, y_test):
    """Learn the skin data sets with SVM with Linear kernel.

    Args:
        X_train: training samples.
        y_train: training labels.
        X_test: test samples; densified with ``toarray()`` before scoring.
        y_test: test labels.

    Returns:
        The test-set score reported by ``LinearSVC.score``, multiplied by 100.
    """
    # Single-argument print() emits the same text on Python 2 and Python 3;
    # the original print statements are a SyntaxError on Python 3.
    print('SVM w/ Linear kernel')
    clf = svm.LinearSVC()
    clf.fit(X_train, y_train)
    score = 100 * clf.score(X_test.toarray(), y_test)
    print('SVM score: %.2f%%' % score)
    return score
if __name__ == '__main__':
    # The train/test files are expected to exist already.  They can be
    # produced with the (disabled) call below, where `data_size` is an
    # integer limiting the size of the data set and None keeps all of it:
    # split_libsvm_dataset(path='skin.txt', data_size=None)

    # Samples (X) and labels (y) for the train and test splits.
    data_files = ('skin-train.libsvm', 'skin-test.libsvm')
    X_train, y_train, X_test, y_test = load_svmlight_files(data_files)

    svm_skin(X_train, y_train, X_test, y_test)

    # Disabled AdaBoost experiment and its plot:
    # iterations, scores = adaboost_skin(X_train, y_train, X_test, y_test)
    # graph = plot_success_per_size(iterations, scores)
    # show()
开发者ID:oryband,项目名称:homework,代码行数:29,代码来源:q5.py
示例13: test_load_invalid_file2
def test_load_invalid_file2():
    """Load a batch of files whose middle entry is the invalid fixture.

    NOTE(review): nothing here asserts on the outcome; presumably an
    expected-exception decorator belongs above this function - confirm.
    """
    sources = [datafile, invalidfile, datafile]
    load_svmlight_files(sources)
开发者ID:kkuunnddaann,项目名称:scikit-learn,代码行数:2,代码来源:test_svmlight_format.py
示例14: load_svmlight_files
# remove axis spines
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.spines["bottom"].set_visible(False)
ax.spines["left"].set_visible(False)
plt.grid()
# tight_layout must be called; the bare attribute reference did nothing.
plt.tight_layout()
plt.show()
# Raw string: the backslashes in this Windows path are literal characters,
# not escape sequences (same runtime value, no invalid-escape warnings).
os.chdir(r"F:\Analytics\ISB Study\Capstone\dir_data\dir_data")
X_train, y_train, X_test, y_test, X_val, y_val = load_svmlight_files(("train\\vision_cuboids_histogram.txt", "test\\vision_cuboids_histogram.txt","validation\\vision_cuboids_histogram.txt"))
# LDA
# toarray() returns a plain ndarray; todense() returns np.matrix, which
# recent scikit-learn releases refuse as estimator input.
sklearn_lda = LDA(n_components=30)
X_lda_sklearn = sklearn_lda.fit_transform(X_train.toarray(), y_train)
plot_scikit_lda(X_lda_sklearn, title='LDA vision_cuboids_histogram')
# PCA
sklearn_pca = sklearnPCA(n_components=30)
X_pca = sklearn_pca.fit_transform(X_train.toarray())
plot_pca(title = 'PCA vision_cuboids_histogram')
# PCA applied on top of the LDA projection (refits the same PCA object)
X_ldapca_sklearn = sklearn_pca.fit_transform(X_lda_sklearn)
plot_scikit_lda(X_ldapca_sklearn, title='LDA+PCA LDA vision_cuboids_histogram', mirror=(-1))
开发者ID:sahuvaibhav,项目名称:Capstone,代码行数:29,代码来源:pcalda.py
示例15: documentFrequency
from sklearn.datasets import load_svmlight_files
def documentFrequency(X, y):
    """Score function for ``SelectKBest``: per-feature column sums of ``X``.

    NOTE(review): a column sum equals the document frequency only when the
    feature values are 0/1 - confirm that this is the case for the inputs.

    Args:
        X: 2-D feature matrix (scipy sparse matrix or numpy array) of shape
            ``(n_samples, n_features)``.
        y: ignored; present because score functions receive ``(X, y)``.

    Returns:
        ``(scores, pvalues)`` where ``scores`` has shape ``(n_features,)``
        and ``pvalues`` is an integer array of ones with shape
        ``(n_features, 1)``.
    """
    featurenum = X.shape[1]
    # One vectorised column sum instead of adding the rows one by one with
    # the builtin sum(); this also accepts dense arrays.
    s = np.asarray(X.sum(axis=0)).reshape(featurenum)
    p = np.ones((featurenum, 1), int)
    return s, p
if __name__ == "__main__":
    # Expected arguments: threshold, training file, test file.
    if len(sys.argv) != 4:
        print("Usage: python threshold trainfilename testfilename")
        sys.exit(1)
    # Parse the threshold first so that a bad value fails before any file
    # is loaded.
    threshold = int(sys.argv[1])
    trainfilename = sys.argv[2]
    testfilename = sys.argv[3]
    X_train, y_train, X_test, y_test = load_svmlight_files((trainfilename, testfilename))

    # Per-feature column sums of the training matrix; count how many reach
    # the threshold and keep that many best-scoring features.
    df = np.asarray(X_train.sum(axis=0)).ravel()
    cnt = int(np.count_nonzero(df >= threshold))

    selector = SelectKBest(documentFrequency, k=cnt)
    X_train = selector.fit_transform(X_train, y_train)
    X_test = selector.transform(X_test)
    sklearn.datasets.dump_svmlight_file(X_train, y_train, trainfilename + "_" + str(cnt), zero_based=False)
    sklearn.datasets.dump_svmlight_file(X_test, y_test, testfilename + "_" + str(cnt), zero_based=False)
    # Single-argument print() emits the same text on Python 2 and Python 3.
    print("%d features selected" % cnt)
开发者ID:junjiek,项目名称:cmu-exp,代码行数:30,代码来源:filterbyDF.py
示例16: print
===================================================
In this example we show how to handle LIBSVM file format.
"""
from sklearn.datasets import load_svmlight_files
import sklearn.metrics
import jubakit
from jubakit.classifier import Classifier, Dataset, Config
# Load LIBSVM files.
# Note that these example files are not included in this repository.
# You can fetch them from: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html#news20
print("Loading LIBSVM files...")
(train_X, train_y, test_X, test_y) = load_svmlight_files(['news20', 'news20.t'])
# Create a Train Dataset.
print("Creating train dataset...")
train_ds = Dataset.from_matrix(train_X, train_y)
# Create a Test Dataset
print("Creating test dataset...")
test_ds = Dataset.from_matrix(test_X, test_y)
# Create a Classifier Service
classifier = Classifier.run(Config())
# Train the classifier.
print("Training...")
for (idx, _) in classifier.train(train_ds):
开发者ID:jubatus,项目名称:jubakit,代码行数:31,代码来源:classifier_libsvm.py
示例17: scale
# Single-argument print() is used throughout: it emits exactly the same text
# on Python 2 and Python 3, whereas print statements are a SyntaxError on 3.
model = Word2Vec.load(model_name)
print("Creating the w2v vectors...\n")
X_train_w2v = scale(getAvgFeatureVecs(getCleanReviews(train), model, n_dim))
X_test_w2v = scale(getAvgFeatureVecs(getCleanReviews(test), model, n_dim))
print("Generating the svmlight-format files...\n")
generate_svmlight_files(train, test, '123', '../data/nbsvm')
print("Creating the nbsvm...\n")
files = ("../data/nbsvm-train.txt", "../data/nbsvm-test.txt")
# Only the feature matrices are kept; the label vectors are discarded.
X_train_nbsvm, _, X_test_nbsvm, _ = load_svmlight_files(files)
print("Combing the bag of words and the w2v vectors...\n")
X_train_bwv = hstack([X_train_bow, X_train_w2v])
X_test_bwv = hstack([X_test_bow, X_test_w2v])
print("Combing the bag of words and the d2v vectors...\n")
X_train_bdv = hstack([X_train_bow, X_train_d2v])
X_test_bdv = hstack([X_test_bow, X_test_d2v])
print("Checking the dimension of training vectors")
开发者ID:StevenLOL,项目名称:kaggle-word2vec-movie-reviews,代码行数:30,代码来源:predict.py
示例18: range
x = range(len(data))
plt.xticks(x,data[data.columns[0]],rotation='vertical')
for i in range(1,len(data.columns)):
plt.plot(x,data[data.columns[i]])
plt.legend(data.columns[1:], loc='upper left')
plt.xlabel(data.columns[0])
plt.ylabel('Accuracy')
plt.title('Accuracy plot for ' + fileName)
plt.show()
#=============================================== Main =================================================================
# Raw string: the backslashes in this Windows path are literal characters,
# not escape sequences (same runtime value, no invalid-escape warnings).
os.chdir(r"F:\Analytics\ISB Study\Capstone\dir_data\Capstone")
X_train, y_train, X_test, y_test, X_val, y_val = load_svmlight_files(("train\\vision_hist_motion_estimate.txt", "test\\vision_hist_motion_estimate.txt","validation\\vision_hist_motion_estimate.txt"))
# Drop the entries equal to 31 from each label vector.
# NOTE(review): the X_* matrices are not filtered with the same masks here,
# so they no longer line up row-for-row with the y_* vectors - confirm that
# nothing further down pairs them.
y_train = y_train[y_train!=31]
y_test = y_test[y_test!=31]
y_val = y_val[y_val!=31]
#================ First Level of Fusion - Audio ===============================
n_guass = 5
nClass = 30
train_post_array,test_post_array,val_post_array,train_entropy_array,test_entropy_array,val_entropy_array,data_df = pCoverX('audio',n_guass,tech = 'LinearSVC',C= 0.5,nClass=nClass)
# One row per feature file: file name plus the three accuracies.
data_df.columns = ['filename','train Accuracy','test Accuracy','validation Accuracy']
data_df.to_csv('Audio_preComb_Acc0801.csv',index=False)
audioComb1Acc = pd.DataFrame()
for alpha in [1,2,3,4,5]:
开发者ID:sahuvaibhav,项目名称:Capstone,代码行数:31,代码来源:EntropyFusion.py
示例19: PCA
import numpy as np
from scipy.sparse import csr_matrix
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.svm import LinearSVC
from mlclas.ensemble import BinaryRelevance, ClassifierChains, CalibratedLabelRanking, RandomKLabelsets, MLKNN
from mlclas.tree import MLDecisionTree
from mlclas.neural import BPMLL
from mlclas.svm import RankingSVM
from mlclas.stats import UniversalMetrics
files = ['datasets/scene_train', 'datasets/scene_test']
# load files: the result is (X_train, y_train, X_test, y_test), with the
# labels of each sample given as a collection because multilabel=True
data = datasets.load_svmlight_files(files, multilabel=True)
train_data = data[0]
# training labels are converted to a binary indicator matrix
train_target = np.array(MultiLabelBinarizer().fit_transform(data[1]))
test_data = data[2]
# NOTE(review): the test labels are kept as loaded (not binarized) - confirm
# that the code consuming test_target expects this form.
test_target = data[3]
# feature extraction using PCA, keeping 10% of the original feature count
feature_size = train_data.shape[1]
pca = PCA(n_components=(feature_size * 10) // 100)
# toarray() returns a plain ndarray; todense() returns np.matrix, which
# recent scikit-learn releases refuse as estimator input.
train_data_trans = csr_matrix(pca.fit_transform(train_data.toarray()))
test_data_trans = csr_matrix(pca.transform(test_data.toarray()))
"""
train and predict using any of following scripts:
1. result = BinaryRelevance(LinearSVC()).fit(train_data, train_target).predict(test_data)
开发者ID:markzy,项目名称:MLCLAS,代码行数:31,代码来源:ex1.py
示例20: textpCoverX
def textpCoverX():
    """Fit an SGD classifier on each text feature file and collect its outputs.

    Changes the working directory to the hard-coded ``train`` directory and
    globs ``text*.gz`` there.  For every match, the same-named gzipped
    svmlight files under ``train``, ``test`` and ``validation`` are loaded,
    samples labelled 31 are dropped, and a one-step pipeline around
    ``SGDClassifier`` is fitted on the training split.  Accuracy is printed
    for each split, and the class probabilities and their entropies (via
    the project helper ``entropy``) are collected.

    Returns:
        ``(train_post_array, test_post_array, val_post_array,
        train_entropy_array, test_entropy_array, val_entropy_array,
        data_df)``.  The six lists hold one entry per file; ``data_df`` has
        one row ``[file, train_acc, test_acc, val_acc]`` per file.
    """
    path = "C:\\Users\\Vaibhav\\Desktop\\dir_data\\dir_data\\"
    os.chdir(path + 'train')
    train_post_array = []
    test_post_array = []
    val_post_array = []
    train_entropy_array = []
    test_entropy_array = []
    val_entropy_array = []
    # Accuracy rows are collected in a list and turned into a DataFrame once
    # at the end; DataFrame.append no longer exists in pandas 2.x.
    acc_rows = []
    for fname in glob.glob("text*.gz"):
        print(fname)
        # Context managers make sure the three gzip handles are closed.
        with gzip.open(path + "train\\" + fname) as f_train, \
                gzip.open(path + "test\\" + fname) as f_test, \
                gzip.open(path + "validation\\" + fname) as f_val:
            X_train, y_train, X_test, y_test, X_val, y_val = load_svmlight_files(
                (f_train, f_test, f_val))

        # Drop samples labelled 31; X must be filtered before y is overwritten.
        X_train = X_train[y_train != 31]
        X_test = X_test[y_test != 31]
        X_val = X_val[y_val != 31]
        y_train = y_train[y_train != 31]
        y_test = y_test[y_test != 31]
        y_val = y_val[y_val != 31]

        model = Pipeline([('clf', SGDClassifier(loss='log', penalty='l1', alpha=1e-6, n_iter=10, random_state=88)), ])
        model = model.fit(X_train, y_train)

        # Same steps for each split, in the order train, test, validation.
        splits = (
            ("Train", X_train, y_train, train_post_array, train_entropy_array),
            ("Test", X_test, y_test, test_post_array, test_entropy_array),
            ("Validation", X_val, y_val, val_post_array, val_entropy_array),
        )
        row = [fname]
        for split_name, X_part, y_part, post_out, entropy_out in splits:
            predicted = model.predict(X_part)
            acc = np.mean(predicted == y_part)
            # Single-argument print() emits the same text on Python 2 and 3.
            print("%s Model Accuracy %f" % (split_name, acc))
            post = pd.DataFrame(model.predict_proba(X_part))
            post_entropy = entropy(post)
            post_out.append(post)
            entropy_out.append(post_entropy)
            row.append(acc)
        acc_rows.append(row)

    data_df = pd.DataFrame(acc_rows)
    return (train_post_array, test_post_array, val_post_array,
            train_entropy_array, test_entropy_array, val_entropy_array, data_df)
开发者ID:sahuvaibhav,项目名称:Capstone,代码行数:64,代码来源:EntropyFusion.py
注:本文中的sklearn.datasets.load_svmlight_files函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论