• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python datasets.load_svmlight_files函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中sklearn.datasets.load_svmlight_files函数的典型用法代码示例。如果您正苦于以下问题:Python load_svmlight_files函数的具体用法?Python load_svmlight_files怎么用?Python load_svmlight_files使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了load_svmlight_files函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: test_load_svmlight_files

def test_load_svmlight_files():
    """Loading the same file several times must give identical data in the
    requested dtype for every returned matrix."""
    # Two copies of the same file: the "train" and "test" halves must match.
    Xa, ya, Xb, yb = load_svmlight_files([datafile] * 2, dtype=np.float32)
    assert_array_equal(Xa.toarray(), Xb.toarray())
    assert_array_equal(ya, yb)
    for mat in (Xa, Xb):
        assert_equal(mat.dtype, np.float32)

    # Three copies: the dtype argument must propagate to every matrix.
    loaded = load_svmlight_files([datafile] * 3, dtype=np.float64)
    mats = loaded[0::2]  # X1, X2, X3 (skip the label vectors)
    assert_equal(mats[0].dtype, mats[1].dtype)
    assert_equal(mats[1].dtype, mats[2].dtype)
    assert_equal(mats[2].dtype, np.float64)
开发者ID:kkuunnddaann,项目名称:scikit-learn,代码行数:11,代码来源:test_svmlight_format.py


示例2: classify_test

def classify_test(feature_list=None, classifiers=None, root_path='./'):
    """Train every classifier on every feature set and yield metrics.

    Parameters
    ----------
    feature_list : list of str, optional
        Feature names; for each name ``./feature/<name>_train.txt`` and
        ``./feature/<name>_test.txt`` are loaded in svmlight format.
    classifiers : list, optional
        Scikit-learn style estimators implementing ``fit`` / ``predict``.
    root_path : str
        Directory under which a ``results`` folder is created.

    Yields
    ------
    (combine_name, feature_name, clf_name, info) where ``info`` maps
    'training_time', 'training_error', 'test_time', 'test_error' to floats.
    Predicted labels are also written to files under ``results``.
    """
    import os  # local import: used for the portable makedirs below

    # Fix the shared-mutable-default pitfall: default to fresh lists per call.
    feature_list = [] if feature_list is None else feature_list
    classifiers = [] if classifiers is None else classifiers

    # Load every requested feature data set once, up front.
    datasets = []
    for name in feature_list:
        logging.log(logging.DEBUG, 'loading data: %s ...' % name)
        filenames = tuple('./feature/%s_%s' % (name, tag) for tag in ('train.txt', 'test.txt'))
        X_train, y_train, X_test, y_test = load_svmlight_files(filenames)
        datasets.append((name, X_train, y_train, X_test, y_test))

    # Make a directory to store results.
    result_path = path.join(root_path, 'results')
    if path.exists(result_path):
        assert path.isdir(result_path), 'data must be a directory!'
    else:
        # os.makedirs replaces the original `system('mkdir ' + ...)`:
        # portable and safe for paths containing spaces.
        os.makedirs(result_path)

    for clf in classifiers:
        for feature in datasets:
            clf_name = clf.__class__.__name__
            feature_name, X_train, y_train, X_test, y_test = feature
            combine_name = feature_name + '_' + clf_name
            info = {}

            logging.log(logging.DEBUG, 'classification test: %s ...' % combine_name)

            logging.log(logging.DEBUG, 'training...')
            t0 = time()
            clf.fit(X_train, y_train)
            info['training_time'] = time() - t0

            logging.log(logging.DEBUG, 'testing on training...')
            pred_y = clf.predict(X_train)
            training_acc = accuracy_score(y_train, pred_y)
            logging.log(logging.DEBUG, 'error rate on training set: %f' % (1.0 - training_acc))
            info['training_error'] = 1.0 - training_acc
            # `with` guarantees the file is closed even if a write fails;
            # write() replaces the Python-2-only `print >>fout, y`.
            with open(path.join(result_path, combine_name + '_train.txt'), 'w') as fout:
                for y in pred_y:
                    fout.write('%s\n' % y)

            logging.log(logging.DEBUG, 'testing...')
            t0 = time()
            pred_y = clf.predict(X_test)
            info['test_time'] = time() - t0
            test_acc = accuracy_score(y_test, pred_y)
            logging.log(logging.DEBUG, 'error rate on test set: %f' % (1.0 - test_acc))
            info['test_error'] = 1.0 - test_acc
            with open(path.join(result_path, combine_name + '_test.txt'), 'w') as fout:
                for y in pred_y:
                    fout.write('%s\n' % y)

            yield combine_name, feature_name, clf_name, info
开发者ID:defaultstr,项目名称:movie_review,代码行数:55,代码来源:base.py


示例3: pCoverX

def pCoverX(featureFamily):
    """Per-file Gaussian-mixture pipeline over one feature family.

    For every gzipped svmlight file matching ``<featureFamily>*.gz`` in the
    hard-coded train directory: load train/test/validation splits, drop
    class 31, run feature selection, fit class-conditional Gaussian
    mixtures, then collect posteriors, entropies and accuracies.

    Returns
    -------
    (train_post_array, test_post_array, val_post_array,
     train_entropy_array, test_entropy_array, val_entropy_array, data_df)
    where ``data_df`` holds one [file, train_acc, test_acc, val_acc] row
    per processed file.

    NOTE(review): depends on helpers defined elsewhere in this module
    (featureSelection, pXoverC, prior, posterior, entropy, checkAccuracy)
    and on hard-coded Windows paths -- confirm before reuse.
    """
    os.chdir("C:\\Users\\Vaibhav\\Desktop\\dir_data\\dir_data\\train")
    path = "C:\\Users\\Vaibhav\\Desktop\\dir_data\\dir_data\\"
    data_df = pd.DataFrame()
    n_guass = 2  # number of Gaussian components per class mixture
    train_post_array = []
    test_post_array = []
    val_post_array = []
    train_entropy_array = []
    test_entropy_array = []
    val_entropy_array = []
    fileType = featureFamily+'*.gz'
    for file in glob.glob(fileType):
        print(file)
        # Loading all three splits in one call keeps them in one feature space.
        X_train, y_train, X_test, y_test,X_val, y_val = load_svmlight_files((gzip.open(path+"train\\"+file), gzip.open(path+"test\\"+file),gzip.open(path+"validation\\"+file)))
        #X_train, y_train, X_test, y_test, X_val, y_val = load_svmlight_files(("train\\vision_cuboids_histogram.txt", "test\\vision_cuboids_histogram.txt","validation\\vision_cuboids_histogram.txt"))
        # Drop every sample of class 31 from all splits (rows and labels).
        X_train = X_train[y_train!=31]
        X_test = X_test[y_test!=31]
        X_val = X_val[y_val!=31]
        y_train = y_train[y_train!=31]
        y_test = y_test[y_test!=31]
        y_val = y_val[y_val!=31]
    #========================= Feature Selection using Variance Thresold =============================================================
        X_train_new, X_test_new , X_val_new = featureSelection(X_train,X_test,X_val,y_train, log=True,tech = 'LinearSVC')
    #========================= Mixture of Guassian ============================================================
        train_prob,test_prob,val_prob = pXoverC(X_train_new, y_train, X_test_new, y_test, X_val_new, y_val, n_guass)
    #========================= Calculating Prior, Posterior and Entropy ============================================================
        prr = prior(y_train)
        train_post = posterior(train_prob,prr)
        train_entropy = entropy(train_post)

        train_post_array.append(train_post)
        train_entropy_array.append(train_entropy)

        test_post = posterior(test_prob,prr)
        test_entropy = entropy(test_post)

        test_post_array.append(test_post)
        test_entropy_array.append(test_entropy)

        val_post = posterior(val_prob,prr)
        val_entropy = entropy(val_post)

        val_post_array.append(val_post)
        val_entropy_array.append(val_entropy)

        # Accuracy of arg-max posterior against true labels for each split
        # (confusion matrix c_mat is discarded).
        train_acc,c_mat = checkAccuracy(train_post,y_train)
        test_acc,c_mat = checkAccuracy(test_post,y_test)
        val_acc,c_mat = checkAccuracy(val_post,y_val)
        temp = pd.DataFrame([[file,train_acc,test_acc,val_acc]])
        data_df = data_df.append(temp,ignore_index =True)

    return train_post_array,test_post_array,val_post_array,train_entropy_array,test_entropy_array,val_entropy_array,data_df
开发者ID:sahuvaibhav,项目名称:Capstone,代码行数:53,代码来源:test4.py


示例4: test_load_zero_based_auto

def test_load_zero_based_auto():
    """zero_based='auto' must infer a consistent index base across files."""
    # BytesIO requires bytes; the original literals were plain str, which
    # raises TypeError on Python 3.
    data1 = b"-1 1:1 2:2 3:3\n"
    data2 = b"-1 0:0 1:1\n"

    # Alone, data1 (no index 0 seen) is parsed as one-based: 3 features.
    f1 = BytesIO(data1)
    X, y = load_svmlight_file(f1, zero_based="auto")
    assert_equal(X.shape, (1, 3))

    # Loaded together, data2's explicit index 0 forces zero-based parsing
    # for both files, so both matrices get 4 columns.
    f1 = BytesIO(data1)
    f2 = BytesIO(data2)
    X1, y1, X2, y2 = load_svmlight_files([f1, f2], zero_based="auto")
    assert_equal(X1.shape, (1, 4))
    assert_equal(X2.shape, (1, 4))
开发者ID:kkuunnddaann,项目名称:scikit-learn,代码行数:13,代码来源:test_svmlight_format.py


示例5: test_load_with_qid

def test_load_with_qid():
    """Files with qid fields parse correctly with and without query_id."""
    # load svmfile with qid attribute.  BytesIO requires bytes on Python 3;
    # the original literal was a plain str.
    data = b"""
    3 qid:1 1:0.53 2:0.12
    2 qid:1 1:0.13 2:0.1
    7 qid:2 1:0.87 2:0.12"""
    # query_id=False: qid tokens are skipped, only labels/features returned.
    X, y = load_svmlight_file(BytesIO(data), query_id=False)
    assert_array_equal(y, [3, 2, 7])
    assert_array_equal(X.todense(), [[0.53, 0.12], [0.13, 0.1], [0.87, 0.12]])
    # query_id=True: both the plural and singular loaders return (X, y, qid).
    res1 = load_svmlight_files([BytesIO(data)], query_id=True)
    res2 = load_svmlight_file(BytesIO(data), query_id=True)
    for X, y, qid in (res1, res2):
        assert_array_equal(y, [3, 2, 7])
        assert_array_equal(qid, [1, 1, 2])
        assert_array_equal(X.todense(), [[0.53, 0.12], [0.13, 0.1], [0.87, 0.12]])
开发者ID:kkuunnddaann,项目名称:scikit-learn,代码行数:15,代码来源:test_svmlight_format.py


示例6: select_feature

def select_feature(trainfilename, testfilename):
    """Chi-square feature selection at 5%-step sizes.

    Loads a multilabel train/test pair in svmlight format, then for
    k = 5%, 10%, ..., 100% of the features keeps the top-k by chi2 score
    and dumps the reduced sets to '<filename>_<k>' (one-based indices).
    """
    # The original defined an inner `returnCHI` helper and precomputed
    # `chivalue = chi2(...)` that were never used; both removed as dead code.
    X_train, y_train, X_test, y_test = load_svmlight_files(
        (trainfilename, testfilename), multilabel=True)

    featureNum = X_train.shape[1]
    # `//` keeps the Python 2 integer-division semantics on Python 3 too.
    step = featureNum // 20
    for i in range(1, 21):
        selectNum = step * i
        # print() form works on Python 2 and 3 (original used a py2 print
        # statement); output text is unchanged.
        print("selecting %d features" % selectNum)
        selector = SelectKBest(chi2, k=selectNum)
        X_train_new = selector.fit_transform(X_train, y_train)
        X_test_new = selector.transform(X_test)
        dump_svmlight_file(X_train_new, y_train, trainfilename + '_' + str(selectNum), zero_based=False)
        dump_svmlight_file(X_test_new, y_test, testfilename + '_' + str(selectNum), zero_based=False)
开发者ID:junjiek,项目名称:cmu-exp,代码行数:17,代码来源:chi_square_max_multilabel.py


示例7: select_feature_multilabel

def select_feature_multilabel(trainfilename, testfilename):
    """Random-score feature 'selection' baseline for multilabel data.

    Scores every feature with random values and dumps the top
    5%, 10%, ..., 100% of features to '<filename>_<k>' files.
    """
    def returnIG(X, y):
        # SelectKBest score_func contract: return (scores, pvalues).
        return randval, p

    X_train, y_train, X_test, y_test = load_svmlight_files(
        (trainfilename, testfilename), multilabel=True)

    # One feature count (the original computed it twice as featurenum and
    # featureNum with identical values).
    featureNum = X_train.shape[1]
    randval = randomValues(X_train, y_train)
    # Dummy p-values (all ones).  The original also called p.reshape(...)
    # without assigning the result -- a no-op, dropped here.
    p = np.ones((featureNum, 1), int)

    # `//` keeps Python 2 integer-division semantics on Python 3 too.
    step = featureNum // 20
    for i in range(1, 21):
        selectNum = step * i
        # print() form works on Python 2 and 3; output text unchanged.
        print("selecting %d features" % selectNum)
        selector = SelectKBest(returnIG, k=selectNum)
        X_train_new = selector.fit_transform(X_train, y_train)
        X_test_new = selector.transform(X_test)
        dump_svmlight_file(X_train_new, y_train, trainfilename + '_' + str(selectNum), zero_based=False)
        dump_svmlight_file(X_test_new, y_test, testfilename + '_' + str(selectNum), zero_based=False)
开发者ID:junjiek,项目名称:cmu-exp,代码行数:20,代码来源:random_selection_ml.py


示例8: select_feature

def select_feature(trainfilename, testfilename):
    """Information-gain feature selection at 5%-step sizes.

    Scores features with information_gain (defined elsewhere in this
    module) and dumps the top 5%, 10%, ..., 100% of features to
    '<filename>_<k>' files (one-based indices).
    """
    def returnIG(X, y):
        # SelectKBest score_func contract: return (scores, pvalues).
        return ig, p

    X_train, y_train, X_test, y_test = load_svmlight_files((trainfilename, testfilename))

    featurenum = X_train.shape[1]
    ig = information_gain(X_train, y_train)
    ig = ig.reshape(featurenum,)
    # Dummy p-values, one per feature.  The original built a (1, n) array
    # and then discarded the result of p.reshape(n, 1); build the clearly
    # intended (n, 1) shape directly (matching the sibling multilabel
    # variant of this function).
    p = np.ones((featurenum, 1), int)

    # `//` keeps Python 2 integer-division semantics on Python 3 too.
    step = featurenum // 20
    for i in range(1, 21):
        selectNum = step * i
        # print() form works on Python 2 and 3; output text unchanged.
        print("selecting %d features" % selectNum)
        selector = SelectKBest(returnIG, k=selectNum)
        X_train_new = selector.fit_transform(X_train, y_train)
        X_test_new = selector.transform(X_test)
        sklearn.datasets.dump_svmlight_file(X_train_new, y_train, trainfilename + '_' + str(selectNum), zero_based=False)
        sklearn.datasets.dump_svmlight_file(X_test_new, y_test, testfilename + '_' + str(selectNum), zero_based=False)
开发者ID:junjiek,项目名称:cmu-exp,代码行数:21,代码来源:ig_selection.py


示例9: run_nblcr

def run_nblcr(train, test, outfn, grams='123', clf=None):
    """Train an NB log-count-ratio model and predict labels/probabilities.

    Parameters
    ----------
    train, test : DataFrame-like with 'label' (0/1) and 'text' columns.
    outfn : str
        Prefix for generated svmlight files ('<outfn>-train.txt' / '-test.txt').
    grams : str
        Digits of the n-gram orders to use, e.g. '123' -> uni/bi/trigrams.
    clf : estimator, optional
        Defaults to LogisticRegression(class_weight="auto").

    Returns
    -------
    (y_pred, y_prob): predicted labels and probability columns [neg, pos].
    """
    # A fresh classifier per call: the original default argument was a
    # single LogisticRegression instance shared (and re-fitted) across
    # every call -- the mutable-default pitfall.
    if clf is None:
        clf = LogisticRegression(class_weight="auto")

    f_train = outfn + '-train.txt'
    f_test = outfn + '-test.txt'

    ngram = [int(i) for i in grams]
    ptrain = []
    ntrain = []

    # Split the training texts by sentiment label.
    for _, row in train.iterrows():
        if row['label'] == 1:
            ptrain.append(tokenize(row['text'], ngram))
        elif row['label'] == 0:
            ntrain.append(tokenize(row['text'], ngram))

    pos_counts = build_dict(ptrain, ngram)
    neg_counts = build_dict(ntrain, ngram)

    # Per-token log-count ratios weight the svmlight features.
    dic, r = compute_ratio(pos_counts, neg_counts)

    generate_svmlight_file(train, dic, r, ngram, f_train)
    generate_svmlight_file(test, dic, r, ngram, f_test)

    X_train, y_train, X_test, _ = load_svmlight_files((f_train, f_test))

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    try:
        y_prob = clf.predict_proba(X_test)
    except AttributeError:
        # Estimators without predict_proba (e.g. plain SVMs): fall back to
        # hard predictions as pseudo-probabilities.  The original bare
        # `except:` has been narrowed so real errors are not swallowed.
        clf.set_params(probability=True)
        y_prob_pos = clf.predict(X_test)
        y_prob_neg = np.ones(X_test.shape[0]) - y_prob_pos
        y_prob = np.column_stack((y_prob_neg, y_prob_pos))

    return y_pred, y_prob
    
    
    
开发者ID:bluedrone,项目名称:psb-adr,代码行数:37,代码来源:maxent_nblcr.py


示例10: load_amazon

def load_amazon(source_name, target_name, data_folder=None, verbose=False):
    """
    Load the amazon sentiment datasets from svmlight format files.

    inputs:
        source_name : name of the source dataset
        target_name : name of the target dataset
        data_folder : path to the folder containing the files
        verbose     : when True, print the resolved file names
    outputs:
        xs, ys : training source data matrix and label vector
        xt, yt : training target data matrix and label vector
        xtest, ytest : testing target data matrix and label vector
    """
    if data_folder is None:
        data_folder = 'data/'

    # Resolve the three svmlight files: source train, target train, target test.
    files = [
        data_folder + source_name + '_train.svmlight',
        data_folder + target_name + '_train.svmlight',
        data_folder + target_name + '_test.svmlight',
    ]

    if verbose:
        print('source file:', files[0])
        print('target file:', files[1])
        print('test file:  ', files[2])

    xs, ys, xt, yt, xtest, ytest = load_svmlight_files(files)

    # Densify the sparse matrices into plain 2D numpy arrays.
    xs, xt, xtest = [np.array(m.todense()) for m in (xs, xt, xtest)]

    # Map the {-1, 1} labels onto {0, 1}.
    ys, yt, ytest = [np.array((v + 1) / 2, dtype=int) for v in (ys, yt, ytest)]

    return xs, ys, xt, yt, xtest, ytest
开发者ID:GRAAL-Research,项目名称:domain_adversarial_neural_network,代码行数:37,代码来源:experiments_amazon.py


示例11: LDA

    probs.append(score_i)
      
  return probs
  
parser = argparse.ArgumentParser()
#parser.add_argument( "train_file" )
parser.add_argument( "-p", "--predict", help = "if is to make predictions in a test file", default = None )
parser.add_argument( "-t", "--predict_file", help = "if is to make predictions in a test file", default = None )
parser.add_argument( "-c", "--cross_validation", help = "if have make cross-validation", default = None )

args = parser.parse_args()

classifier = LDA(n_components=2)
#classifier = RandomForestClassifier()

X_url, y, X_title, y_t, X_body, y_b, X_a, y_a = load_svmlight_files(("url_train.txt", "title_train.txt", "body_train.txt", "all_train.txt"))
X = {"url":X_url, "title": X_title, "body": X_body, "all": X_a}

if(args.predict):
  print "Predicting"
  T_url, t, T_title, y_t, T_body, y_b, T_a, y_a = load_svmlight_files(("url_test.txt", "title_test.txt", "body_test.txt", "all_test.txt"))
  T = {"url": T_url, "title": T_title, "body": T_body, "all": T_a}
  probs = predict(classifier, X, y, T, t)
  
  f = open("sub_31-08_01h15.txt","w")
  f.write("label\n")
  for p in probs:
    line = "%f\n" % p
    f.write(line)
  f.close()
elif(args.cross_validation):
开发者ID:rloliveirajr,项目名称:kaggle_stumbleupon,代码行数:31,代码来源:train.py


示例12: svm_skin

def svm_skin(X_train, y_train, X_test, y_test):
    """Learn the skin data sets with SVM with Linear kernel.

    X_*: samples (scipy sparse matrices loaded from svmlight files).
    y_*: labels.

    Returns the test accuracy as a percentage (float).
    """
    # print() works on Python 2 and 3 alike; the original used the
    # Python-2-only print statement, a SyntaxError under Python 3.
    print('SVM w/ Linear kernel')
    clf = svm.LinearSVC()
    clf.fit(X_train, y_train)
    # The original densified the test matrix before scoring; keep that.
    score = 100 * clf.score(X_test.toarray(), y_test)

    print('SVM score: %.2f%%' % score)
    return score


if __name__ == '__main__':
    # `data_size` is an integer which controls how big the data set is.
    # Use None to use the whole dataset.
    # split_libsvm_dataset(path='skin.txt', data_size=None)

    # Load train and test samples (X) + labels (y) from the two svmlight
    # files produced by split_libsvm_dataset (X_* are scipy sparse matrices).
    X_train, y_train, X_test, y_test = load_svmlight_files(
        ('skin-train.libsvm', 'skin-test.libsvm'))

    # Train and score a linear SVM on the split.
    svm_skin(X_train, y_train, X_test, y_test)

    # iterations, scores = adaboost_skin(X_train, y_train, X_test, y_test)
    # graph = plot_success_per_size(iterations, scores)
    # show()
开发者ID:oryband,项目名称:homework,代码行数:29,代码来源:q5.py


示例13: test_load_invalid_file2

def test_load_invalid_file2():
    # Loading a list that contains an invalid svmlight file should raise.
    # NOTE(review): the original test presumably carried an expected-failure
    # decorator (e.g. @raises(ValueError)) that this excerpt does not show --
    # confirm against the upstream test module.
    load_svmlight_files([datafile, invalidfile, datafile])
开发者ID:kkuunnddaann,项目名称:scikit-learn,代码行数:2,代码来源:test_svmlight_format.py


示例14: load_svmlight_files

    # remove axis spines
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["bottom"].set_visible(False)
    ax.spines["left"].set_visible(False)

    plt.grid()
    plt.tight_layout
    plt.show()
    
    
    
# Script driver: project the vision_cuboids_histogram features with LDA and
# PCA and plot them (plot_scikit_lda / plot_pca are defined earlier in this
# module).
# NOTE(review): hard-coded Windows working directory; the single-backslash
# path below happens to contain no recognised escape sequences, but a raw
# string would be safer.
os.chdir("F:\Analytics\ISB Study\Capstone\dir_data\dir_data")



# Load the train/test/validation splits in svmlight format (sparse matrices).
X_train, y_train, X_test, y_test, X_val, y_val = load_svmlight_files(("train\\vision_cuboids_histogram.txt", "test\\vision_cuboids_histogram.txt","validation\\vision_cuboids_histogram.txt"))
np.unique(y_train)  # inspect the label set (result is discarded)

# Linear discriminant analysis down to 30 components, then plot.
sklearn_lda = LDA(n_components=30)
X_lda_sklearn = sklearn_lda.fit_transform(X_train.todense(), y_train)
plot_scikit_lda(X_lda_sklearn, title='LDA vision_cuboids_histogram')
# PCA
sklearn_pca = sklearnPCA(n_components=30)
X_pca = sklearn_pca.fit_transform(X_train.todense())
plot_pca(title = 'PCA vision_cuboids_histogram')
# PCA applied on top of the LDA projection, mirrored for display.
X_ldapca_sklearn = sklearn_pca.fit_transform(X_lda_sklearn)
plot_scikit_lda(X_ldapca_sklearn, title='LDA+PCA LDA vision_cuboids_histogram', mirror=(-1))
开发者ID:sahuvaibhav,项目名称:Capstone,代码行数:29,代码来源:pcalda.py


示例15: documentFrequency

from sklearn.datasets import load_svmlight_files


def documentFrequency(X, y):
    """SelectKBest score function: per-feature document frequency.

    Scores each feature by the column-wise sum of X (for 0/1 term-presence
    data this is the document frequency).  The second return value is a
    dummy (n, 1) column of ones standing in for p-values.  `y` is ignored
    but required by the score_func signature.
    """
    n_features = X.shape[1]
    # Built-in sum over the sparse rows, then flatten to a 1-D score vector.
    scores = sum(X).toarray().reshape(n_features)
    dummy_p = np.ones((1, n_features), int).reshape(n_features, 1)
    return scores, dummy_p


if __name__ == "__main__":
    # CLI: drop features whose document frequency is below a threshold.
    if len(sys.argv) != 4:
        # Fixed usage text: the original omitted the script name.
        print("Usage: python %s threshold trainfilename testfilename" % sys.argv[0])
        exit(1)
    threshold = int(sys.argv[1])
    trainfilename = sys.argv[2]
    testfilename = sys.argv[3]
    X_train, y_train, X_test, y_test = load_svmlight_files((trainfilename, testfilename))

    # Per-feature document frequency = column sums of the training matrix.
    df = sum(X_train).toarray()[0]
    # Number of features meeting the threshold -- vectorised count replaces
    # the original manual index loop.
    cnt = int((df >= threshold).sum())
    selector = SelectKBest(documentFrequency, k=cnt)
    X_train = selector.fit_transform(X_train, y_train)
    X_test = selector.transform(X_test)
    # Persist the reduced splits with the kept-feature count in the name.
    sklearn.datasets.dump_svmlight_file(X_train, y_train, trainfilename + "_" + str(cnt), zero_based=False)
    sklearn.datasets.dump_svmlight_file(X_test, y_test, testfilename + "_" + str(cnt), zero_based=False)
    # print() works on Python 2 and 3 (original used py2 print statements).
    print("%d features selected" % cnt)
开发者ID:junjiek,项目名称:cmu-exp,代码行数:30,代码来源:filterbyDF.py


示例16: print

===================================================

In this example we show how to handle LIBSVM file format.
"""

from sklearn.datasets import load_svmlight_files
import sklearn.metrics

import jubakit
from jubakit.classifier import Classifier, Dataset, Config

# Load LIBSVM files.
# Note that these example files are not included in this repository.
# You can fetch them from: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html#news20
print("Loading LIBSVM files...")
# Loading both files in one call keeps train and test in one feature space.
(train_X, train_y, test_X, test_y) = load_svmlight_files(['news20', 'news20.t'])

# Create a Train Dataset.
print("Creating train dataset...")
train_ds = Dataset.from_matrix(train_X, train_y)

# Create a Test Dataset.
print("Creating test dataset...")
test_ds = Dataset.from_matrix(test_X, test_y)

# Create a Classifier Service with the default jubakit configuration.
classifier = Classifier.run(Config())

# Train the classifier.
print("Training...")
for (idx, _) in classifier.train(train_ds):
开发者ID:jubatus,项目名称:jubakit,代码行数:31,代码来源:classifier_libsvm.py


示例17: scale

    model = Word2Vec.load(model_name)
    
    print "Creating the w2v vectors...\n"

    X_train_w2v = scale(getAvgFeatureVecs(getCleanReviews(train), model, n_dim))
    X_test_w2v = scale(getAvgFeatureVecs(getCleanReviews(test), model, n_dim))
    
    print "Generating the svmlight-format files...\n"
    
    generate_svmlight_files(train, test, '123', '../data/nbsvm')
    
    print "Creating the nbsvm...\n"
    
    files = ("../data/nbsvm-train.txt", "../data/nbsvm-test.txt")
     
    X_train_nbsvm, _, X_test_nbsvm, _ = load_svmlight_files(files)
    
    print "Combing the bag of words and the w2v vectors...\n"
    
    X_train_bwv = hstack([X_train_bow, X_train_w2v])
    X_test_bwv = hstack([X_test_bow, X_test_w2v])

    
    print "Combing the bag of words and the d2v vectors...\n"
    
    X_train_bdv = hstack([X_train_bow, X_train_d2v])
    X_test_bdv = hstack([X_test_bow, X_test_d2v])

    
    print "Checking the dimension of training vectors"
    
开发者ID:StevenLOL,项目名称:kaggle-word2vec-movie-reviews,代码行数:30,代码来源:predict.py


示例18: range

        x = range(len(data))
        plt.xticks(x,data[data.columns[0]],rotation='vertical')
        for i in range(1,len(data.columns)):
            plt.plot(x,data[data.columns[i]])
        
    plt.legend(data.columns[1:], loc='upper left')
    plt.xlabel(data.columns[0])
    plt.ylabel('Accuracy')
    plt.title('Accuracy plot for ' + fileName)
    plt.show()

#=============================================== Main =================================================================

# Script driver: load one vision feature set, then run the audio fusion
# pipeline and save per-file accuracies.
os.chdir("F:\Analytics\ISB Study\Capstone\dir_data\Capstone")
#os.chdir("C:\Users\Vaibhav\Desktop\dir_data\dir_data")
X_train, y_train, X_test, y_test, X_val, y_val = load_svmlight_files(("train\\vision_hist_motion_estimate.txt", "test\\vision_hist_motion_estimate.txt","validation\\vision_hist_motion_estimate.txt"))
# Drop class 31 from the label vectors.
# NOTE(review): only y_* are filtered here -- the X_* matrices keep their
# class-31 rows, so X and y lengths diverge; confirm whether this is
# intentional (the pCoverX helper filters both).
y_train = y_train[y_train!=31]
y_test = y_test[y_test!=31]
y_val = y_val[y_val!=31]
#y_train = y_train[y_train <=2]
#y_test = y_test[y_test<=2]
#y_val = y_val[y_val<=2]
#================ First Level of Fusion - Audio ===============================
n_guass =5
nClass = 30
# pCoverX here takes (family, n_guass, tech, C, nClass) -- a different
# signature from the single-argument pCoverX shown elsewhere on this page.
train_post_array,test_post_array,val_post_array,train_entropy_array,test_entropy_array,val_entropy_array,data_df = pCoverX('audio',n_guass,tech = 'LinearSVC',C= 0.5,nClass=30)
data_df.columns = ['filename','train Accuracy','test Accuracy','validation Accuracy']
data_df.to_csv('Audio_preComb_Acc0801.csv',index=False)

# Accumulator for the first-level audio combination accuracies.
audioComb1Acc = pd.DataFrame()
for alpha in [1,2,3,4,5]:
开发者ID:sahuvaibhav,项目名称:Capstone,代码行数:31,代码来源:EntropyFusion.py


示例19: PCA

import numpy as np
from scipy.sparse import csr_matrix
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.svm import LinearSVC
from mlclas.ensemble import BinaryRelevance, ClassifierChains, CalibratedLabelRanking, RandomKLabelsets, MLKNN
from mlclas.tree import MLDecisionTree
from mlclas.neural import BPMLL
from mlclas.svm import RankingSVM
from mlclas.stats import UniversalMetrics

files = ['datasets/scene_train', 'datasets/scene_test']

# load files
# Loading train and test together guarantees a shared feature space.
data = datasets.load_svmlight_files(files, multilabel=True)
train_data = data[0]
# Multilabel targets come back as label tuples; binarise to an indicator matrix.
train_target = np.array(MultiLabelBinarizer().fit_transform(data[1]))
test_data = data[2]
test_target = data[3]

# feature extraction using PCA
# Keep 10% of the original feature count as principal components, then
# re-sparsify the projected data for the downstream estimators.
feature_size = train_data.shape[1]
pca = PCA(n_components=(feature_size * 10) // 100)
train_data_trans = csr_matrix(pca.fit_transform(train_data.todense()))
test_data_trans = csr_matrix(pca.transform(test_data.todense()))

"""
    train and predict using any of following scripts:

    1.  result = BinaryRelevance(LinearSVC()).fit(train_data, train_target).predict(test_data)
开发者ID:markzy,项目名称:MLCLAS,代码行数:31,代码来源:ex1.py


示例20: textpCoverX

def textpCoverX():
    """Fit an L1-regularised logistic SGD model on every gzipped ``text*``
    feature file and collect class posteriors, entropies and accuracies.

    Returns
    -------
    (train_post_array, test_post_array, val_post_array,
     train_entropy_array, test_entropy_array, val_entropy_array, data_df)
    where ``data_df`` holds one [file, train_acc, test_acc, val_acc] row
    per processed file.

    NOTE(review): relies on the module-level helper ``entropy`` and on
    hard-coded Windows paths; samples of class 31 are dropped everywhere.
    """
    #os.chdir("F:\\Analytics\\ISB Study\\Capstone\\dir_data\\dir_data\\train")
    #path = "F:\\Analytics\\ISB Study\\Capstone\\dir_data\\dir_data\\"
    path = "C:\\Users\\Vaibhav\\Desktop\\dir_data\\dir_data\\"
    os.chdir(path + 'train')

    data_df = pd.DataFrame()

    train_post_array = []
    test_post_array = []
    val_post_array = []
    train_entropy_array = []
    test_entropy_array = []
    val_entropy_array = []

    for file in glob.glob("text*.gz"):
        print(file)
        # One call keeps the three splits in a single feature space.
        X_train, y_train, X_test, y_test, X_val, y_val = load_svmlight_files(
            (gzip.open(path + "train\\" + file),
             gzip.open(path + "test\\" + file),
             gzip.open(path + "validation\\" + file)))

        # Drop every sample of class 31 from all splits.
        X_train = X_train[y_train != 31]
        X_test = X_test[y_test != 31]
        X_val = X_val[y_val != 31]
        y_train = y_train[y_train != 31]
        y_test = y_test[y_test != 31]
        y_val = y_val[y_val != 31]

        svmClf = Pipeline([('clf', SGDClassifier(loss='log', penalty='l1', alpha=1e-6, n_iter=10, random_state=88)), ])
        svmClf = svmClf.fit(X_train, y_train)

        # print() replaces the Python-2-only print statements (SyntaxError
        # under Python 3); the printed text is unchanged.
        predicted_train = svmClf.predict(X_train)
        train_acc = np.mean(predicted_train == y_train)
        print("Train Model Accuracy %f" % train_acc)
        train_post = pd.DataFrame(svmClf.predict_proba(X_train))

        predicted_test = svmClf.predict(X_test)
        test_acc = np.mean(predicted_test == y_test)
        print("Test Model Accuracy %f" % test_acc)
        test_post = pd.DataFrame(svmClf.predict_proba(X_test))

        predicted_val = svmClf.predict(X_val)
        val_acc = np.mean(predicted_val == y_val)
        print("Validation Model Accuracy %f" % val_acc)
        val_post = pd.DataFrame(svmClf.predict_proba(X_val))

        train_post_array.append(train_post)
        train_entropy_array.append(entropy(train_post))

        test_post_array.append(test_post)
        test_entropy_array.append(entropy(test_post))

        val_post_array.append(val_post)
        val_entropy_array.append(entropy(val_post))

        # DataFrame.append was deprecated and removed in pandas 2.0;
        # pd.concat is the drop-in equivalent.
        row = pd.DataFrame([[file, train_acc, test_acc, val_acc]])
        data_df = pd.concat([data_df, row], ignore_index=True)

    return train_post_array, test_post_array, val_post_array, train_entropy_array, test_entropy_array, val_entropy_array, data_df
开发者ID:sahuvaibhav,项目名称:Capstone,代码行数:64,代码来源:EntropyFusion.py



注:本文中的sklearn.datasets.load_svmlight_files函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python datasets.make_blobs函数代码示例发布时间:2022-05-27
下一篇:
Python datasets.load_svmlight_file函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2023 极客世界.|Sitemap