Python cross_validation.train_test_split Function Code Examples


This article collects typical usage examples of the Python function sklearn.cross_validation.train_test_split. If you have been wondering what exactly train_test_split does, how to call it, or what its usage looks like in practice, the curated code examples below may help.



A total of 20 code examples of train_test_split are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
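Before walking through the collected examples, here is a minimal sketch of the basic call for reference. It is not taken from any of the projects below, and the toy arrays are made up for illustration. Note that sklearn.cross_validation is the legacy module location; in scikit-learn 0.18 and later the same function is available from sklearn.model_selection.

import numpy as np
from sklearn.cross_validation import train_test_split  # legacy location used throughout this article

# Toy data: 10 samples with 2 features each, plus 10 aligned labels (made up for illustration)
X = np.arange(20).reshape(10, 2)
y = np.arange(10)

# Hold out 20% of the samples for testing; random_state makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(X_train.shape, X_test.shape)  # (8, 2) (2, 2)

The function accepts any number of indexable arrays of the same length and returns them split pairwise into train and test portions, which is why the examples below frequently unpack four (or more) return values at once.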

Example 1: splitDataset

def splitDataset(data, random_seed):
    '''
    Given a dataframe and a seed value, this function splits out the dataframe into a training set, a validation set, and a test set using the provided seed value for consistency. It uses a 60/20/20 split, but this could easily be parameterized and passed into the function. It returns a dictionary of dataframes with keys train, valid and test.
    '''
    #Get column headers
    col_headers = list(data.columns.values)
    feature_cols = copy.deepcopy(col_headers)
    feature_cols.remove('Sample')
    feature_cols.remove('Diagnosis')
    class_col = ['Diagnosis']
    
    #Train/test/validate split
    train, test = train_test_split(data, test_size=0.2, random_state=random_seed)
    train = pd.DataFrame(train)
    test = pd.DataFrame(test)
    train.columns = col_headers
    test.columns = col_headers
    train, validate = train_test_split(train, test_size=0.25, random_state=random_seed)
    train = pd.DataFrame(train)
    validate = pd.DataFrame(validate)
    train.columns = col_headers
    validate.columns = col_headers
    
    #Separate features and classes
    all_data = {'train': train, 'valid': validate, 'test': test}
    return extractFeatures(all_data)
Developer: bwelsh, Project: projectW4761, Lines: 26, Source: cgen_include.py


Example 2: tribunalTrain

 def tribunalTrain(data,predict,tribunal,split=.2,stat=False,statLis=None):
     #data for testing the tribunal performance, not in actual judge training
     dat_train, dat_test, lab_train, lab_test = train_test_split(data,predict, test_size=split)
     verdict = []
      
     print 'Tribunal in session'
     
     for judge in tribunal:
         jdat_train, jdat_test, jlab_train, jlab_test = train_test_split(dat_train,lab_train, test_size=split)
         judge.fit(jdat_train, jlab_train)
         print 'judge trained'
 
     for d in dat_test:
         votes = []
         for judge in tribunal:
             v = judge.predict(d)
             votes.append(v)
         decision = stats.mode(votes,axis=None)
         verdict.append(decision[0])
     npVerdict = np.array(verdict)
     
     if stat == False:        
         svmDesc(npVerdict,lab_test,title='Tribunal Confusion Matrix')
     else:
         jac = jaccard_similarity_score(npVerdict,lab_test)
         statLis.append(jac)
Developer: am4002, Project: Hybrid-SOM-for-MEG, Lines: 26, Source: som_cluster_lib.py


Example 3: processMethod3

def processMethod3(userid, featureCondition=1, classificationCondition=1, offsetFeatureOn=False):
    """ User-i Device-j hack in User-i Device-k Model: iphone6plus hack iphone5

    Returns
    -------
    float : error rate
    """
    # rawDataiPhone6Plus = loadUserData(userid, 1, datatype=1) # moment data
    # rawDataiPhone5     = loadUserData(userid, 2, datatype=1) # moment data

    # trainingData  = splitMomentDataByFeature(rawDataiPhone5, featureCondition=featureCondition)
    # trainingLabel = rawDataiPhone5[:, 4]

    # testData  = splitMomentDataByFeature(rawDataiPhone6Plus, featureCondition=featureCondition)
    # testLabel = rawDataiPhone6Plus[:, 4]

    iPhone6Plus = 1
    iPhone5     = 2
    trainingData, trainingLabel = splitMomentDataByFeatureAndLabel(userid, iPhone5, featureCondition, classificationCondition, offsetFeatureOn=offsetFeatureOn)
    testData, testLabel         = splitMomentDataByFeatureAndLabel(userid, iPhone6Plus, featureCondition, classificationCondition, offsetFeatureOn=offsetFeatureOn)

    # use same test size with method1
    trainingDataIP5, testDataIP5, trainingLabelIP5, testLabelIP5 = train_test_split(trainingData, trainingLabel, test_size=my_test_size, random_state=my_random_state)
    trainingDataIP6, testDataIP6, trainingLabelIP6, testLabelIP6 = train_test_split(    testData,     testLabel, test_size=my_test_size, random_state=my_random_state)

    return classify(trainingDataIP5, trainingLabelIP5, testDataIP6, testLabelIP6, kernel=my_kernel, max_iter=my_max_iteration)
Developer: changkun, Project: AugmentedTouch, Lines: 26, Source: moment.py


Example 4: test_train_test_split

def test_train_test_split():
    X = np.arange(100).reshape((10, 10))
    X_s = coo_matrix(X)
    y = np.arange(10)

    # simple test
    split = cval.train_test_split(X, y, test_size=None, train_size=.5)
    X_train, X_test, y_train, y_test = split
    assert_equal(len(y_test), len(y_train))
    # test correspondence of X and y
    assert_array_equal(X_train[:, 0], y_train * 10)
    assert_array_equal(X_test[:, 0], y_test * 10)

    # conversion of lists to arrays (deprecated?)
    split = cval.train_test_split(X, X_s, y.tolist(), allow_lists=False)
    X_train, X_test, X_s_train, X_s_test, y_train, y_test = split
    assert_array_equal(X_train, X_s_train.toarray())
    assert_array_equal(X_test, X_s_test.toarray())

    # don't convert lists to anything else by default
    split = cval.train_test_split(X, X_s, y.tolist())
    X_train, X_test, X_s_train, X_s_test, y_train, y_test = split
    assert_true(isinstance(y_train, list))
    assert_true(isinstance(y_test, list))

    # allow nd-arrays
    X_4d = np.arange(10 * 5 * 3 * 2).reshape(10, 5, 3, 2)
    y_3d = np.arange(10 * 7 * 11).reshape(10, 7, 11)
    split = cval.train_test_split(X_4d, y_3d)
    assert_equal(split[0].shape, (7, 5, 3, 2))
    assert_equal(split[1].shape, (3, 5, 3, 2))
    assert_equal(split[2].shape, (7, 7, 11))
    assert_equal(split[3].shape, (3, 7, 11))
Developer: jjhelmus, Project: scikit-learn, Lines: 33, Source: test_cross_validation.py


Example 5: tuning_l2_penalty

def tuning_l2_penalty(out_file, featurizers = None):
    # featurizers for blog/blog, twitter+wiki/blog and twitter+wiki/twitter+wiki respectively
    if not featurizers:
        featurizers = [feat4, feat5, feat4]
    # used to weigh L-2 penalty
    c_vals = [ v / 100.0 for v in range(50, 110, 10)]
    # data splits used
    b_train, b_test = train_test_split(blog_80, test_size = 0.1, random_state = 1)
    tw_train, tw_test = train_test_split(tw, test_size = 0.1, random_state = 1)
    # count sizes only once
    n_btest = float(len(b_test))
    n_b80 = float(len(blog_80))
    n_twtest = float(len(tw_test))

    for c_val in c_vals:
        print "Running l-2 tunning for C:%.2f" % c_val
        # Using split validation, as otherwise too slow
        make_model = lambda: Models.LogisticRegression(C = c_val)
        blog_errors = error_analyze(make_model, b_train, b_test, featurizers[0])
        twb_errors = error_analyze(make_model, tw, blog_80, featurizers[1])
        tw_errors = error_analyze(make_model, tw_train, tw_test, featurizers[2])

        blog_acc = 1 - len(blog_errors["error_indices"]) / n_btest
        twb_acc = 1 - len(twb_errors['error_indices']) / n_b80
        tw_acc = 1 - len(tw_errors['error_indices']) / n_twtest
        # write to file provided
        out_file.write("C=%f\n" % c_val)
        out_file.write("b=%f, twb=%f, tw=%f\n\n" % (blog_acc, twb_acc, tw_acc))
Developer: josepablocam, Project: snlp_project, Lines: 28, Source: maxent_experiments.py


Example 6: get_best_k_model

def get_best_k_model(model, max_k, x, y):
    # Fit a model using a range of best-k values, 
    # returning the model that produces the best test score
    
    # Input
    # model: scikit-learn model
    # max_k: maximum k-value to iterate to (inclusive)
    # x: independent variables
    # y: dependent variable
    
    # Output
    # best_k: Number of dependent variables using to produce output
    # train_score: training score
    # test_score: test score
    # train_mse: training mse
    # test_mse: test mse       
    
    test_scores = []
    k_vals = []    
    
    k_limit = min(max_k, len(x.columns))
    for k_val in range(1, k_limit + 1):
        best_x = fs.SelectKBest(fs.chi2, k = k_val).fit_transform(x, y)
        x_train, x_test, y_train, y_test = cv.train_test_split(best_x, y, test_size = 0.2, random_state = 0)
        test_scores.append(model.fit(x_train, y_train).score(x_test, y_test))
        k_vals.append(k_val)

    best_k = k_vals[np.argmax(test_scores)]
    best_x = fs.SelectKBest(fs.chi2, k = best_k).fit_transform(x, y)
    x_train, x_test, y_train, y_test = cv.train_test_split(best_x, y, test_size = 0.2, random_state = 0)
       
    train_score, test_score, train_mse, test_mse = get_model_values(model, x_train, y_train, x_test, y_test)
    
    return best_k, train_score, test_score, train_mse, test_mse
Developer: kcavagnolo, Project: ml_fun, Lines: 34, Source: linkedin_salary.py


Example 7: load_dataset

def load_dataset(path_id="", folder="", use_float_32=False, test_ratio=0.3, valid_ratio=0.1):	
#def load_dataset(path_id="", use_float_32=False, test_ratio=0.2, valid_ratio=0.1):
	# reading full dataset
	features_path = "data/%s/features%s.npy"%(folder, path_id)
	labels_path = "data/%s/labels%s.npy"%(folder, path_id)
	

	features = np.load(features_path)
	if use_float_32:
		features = features.astype(np.float32)
	labels = np.load(labels_path)
	
	# splitting data
	train_set_x, test_set_x, train_set_y, test_set_y = train_test_split(features, labels, test_size=test_ratio, random_state=89677)
	#train_set_x = features[:2500]
	#train_set_y = labels[:2500]
	
	#test_set_x = features[2500:]
	#test_set_y = labels[2500:]
	test_set_x = theano.shared(value=test_set_x, name='test_set_x', borrow=True)
	test_set_y = theano.shared(value=np.array(test_set_y), name='test_set_y', borrow=True)
	
	# split train set into validation set
	train_set_x, valid_set_x, train_set_y, valid_set_y = train_test_split(train_set_x, train_set_y, test_size=valid_ratio, random_state=89677)
	
	print train_set_x.shape, valid_set_x.shape, test_set_x.get_value(borrow=True).shape
	
	train_set_x = theano.shared(value=train_set_x, name='train_set_x', borrow=True)
	train_set_y = theano.shared(value=np.array(train_set_y), name='train_set_y', borrow=True)
	
	valid_set_x = theano.shared(value=valid_set_x, name='valid_set_x', borrow=True)
	valid_set_y = theano.shared(value=np.array(valid_set_y), name='valid_set_y', borrow=True)
	
	return ((train_set_x, train_set_y), (valid_set_x, valid_set_y), (test_set_x, test_set_y))	
Developer: Ahmed--Mohsen, Project: authorship, Lines: 34, Source: data_handler.py


Example 8: getImages

def getImages():
   digitsImagesNormalized = getImagesFromDir(digitsPath)
   lettersImagesNormalized = getImagesFromDir(lettersPath)

   digitsImagesNormalized = [skpre.scale(digitsImagesNormalized[0]), digitsImagesNormalized[1]]
   lettersImagesNormalized = [skpre.scale(lettersImagesNormalized[0]), lettersImagesNormalized[1]]

   allImages = []
   for i in digitsImagesNormalized[0]:
      allImages.append(i)

   for i in lettersImagesNormalized[0]:
      allImages.append(i)

   # Split into test and training sets.
   # Compute PCA - dimensionality reduction of the data. :)
   pca = computePCA(allImages)
   digitstransformedData = pca.transform(digitsImagesNormalized[0])
   letterstransformedData = pca.transform(lettersImagesNormalized[0])

   dtrainDataTF, dtestDataTF, dclassesTrainTF, dclassesTestTF = train_test_split(digitstransformedData, digitsImagesNormalized[1], train_size=0.65)

   ltrainDataTF, ltestDataTF, lclassesTrainTF, lclassesTestTF = train_test_split(letterstransformedData, lettersImagesNormalized[1], train_size=0.65)
   
   return [[dtrainDataTF, dclassesTrainTF], [dtestDataTF, dclassesTestTF]], [[ltrainDataTF, lclassesTrainTF], [ltestDataTF, lclassesTestTF]]
Developer: pedrokalmeida, Project: mc861, Lines: 25, Source: charDetect.py


Example 9: train_lsvr

def train_lsvr():
    train_sys = np.load('fc2_train_sys.npy')
    test_sys =  np.load('fc2_test_sys.npy')
    # from sklearn.preprocessing import StandardScaler
    # sle = StandardScaler()
    # train_sys = sle.fit_transform(train_sys)
    # test_sys  = sle.fit_transform(test_sys)
    
    y = np.load('data/y_train.npy')
    from sklearn import svm
    #from sklearn.metrics import mean_squared_error
    from sklearn.ensemble import RandomForestRegressor
    lsvr = svm.SVR(C=0.1) # 0.045
    #lsvr = RandomForestRegressor(n_estimators = 100)
    train_sys, val_sys, train_y_sys, val_y_sys = train_test_split(train_sys, y[:,0])
    lsvr.fit(train_sys, train_y_sys)
    #print mean_squared_error(val_y_sys, l
    pred_systole = lsvr.predict(val_sys)
    cdf_val = real_to_cdf(val_y_sys)
    cdf_pred_systole = real_to_cdf(pred_systole)
    crps_val = crps(cdf_val, cdf_pred_systole)
    print('CRPS(val sys) = {0}'.format(crps_val))

    train_dia = np.load('fc2_train_dia.npy')
    test_dia  = np.load('fc2_test_dia.npy')

    train_dia, val_dia, train_y_dia, val_y_dia = train_test_split(train_dia, y[:,1])
    lsvr.fit(train_dia, train_y_dia)

    pred_dia = lsvr.predict(val_dia)
    cdf_val_dia = real_to_cdf(val_y_dia)
    cdf_pred_dia = real_to_cdf(pred_dia)
    crps_val = crps(cdf_val_dia, cdf_pred_dia)
    print('CRPS(val dia) = {0}'.format(crps_val))
Developer: ouceduxzk, Project: kaggle-ndsb2, Lines: 34, Source: extract.py


Example 10: split_dataset

 def split_dataset(index, random_state, test_ratio=0.2, valid_ratio=0.2):
     index = list(index)
     ix_train, ix_test = train_test_split(index, test_size=test_ratio,
         random_state=random_state)
     ix_train, ix_valid = train_test_split(ix_train,
         test_size=valid_ratio / (1 - test_ratio), random_state=random_state)
     return {'train': ix_train, 'valid': ix_valid, 'test': ix_test}
Developer: bzamecnik, Project: ml-playground, Lines: 7, Source: prepare_training_data.py


Example 11: create_sets

def create_sets(img_dir, train_set_proportion=.6, test_set_proportion=.2, val_set_proportion=.2):
    '''Split a list of image files up into training, testing and validation sets.'''

    if os.path.isfile(img_dir+ 'imgs.list'):
        baseimgfilenames = pickle.load(open(img_dir+'imgs.list','rb'))
    else:
        imgfilenames = glob.glob(img_dir + '*.jpg')
        baseimgfilenames = [os.path.basename(f) for f in imgfilenames]

    train,val = train_test_split(np.arange(len(baseimgfilenames)),
                                       train_size=train_set_proportion+test_set_proportion,
                                       test_size=val_set_proportion,
                                       random_state=1)

    train_test_prop = train_set_proportion + test_set_proportion
    train,test = train_test_split(train,
                                  train_size=train_set_proportion/train_test_prop,
                                  test_size=test_set_proportion/train_test_prop,
                                  random_state=1)

    trainfiles = [baseimgfilenames[i] for i in train]
    valfiles = [baseimgfilenames[i] for i in val]
    testfiles = [baseimgfilenames[i] for i in test]

    return trainfiles, valfiles,testfiles
Developer: emwebaze, Project: microscopy-object-detection, Lines: 25, Source: createdb.py


Example 12: create_sets

def create_sets(img_dir, train_set_proportion=.6, test_set_proportion=.2, val_set_proportion=.2):
    '''Split a list of image files up into training, testing and validation sets.'''

    imgfilenames = glob.glob(img_dir + '*.jpg')
    baseimgfilenames = [os.path.basename(f) for f in imgfilenames]

    if train_set_proportion + test_set_proportion < 1:
        train,val = train_test_split(np.arange(len(baseimgfilenames)),
                                           train_size=train_set_proportion+test_set_proportion,
                                           test_size=val_set_proportion,
                                           random_state=1) 
    else:
        train = np.arange(len(baseimgfilenames))
        val = []

    train_test_prop = train_set_proportion + test_set_proportion
    train,test = train_test_split(train,
                                  train_size=train_set_proportion/train_test_prop,
                                  test_size=test_set_proportion/train_test_prop,
                                  random_state=1)

    trainfiles = [baseimgfilenames[i] for i in train]
    testfiles = [baseimgfilenames[i] for i in test]
    valfiles = [baseimgfilenames[i] for i in val]

    return trainfiles, valfiles,testfiles
Developer: jqug, Project: microscopy-object-detection, Lines: 26, Source: readdata.py


Example 13: main

def main(unused_argv):
  iris = datasets.load_iris()
  x_train, x_test, y_train, y_test = train_test_split(
      iris.data, iris.target, test_size=0.2, random_state=42)

  x_train, x_val, y_train, y_val = train_test_split(
      x_train, y_train, test_size=0.2, random_state=42)
  val_monitor = learn.monitors.ValidationMonitor(
      x_val, y_val, early_stopping_rounds=200)

  # classifier with early stopping on training data
  classifier1 = learn.DNNClassifier(
      hidden_units=[10, 20, 10], n_classes=3, model_dir='/tmp/iris_model/')
  classifier1.fit(x=x_train, y=y_train, steps=2000)
  score1 = metrics.accuracy_score(y_test, classifier1.predict(x_test))

  # classifier with early stopping on validation data, save frequently for
  # monitor to pick up new checkpoints.
  classifier2 = learn.DNNClassifier(
      hidden_units=[10, 20, 10], n_classes=3, model_dir='/tmp/iris_model_val/',
      config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))
  classifier2.fit(x=x_train, y=y_train, steps=2000, monitors=[val_monitor])
  score2 = metrics.accuracy_score(y_test, classifier2.predict(x_test))

  # In many applications, the score is improved by using early stopping
  print('score1: ', score1)
  print('score2: ', score2)
  print('score2 > score1: ', score2 > score1)
Developer: AadityaJ, Project: tensorflow, Lines: 28, Source: iris_val_based_early_stopping.py


Example 14: split_data

def split_data(x_train, y_train):
    """
    Given training data cropped from the original dataset by create_training_set.py, split this data up into training, cross-validation, and test data.

    INPUTS:
    x_train = Features cropped from original dataset
    y_train = Labels manually inputed from x_train

    OUTPUTS:
    new_x_train = New training data randomly selected from x_train
    new_x_crossval = Cross-validation samples from x_train
    new_x_test = Test samples from x_train
    new_y_train = Training labels
    new_y_crossval = Cross-validation labels
    new_y_test = Testing labels
    """
    new_x_train, new_x_test, new_y_train, new_y_test \
     = cross_val.train_test_split(x_train,
                                  y_train,
                                  test_size=0.3,
                                  random_state=53)
    new_x_crossval, new_x_test, new_y_crossval, new_y_test \
     = cross_val.train_test_split(new_x_test,
                                  new_y_test,
                                  test_size=0.5,
                                  random_state=41)
    return new_x_train, new_x_crossval, new_x_test, new_y_train, \
            new_y_crossval, new_y_test
Developer: EthanRosenthal, Project: stm-routines, Lines: 28, Source: train_model.py


Example 15: cook

def cook():
    x, y, weights = load_data()
    n_components = 200
    svd = TruncatedSVD(n_components, random_state=42)
    x_unweighted = svd.fit_transform(x)
    x_weighted = svd.fit_transform(weighted(x, weights))

    for i in range(9):
        frac = 1 - (i * 0.01 + 0.01)
        print frac

        x_train, x_test, y_train, y_test = train_test_split(x_unweighted, y, test_size=frac)
        classifier = AdaBoostClassifier(n_estimators=100)
        classifier.fit(x_train, y_train)
        print "Unweighted: ", classifier.score(x_test, y_test)

        x_train, x_test, y_train, y_test = train_test_split(x_weighted, y, test_size=frac)
        classifier = AdaBoostClassifier(n_estimators=100)
        classifier.fit(x_train, y_train)
        print "Weighted: ", classifier.score(x_test, y_test)

        print '--------------------------'


    '''
Developer: wangchr, Project: eMeriL, Lines: 25, Source: cook.py


Example 16: main

def main():
    X, Y, encoder, scale = load_train_data('train.csv')
    estimators = 500
    X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, test_size=0.2, random_state=0)
    X_train_real, X_test_real, Y_train_real, Y_test_real = train_test_split(X_train, Y_train, test_size=0.2, random_state=42)
    log.info('Loaded training file')
    X_test, _ = load_csv_file('test.csv', cut_end=False)
    log.info('Loaded test file')

    #Classifier Setup
    tree_clf = ExtraTreesClassifier(n_estimators=estimators, n_jobs=-1,
                                    random_state=42, max_depth=55, min_samples_split=1)

    clf = make_pipeline(TfidfTransformer(), DenseTransformer(), tree_clf)
    log.info('Fitting GradientBoost')
    clf.fit(X_train_real, Y_train_real)
    clf_probs = clf.predict_proba(X_test_real)
    score = log_loss(Y_test_real, clf_probs)
    log.info('Log Loss score un-trained = %f' % score)
    # Calibrate Classifier using ground truth in X,Y_valid
    sig_clf = CalibratedClassifierCV(clf, method="isotonic", cv="prefit")
    log.info('Fitting CalibratedClassifierCV')
    sig_clf.fit(X_valid, Y_valid)
    sig_clf_probs = sig_clf.predict_proba(X_test_real)
    sig_score = log_loss(Y_test_real, sig_clf_probs)
    log.info('Log loss score trained = %f' % sig_score)

    # Ok lets predict the test data with our funky new classifier
    sig_submission_probs = sig_clf.predict_proba(X_test)

    write_out_submission(sig_submission_probs, 'submission.csv')
Developer: Almclean, Project: otto-group, Lines: 31, Source: main.py


Example 17: load_data_sets

def load_data_sets(input_data, labels, split_only=True, valid_set=False):
    class DataSets(object):
        pass
    data_sets = DataSets()

    print("\nSplitting to Train & Test sets for Finetuning")

    if valid_set:
        train_examples, test_examples, train_labels, test_labels = \
                        train_test_split(input_data, labels, test_size=0.2)
        train_examples, validation_examples, train_labels, validation_labels = \
                        train_test_split(train_examples, train_labels, test_size=0.05)
        data_sets.validation = DataSet(validation_examples, validation_labels)
    else:
        train_examples, test_examples, train_labels, test_labels = \
                        train_test_split(input_data, labels, test_size=0.3)
        data_sets.validation = None

#     validation_examples = input_data[:VALIDATION_SIZE]
#     train_examples = input_data[VALIDATION_SIZE:]

    data_sets.train = DataSet(train_examples, train_labels)
    data_sets.test = DataSet(test_examples, test_labels)
    
    if not split_only:
        data_sets.all = DataSet(input_data, labels)
    
    return data_sets
Developer: glrs, Project: StackedDAE, Lines: 28, Source: utils.py


Example 18: iris_demo

def iris_demo():
    # load the iris dataset
    iris = load_iris()
    X = iris['data']
    y_labels = iris['target']

    lb = LabelBinarizer()
    y = lb.fit_transform(y_labels)

    # split into training, validation and test datasets
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.25,
                                                        random_state=RANDOM_STATE)

    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train,
                                                          test_size=0.25,
                                                          random_state=RANDOM_STATE)

    # train the neural net
    print("Building logistic regression classifier to classify iris data")
    nn = pynn.ArtificialNeuralNet([X_train.shape[1], 20, y_train.shape[1]])
    print("Training")
    nn.fit(X_train, y_train, X_valid, y_valid,
           batch_size=20, n_epochs=20, learning_rate=0.05,
           random_state=RANDOM_STATE)

    y_pred = nn.predict(X_test)

    print("iris accuracy: {}%".format(
        accuracy_score(y_test.argmax(1), y_pred.argmax(1)) * 100))
Developer: benjamin-croker, Project: pynn, Lines: 30, Source: example.py


Example 19: load_data

def load_data():
    '''
    Loads the data, turns into word2vec representation, and splits
    into training, validation, and testing sets with ratio 8:1:1
    '''
    trainingDataFile = '../data/traindata.txt'
    trainingPosFile = '../data/pos_Embedding.txt'
    trainingLabelFile = '../data/trainlabel.txt'
    wordToVecDictFile = '../data/glove/glove.6B.50d.txt'
    print('Vectorizing the features and labels...')
    start_time = timeit.default_timer()
    X,Y = word2vec.createVecFeatsLabels(trainingDataFile,trainingPosFile,trainingLabelFile,wordToVecDictFile,window_size)
    end_time = timeit.default_timer()
    print('Pickling the vectorization files')
    # pickling X-file
    clean_data = open('../data/clean_data.pkl','wb')
    pickle.dump(X, clean_data)
    clean_data.close()
    # pickling the labels-file
    clean_label = open('../data/clean_label.pkl', 'wb')
    pickle.dump(Y, clean_label)
    clean_label.close()
    print(('The vectorization ran for %.2fm' % ((end_time - start_time) / 60.)))
    print('Splitting into training, validation, and testing sets ...')
    X_train, X_rest, y_train, y_rest = train_test_split(X, Y, test_size=0.2, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_rest,y_rest, test_size=0.5, random_state=42)
    return X_train, X_val, X_test, y_train, y_val, y_test
Developer: 546-NN2project, Project: baseline, Lines: 27, Source: runWithPos.py


Example 20: conv_demo

def conv_demo():
    # load the digits dataset
    digits = load_digits()
    X = digits['data']
    y_labels = digits['target']

    lb = LabelBinarizer()
    y = lb.fit_transform(y_labels)

    # split into training, validation and test datasets
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.25,
                                                        random_state=RANDOM_STATE)

    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train,
                                                          test_size=0.25,
                                                          random_state=RANDOM_STATE)

    # train the neural net
    print("Building neural net to classify digits")
    conv_net = pynn.ConvNet(digits['images'][0].shape, 1, y.shape[1],
                            random_state=RANDOM_STATE)
    print("Training")
    conv_net.fit(X_train, y_train, X_valid, y_valid,
                 batch_size=20, n_epochs=20, learning_rate=0.05)

    y_pred = conv_net.predict(X_test)

    print("digits accuracy: {}%".format(
        accuracy_score(y_test.argmax(1), y_pred.argmax(1)) * 100))
Developer: benjamin-croker, Project: pynn, Lines: 30, Source: example.py



Note: The sklearn.cross_validation.train_test_split examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by various developers; copyright belongs to the original authors, and any distribution or use should follow the license of the corresponding project. Please do not reproduce without permission.

