This article collects typical usage examples of the sklearn.cross_validation.train_test_split function in Python. If you are wondering what train_test_split does, how to call it, or what real-world uses look like, the curated examples below should help.
Twenty code examples of the train_test_split function are shown below, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python code samples.
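Note that all of the examples below import train_test_split from sklearn.cross_validation, which was deprecated in scikit-learn 0.18 and removed in 0.20; in current releases the same function lives in sklearn.model_selection. Before the examples, here is a minimal sketch of the basic call, assuming only a recent scikit-learn and NumPy are installed:

import numpy as np
from sklearn.model_selection import train_test_split  # sklearn.cross_validation in the older examples below

# 10 samples with 2 features each, plus matching labels
X = np.arange(20).reshape(10, 2)
y = np.arange(10)

# hold out 20% of the rows for testing; random_state makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(X_train.shape, X_test.shape)  # (8, 2) (2, 2)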
Example 1: splitDataset
def splitDataset(data, random_seed):
    '''
    Given a dataframe and a seed value, split the dataframe into a training
    set, a validation set, and a test set, using the provided seed value for
    consistency. A 60/20/20 split is used, but this could easily be
    parameterized and passed into the function. Returns a dictionary of
    dataframes with keys train, valid and test.
    '''
    # Get column headers
    col_headers = list(data.columns.values)
    feature_cols = copy.deepcopy(col_headers)
    feature_cols.remove('Sample')
    feature_cols.remove('Diagnosis')
    class_col = ['Diagnosis']
    # Train/test/validate split
    train, test = train_test_split(data, test_size=0.2, random_state=random_seed)
    train = pd.DataFrame(train)
    test = pd.DataFrame(test)
    train.columns = col_headers
    test.columns = col_headers
    train, validate = train_test_split(train, test_size=0.25, random_state=random_seed)
    train = pd.DataFrame(train)
    validate = pd.DataFrame(validate)
    train.columns = col_headers
    validate.columns = col_headers
    # Separate features and classes
    all_data = {'train': train, 'valid': validate, 'test': test}
    return extractFeatures(all_data)
Developer: bwelsh, Project: projectW4761, Lines: 26, Source: cgen_include.py
Example 2: tribunalTrain
def tribunalTrain(data, predict, tribunal, split=.2, stat=False, statLis=None):
    # data for testing the tribunal performance, not in actual judge training
    dat_train, dat_test, lab_train, lab_test = train_test_split(data, predict, test_size=split)
    verdict = []
    print 'Tribunal in session'
    for judge in tribunal:
        jdat_train, jdat_test, jlab_train, jlab_test = train_test_split(dat_train, lab_train, test_size=split)
        judge.fit(jdat_train, jlab_train)
        print 'judge trained'
    for d in dat_test:
        votes = []
        for judge in tribunal:
            v = judge.predict(d)
            votes.append(v)
        decision = stats.mode(votes, axis=None)
        verdict.append(decision[0])
    npVerdict = np.array(verdict)
    if stat == False:
        svmDesc(npVerdict, lab_test, title='Tribunal Confusion Matrix')
    else:
        jac = jaccard_similarity_score(npVerdict, lab_test)
        statLis.append(jac)
Developer: am4002, Project: Hybrid-SOM-for-MEG, Lines: 26, Source: som_cluster_lib.py
Example 3: processMethod3
def processMethod3(userid, featureCondition=1, classificationCondition=1, offsetFeatureOn=False):
    """ User-i Device-j hack in User-i Device-k Model: iphone6plus hack iphone5

    Returns
    -------
    float : error rate
    """
    # rawDataiPhone6Plus = loadUserData(userid, 1, datatype=1)  # moment data
    # rawDataiPhone5 = loadUserData(userid, 2, datatype=1)  # moment data
    # trainingData = splitMomentDataByFeature(rawDataiPhone5, featureCondition=featureCondition)
    # trainingLabel = rawDataiPhone5[:, 4]
    # testData = splitMomentDataByFeature(rawDataiPhone6Plus, featureCondition=featureCondition)
    # testLabel = rawDataiPhone6Plus[:, 4]
    iPhone6Plus = 1
    iPhone5 = 2
    trainingData, trainingLabel = splitMomentDataByFeatureAndLabel(userid, iPhone5, featureCondition, classificationCondition, offsetFeatureOn=offsetFeatureOn)
    testData, testLabel = splitMomentDataByFeatureAndLabel(userid, iPhone6Plus, featureCondition, classificationCondition, offsetFeatureOn=offsetFeatureOn)
    # use same test size as method1
    trainingDataIP5, testDataIP5, trainingLabelIP5, testLabelIP5 = train_test_split(trainingData, trainingLabel, test_size=my_test_size, random_state=my_random_state)
    trainingDataIP6, testDataIP6, trainingLabelIP6, testLabelIP6 = train_test_split(testData, testLabel, test_size=my_test_size, random_state=my_random_state)
    return classify(trainingDataIP5, trainingLabelIP5, testDataIP6, testLabelIP6, kernel=my_kernel, max_iter=my_max_iteration)
Developer: changkun, Project: AugmentedTouch, Lines: 26, Source: moment.py
Example 4: test_train_test_split
def test_train_test_split():
    X = np.arange(100).reshape((10, 10))
    X_s = coo_matrix(X)
    y = np.arange(10)
    # simple test
    split = cval.train_test_split(X, y, test_size=None, train_size=.5)
    X_train, X_test, y_train, y_test = split
    assert_equal(len(y_test), len(y_train))
    # test correspondence of X and y
    assert_array_equal(X_train[:, 0], y_train * 10)
    assert_array_equal(X_test[:, 0], y_test * 10)
    # conversion of lists to arrays (deprecated?)
    split = cval.train_test_split(X, X_s, y.tolist(), allow_lists=False)
    X_train, X_test, X_s_train, X_s_test, y_train, y_test = split
    assert_array_equal(X_train, X_s_train.toarray())
    assert_array_equal(X_test, X_s_test.toarray())
    # don't convert lists to anything else by default
    split = cval.train_test_split(X, X_s, y.tolist())
    X_train, X_test, X_s_train, X_s_test, y_train, y_test = split
    assert_true(isinstance(y_train, list))
    assert_true(isinstance(y_test, list))
    # allow nd-arrays
    X_4d = np.arange(10 * 5 * 3 * 2).reshape(10, 5, 3, 2)
    y_3d = np.arange(10 * 7 * 11).reshape(10, 7, 11)
    split = cval.train_test_split(X_4d, y_3d)
    assert_equal(split[0].shape, (7, 5, 3, 2))
    assert_equal(split[1].shape, (3, 5, 3, 2))
    assert_equal(split[2].shape, (7, 7, 11))
    assert_equal(split[3].shape, (3, 7, 11))
Developer: jjhelmus, Project: scikit-learn, Lines: 33, Source: test_cross_validation.py
Example 5: tuning_l2_penalty
def tuning_l2_penalty(out_file, featurizers=None):
    # featurizers for blog/blog, twitter+wiki/blog and twitter+wiki/twitter+wiki respectively
    if not featurizers:
        featurizers = [feat4, feat5, feat4]
    # used to weigh the L-2 penalty
    c_vals = [v / 100.0 for v in range(50, 110, 10)]
    # data splits used
    b_train, b_test = train_test_split(blog_80, test_size=0.1, random_state=1)
    tw_train, tw_test = train_test_split(tw, test_size=0.1, random_state=1)
    # count sizes only once
    n_btest = float(len(b_test))
    n_b80 = float(len(blog_80))
    n_twtest = float(len(tw_test))
    for c_val in c_vals:
        print "Running l-2 tuning for C:%.2f" % c_val
        # Using split validation, as otherwise too slow
        make_model = lambda: Models.LogisticRegression(C=c_val)
        blog_errors = error_analyze(make_model, b_train, b_test, featurizers[0])
        twb_errors = error_analyze(make_model, tw, blog_80, featurizers[1])
        tw_errors = error_analyze(make_model, tw_train, tw_test, featurizers[2])
        blog_acc = 1 - len(blog_errors["error_indices"]) / n_btest
        twb_acc = 1 - len(twb_errors['error_indices']) / n_b80
        tw_acc = 1 - len(tw_errors['error_indices']) / n_twtest
        # write to the file provided
        out_file.write("C=%f\n" % c_val)
        out_file.write("b=%f, twb=%f, tw=%f\n\n" % (blog_acc, twb_acc, tw_acc))
Developer: josepablocam, Project: snlp_project, Lines: 28, Source: maxent_experiments.py
Example 6: get_best_k_model
def get_best_k_model(model, max_k, x, y):
    # Fit a model using a range of best-k values,
    # returning the model that produces the best test score
    # Input
    #   model: scikit-learn model
    #   max_k: maximum k-value to iterate to (inclusive)
    #   x: independent variables
    #   y: dependent variable
    # Output
    #   best_k: number of dependent variables used to produce output
    #   train_score: training score
    #   test_score: test score
    #   train_mse: training mse
    #   test_mse: test mse
    test_scores = []
    k_vals = []
    k_limit = min(max_k, len(x.columns))
    for k_val in range(1, k_limit + 1):
        best_x = fs.SelectKBest(fs.chi2, k=k_val).fit_transform(x, y)
        x_train, x_test, y_train, y_test = cv.train_test_split(best_x, y, test_size=0.2, random_state=0)
        test_scores.append(model.fit(x_train, y_train).score(x_test, y_test))
        k_vals.append(k_val)
    best_k = k_vals[np.argmax(test_scores)]
    best_x = fs.SelectKBest(fs.chi2, k=best_k).fit_transform(x, y)
    x_train, x_test, y_train, y_test = cv.train_test_split(best_x, y, test_size=0.2, random_state=0)
    train_score, test_score, train_mse, test_mse = get_model_values(model, x_train, y_train, x_test, y_test)
    return best_k, train_score, test_score, train_mse, test_mse
Developer: kcavagnolo, Project: ml_fun, Lines: 34, Source: linkedin_salary.py
Example 7: load_dataset
def load_dataset(path_id="", folder="", use_float_32=False, test_ratio=0.3, valid_ratio=0.1):
    # def load_dataset(path_id="", use_float_32=False, test_ratio=0.2, valid_ratio=0.1):
    # reading full dataset
    features_path = "data/%s/features%s.npy" % (folder, path_id)
    labels_path = "data/%s/labels%s.npy" % (folder, path_id)
    features = np.load(features_path)
    if use_float_32:
        features = features.astype(np.float32)
    labels = np.load(labels_path)
    # splitting data
    train_set_x, test_set_x, train_set_y, test_set_y = train_test_split(features, labels, test_size=test_ratio, random_state=89677)
    # train_set_x = features[:2500]
    # train_set_y = labels[:2500]
    # test_set_x = features[2500:]
    # test_set_y = labels[2500:]
    test_set_x = theano.shared(value=test_set_x, name='test_set_x', borrow=True)
    test_set_y = theano.shared(value=np.array(test_set_y), name='test_set_y', borrow=True)
    # split train set into validation set
    train_set_x, valid_set_x, train_set_y, valid_set_y = train_test_split(train_set_x, train_set_y, test_size=valid_ratio, random_state=89677)
    print train_set_x.shape, valid_set_x.shape, test_set_x.get_value(borrow=True).shape
    train_set_x = theano.shared(value=train_set_x, name='train_set_x', borrow=True)
    train_set_y = theano.shared(value=np.array(train_set_y), name='train_set_y', borrow=True)
    valid_set_x = theano.shared(value=valid_set_x, name='valid_set_x', borrow=True)
    valid_set_y = theano.shared(value=np.array(valid_set_y), name='valid_set_y', borrow=True)
    return ((train_set_x, train_set_y), (valid_set_x, valid_set_y), (test_set_x, test_set_y))
Developer: Ahmed--Mohsen, Project: authorship, Lines: 34, Source: data_handler.py
Example 8: getImages
def getImages():
    digitsImagesNormalized = getImagesFromDir(digitsPath)
    lettersImagesNormalized = getImagesFromDir(lettersPath)
    digitsImagesNormalized = [skpre.scale(digitsImagesNormalized[0]), digitsImagesNormalized[1]]
    lettersImagesNormalized = [skpre.scale(lettersImagesNormalized[0]), lettersImagesNormalized[1]]
    allImages = []
    for i in digitsImagesNormalized[0]:
        allImages.append(i)
    for i in lettersImagesNormalized[0]:
        allImages.append(i)
    # Split into test and training sets.
    # Compute PCA - dimensionality reduction of the data. :)
    pca = computePCA(allImages)
    digitstransformedData = pca.transform(digitsImagesNormalized[0])
    letterstransformedData = pca.transform(lettersImagesNormalized[0])
    dtrainDataTF, dtestDataTF, dclassesTrainTF, dclassesTestTF = train_test_split(digitstransformedData, digitsImagesNormalized[1], train_size=0.65)
    ltrainDataTF, ltestDataTF, lclassesTrainTF, lclassesTestTF = train_test_split(letterstransformedData, lettersImagesNormalized[1], train_size=0.65)
    return [[dtrainDataTF, dclassesTrainTF], [dtestDataTF, dclassesTestTF]], [[ltrainDataTF, lclassesTrainTF], [ltestDataTF, lclassesTestTF]]
Developer: pedrokalmeida, Project: mc861, Lines: 25, Source: charDetect.py
Example 9: train_lsvr
def train_lsvr():
    train_sys = np.load('fc2_train_sys.npy')
    test_sys = np.load('fc2_test_sys.npy')
    # from sklearn.preprocessing import StandardScaler
    # sle = StandardScaler()
    # train_sys = sle.fit_transform(train_sys)
    # test_sys = sle.fit_transform(test_sys)
    y = np.load('data/y_train.npy')
    from sklearn import svm
    # from sklearn.metrics import mean_squared_error
    from sklearn.ensemble import RandomForestRegressor
    lsvr = svm.SVR(C=0.1)  # 0.045
    # lsvr = RandomForestRegressor(n_estimators=100)
    train_sys, val_sys, train_y_sys, val_y_sys = train_test_split(train_sys, y[:, 0])
    lsvr.fit(train_sys, train_y_sys)
    # print mean_squared_error(val_y_sys, l
    pred_systole = lsvr.predict(val_sys)
    cdf_val = real_to_cdf(val_y_sys)
    cdf_pred_systole = real_to_cdf(pred_systole)
    crps_val = crps(cdf_val, cdf_pred_systole)
    print('CRPS(val sys) = {0}'.format(crps_val))
    train_dia = np.load('fc2_train_dia.npy')
    test_dia = np.load('fc2_test_dia.npy')
    train_dia, val_dia, train_y_dia, val_y_dia = train_test_split(train_dia, y[:, 1])
    lsvr.fit(train_dia, train_y_dia)
    pred_dia = lsvr.predict(val_dia)
    cdf_val_dia = real_to_cdf(val_y_dia)
    cdf_pred_dia = real_to_cdf(pred_dia)
    crps_val = crps(cdf_val_dia, cdf_pred_dia)
    print('CRPS(val dia) = {0}'.format(crps_val))
Developer: ouceduxzk, Project: kaggle-ndsb2, Lines: 34, Source: extract.py
Example 10: split_dataset
def split_dataset(index, random_state, test_ratio=0.2, valid_ratio=0.2):
    index = list(index)
    ix_train, ix_test = train_test_split(index, test_size=test_ratio,
                                         random_state=random_state)
    ix_train, ix_valid = train_test_split(ix_train,
                                          test_size=valid_ratio / (1 - test_ratio),
                                          random_state=random_state)
    return {'train': ix_train, 'valid': ix_valid, 'test': ix_test}
Developer: bzamecnik, Project: ml-playground, Lines: 7, Source: prepare_training_data.py
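The nested-split arithmetic in Example 10 is worth spelling out: the second call uses test_size=valid_ratio / (1 - test_ratio) so that the validation set ends up being valid_ratio of the full index rather than of the remainder. A quick check under the default ratios (a sketch for illustration, not part of the original project):

test_ratio, valid_ratio = 0.2, 0.2
inner = valid_ratio / (1 - test_ratio)   # 0.25 of the remaining 80%
train = (1 - test_ratio) * (1 - inner)   # 0.8 * 0.75 = 0.6
valid = (1 - test_ratio) * inner         # 0.8 * 0.25 = 0.2
print(train, valid, test_ratio)          # roughly 0.6 0.2 0.2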
Example 11: create_sets
def create_sets(img_dir, train_set_proportion=.6, test_set_proportion=.2, val_set_proportion=.2):
    '''Split a list of image files up into training, testing and validation sets.'''
    if os.path.isfile(img_dir + 'imgs.list'):
        baseimgfilenames = pickle.load(open(img_dir + 'imgs.list', 'rb'))
    else:
        imgfilenames = glob.glob(img_dir + '*.jpg')
        baseimgfilenames = [os.path.basename(f) for f in imgfilenames]
    train, val = train_test_split(np.arange(len(baseimgfilenames)),
                                  train_size=train_set_proportion + test_set_proportion,
                                  test_size=val_set_proportion,
                                  random_state=1)
    train_test_prop = train_set_proportion + test_set_proportion
    train, test = train_test_split(train,
                                   train_size=train_set_proportion / train_test_prop,
                                   test_size=test_set_proportion / train_test_prop,
                                   random_state=1)
    trainfiles = [baseimgfilenames[i] for i in train]
    valfiles = [baseimgfilenames[i] for i in val]
    testfiles = [baseimgfilenames[i] for i in test]
    return trainfiles, valfiles, testfiles
Developer: emwebaze, Project: microscopy-object-detection, Lines: 25, Source: createdb.py
Example 12: create_sets
def create_sets(img_dir, train_set_proportion=.6, test_set_proportion=.2, val_set_proportion=.2):
    '''Split a list of image files up into training, testing and validation sets.'''
    imgfilenames = glob.glob(img_dir + '*.jpg')
    baseimgfilenames = [os.path.basename(f) for f in imgfilenames]
    if train_set_proportion + test_set_proportion < 1:
        train, val = train_test_split(np.arange(len(baseimgfilenames)),
                                      train_size=train_set_proportion + test_set_proportion,
                                      test_size=val_set_proportion,
                                      random_state=1)
    else:
        train = np.arange(len(baseimgfilenames))
        val = []
    train_test_prop = train_set_proportion + test_set_proportion
    train, test = train_test_split(train,
                                   train_size=train_set_proportion / train_test_prop,
                                   test_size=test_set_proportion / train_test_prop,
                                   random_state=1)
    trainfiles = [baseimgfilenames[i] for i in train]
    testfiles = [baseimgfilenames[i] for i in test]
    valfiles = [baseimgfilenames[i] for i in val]
    return trainfiles, valfiles, testfiles
Developer: jqug, Project: microscopy-object-detection, Lines: 26, Source: readdata.py
Example 13: main
def main(unused_argv):
    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)
    x_train, x_val, y_train, y_val = train_test_split(
        x_train, y_train, test_size=0.2, random_state=42)
    val_monitor = learn.monitors.ValidationMonitor(
        x_val, y_val, early_stopping_rounds=200)
    # classifier with early stopping on training data
    classifier1 = learn.DNNClassifier(
        hidden_units=[10, 20, 10], n_classes=3, model_dir='/tmp/iris_model/')
    classifier1.fit(x=x_train, y=y_train, steps=2000)
    score1 = metrics.accuracy_score(y_test, classifier1.predict(x_test))
    # classifier with early stopping on validation data, save frequently for
    # monitor to pick up new checkpoints.
    classifier2 = learn.DNNClassifier(
        hidden_units=[10, 20, 10], n_classes=3, model_dir='/tmp/iris_model_val/',
        config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))
    classifier2.fit(x=x_train, y=y_train, steps=2000, monitors=[val_monitor])
    score2 = metrics.accuracy_score(y_test, classifier2.predict(x_test))
    # In many applications, the score is improved by using early stopping
    print('score1: ', score1)
    print('score2: ', score2)
    print('score2 > score1: ', score2 > score1)
Developer: AadityaJ, Project: tensorflow, Lines: 28, Source: iris_val_based_early_stopping.py
Example 14: split_data
def split_data(x_train, y_train):
    """
    Given training data cropped from the original dataset by
    create_training_set.py, split this data up into training,
    cross-validation, and test data.

    INPUTS:
    x_train = Features cropped from original dataset
    y_train = Labels manually input from x_train

    OUTPUTS:
    new_x_train = New training data randomly selected from x_train
    new_x_crossval = Cross-validation samples from x_train
    new_x_test = Test samples from x_train
    new_y_train = Training labels
    new_y_crossval = Cross-validation labels
    new_y_test = Testing labels
    """
    new_x_train, new_x_test, new_y_train, new_y_test \
        = cross_val.train_test_split(x_train,
                                     y_train,
                                     test_size=0.3,
                                     random_state=53)
    new_x_crossval, new_x_test, new_y_crossval, new_y_test \
        = cross_val.train_test_split(new_x_test,
                                     new_y_test,
                                     test_size=0.5,
                                     random_state=41)
    return new_x_train, new_x_crossval, new_x_test, new_y_train, \
        new_y_crossval, new_y_test
Developer: EthanRosenthal, Project: stm-routines, Lines: 28, Source: train_model.py
Example 15: cook
def cook():
    x, y, weights = load_data()
    n_components = 200
    svd = TruncatedSVD(n_components, random_state=42)
    x_unweighted = svd.fit_transform(x)
    x_weighted = svd.fit_transform(weighted(x, weights))
    for i in range(9):
        frac = 1 - (i * 0.01 + 0.01)
        print frac
        x_train, x_test, y_train, y_test = train_test_split(x_unweighted, y, test_size=frac)
        classifier = AdaBoostClassifier(n_estimators=100)
        classifier.fit(x_train, y_train)
        print "Unweighted: ", classifier.score(x_test, y_test)
        x_train, x_test, y_train, y_test = train_test_split(x_weighted, y, test_size=frac)
        classifier = AdaBoostClassifier(n_estimators=100)
        classifier.fit(x_train, y_train)
        print "Weighted: ", classifier.score(x_test, y_test)
        print '--------------------------'
Developer: wangchr, Project: eMeriL, Lines: 25, Source: cook.py
Example 16: main
def main():
    X, Y, encoder, scale = load_train_data('train.csv')
    estimators = 500
    X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, test_size=0.2, random_state=0)
    X_train_real, X_test_real, Y_train_real, Y_test_real = train_test_split(X_train, Y_train, test_size=0.2, random_state=42)
    log.info('Loaded training file')
    X_test, _ = load_csv_file('test.csv', cut_end=False)
    log.info('Loaded test file')
    # Classifier Setup
    tree_clf = ExtraTreesClassifier(n_estimators=estimators, n_jobs=-1,
                                    random_state=42, max_depth=55, min_samples_split=1)
    clf = make_pipeline(TfidfTransformer(), DenseTransformer(), tree_clf)
    log.info('Fitting GradientBoost')
    clf.fit(X_train_real, Y_train_real)
    clf_probs = clf.predict_proba(X_test_real)
    score = log_loss(Y_test_real, clf_probs)
    log.info('Log Loss score un-trained = %f' % score)
    # Calibrate Classifier using ground truth in X,Y_valid
    sig_clf = CalibratedClassifierCV(clf, method="isotonic", cv="prefit")
    log.info('Fitting CalibratedClassifierCV')
    sig_clf.fit(X_valid, Y_valid)
    sig_clf_probs = sig_clf.predict_proba(X_test_real)
    sig_score = log_loss(Y_test_real, sig_clf_probs)
    log.info('Log loss score trained = %f' % sig_score)
    # Ok lets predict the test data with our funky new classifier
    sig_submission_probs = sig_clf.predict_proba(X_test)
    write_out_submission(sig_submission_probs, 'submission.csv')
Developer: Almclean, Project: otto-group, Lines: 31, Source: main.py
Example 17: load_data_sets
def load_data_sets(input_data, labels, split_only=True, valid_set=False):
    class DataSets(object):
        pass
    data_sets = DataSets()
    print("\nSplitting to Train & Test sets for Finetuning")
    if valid_set:
        train_examples, test_examples, train_labels, test_labels = \
            train_test_split(input_data, labels, test_size=0.2)
        train_examples, validation_examples, train_labels, validation_labels = \
            train_test_split(train_examples, train_labels, test_size=0.05)
        data_sets.validation = DataSet(validation_examples, validation_labels)
    else:
        train_examples, test_examples, train_labels, test_labels = \
            train_test_split(input_data, labels, test_size=0.3)
        data_sets.validation = None
    # validation_examples = input_data[:VALIDATION_SIZE]
    # train_examples = input_data[VALIDATION_SIZE:]
    data_sets.train = DataSet(train_examples, train_labels)
    data_sets.test = DataSet(test_examples, test_labels)
    if not split_only:
        data_sets.all = DataSet(input_data, labels)
    return data_sets
Developer: glrs, Project: StackedDAE, Lines: 28, Source: utils.py
Example 18: iris_demo
def iris_demo():
    # load the iris dataset
    iris = load_iris()
    X = iris['data']
    y_labels = iris['target']
    lb = LabelBinarizer()
    y = lb.fit_transform(y_labels)
    # split into training, validation and test datasets
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.25,
                                                        random_state=RANDOM_STATE)
    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train,
                                                          test_size=0.25,
                                                          random_state=RANDOM_STATE)
    # train the neural net
    print("Building logistic regression classifier to classify iris data")
    nn = pynn.ArtificialNeuralNet([X_train.shape[1], 20, y_train.shape[1]])
    print("Training")
    nn.fit(X_train, y_train, X_valid, y_valid,
           batch_size=20, n_epochs=20, learning_rate=0.05,
           random_state=RANDOM_STATE)
    y_pred = nn.predict(X_test)
    print("iris accuracy: {}%".format(
        accuracy_score(y_test.argmax(1), y_pred.argmax(1)) * 100))
Developer: benjamin-croker, Project: pynn, Lines: 30, Source: example.py
Example 19: load_data
def load_data():
    '''
    Load the data, turn it into a word2vec representation, and split it
    into training, validation, and testing sets with ratio 8:1:1.
    '''
    trainingDataFile = '../data/traindata.txt'
    trainingPosFile = '../data/pos_Embedding.txt'
    trainingLabelFile = '../data/trainlabel.txt'
    wordToVecDictFile = '../data/glove/glove.6B.50d.txt'
    print('Vectorizing the features and labels...')
    start_time = timeit.default_timer()
    X, Y = word2vec.createVecFeatsLabels(trainingDataFile, trainingPosFile, trainingLabelFile, wordToVecDictFile, window_size)
    end_time = timeit.default_timer()
    print('Pickling the vectorization files')
    # pickling the X file
    clean_data = open('../data/clean_data.pkl', 'wb')
    pickle.dump(X, clean_data)
    clean_data.close()
    # pickling the labels file
    clean_label = open('../data/clean_label.pkl', 'wb')
    pickle.dump(Y, clean_label)
    clean_label.close()
    print(('The vectorization ran for %.2fm' % ((end_time - start_time) / 60.)))
    print('Splitting into training, validation, and testing sets ...')
    X_train, X_rest, y_train, y_rest = train_test_split(X, Y, test_size=0.2, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_rest, y_rest, test_size=0.5, random_state=42)
    return X_train, X_val, X_test, y_train, y_val, y_test
Developer: 546-NN2project, Project: baseline, Lines: 27, Source: runWithPos.py
Example 20: conv_demo
def conv_demo():
    # load the digits dataset
    digits = load_digits()
    X = digits['data']
    y_labels = digits['target']
    lb = LabelBinarizer()
    y = lb.fit_transform(y_labels)
    # split into training, validation and test datasets
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.25,
                                                        random_state=RANDOM_STATE)
    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train,
                                                          test_size=0.25,
                                                          random_state=RANDOM_STATE)
    # train the neural net
    print("Building neural net to classify digits")
    conv_net = pynn.ConvNet(digits['images'][0].shape, 1, y.shape[1],
                            random_state=RANDOM_STATE)
    print("Training")
    conv_net.fit(X_train, y_train, X_valid, y_valid,
                 batch_size=20, n_epochs=20, learning_rate=0.05)
    y_pred = conv_net.predict(X_test)
    print("digits accuracy: {}%".format(
        accuracy_score(y_test.argmax(1), y_pred.argmax(1)) * 100))
Developer: benjamin-croker, Project: pynn, Lines: 30, Source: example.py
Note: the sklearn.cross_validation.train_test_split examples in this article were compiled by 纯净天空 from GitHub, MSDocs and other source-code and documentation platforms; the snippets were selected from open-source projects contributed by their respective authors. Copyright of the source code remains with the original authors; for distribution and use, please refer to the corresponding project's license. Do not reproduce without permission.