This article collects typical usage examples of the Python function sklearn.utils.shuffle. If you have been wondering what exactly shuffle does and how to use it, the curated code examples below may help.
The following presents 20 code examples of the shuffle function, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help our system recommend better Python code examples.
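Before the examples, here is a minimal, self-contained sketch of the call patterns they all rely on (the array names are illustrative, not taken from any example below): sklearn.utils.shuffle accepts any number of indexable sequences of equal length, shuffles them in unison along the first axis, and returns shuffled copies rather than shuffling in place.

import numpy as np
from sklearn.utils import shuffle

X = np.arange(10).reshape(5, 2)  # toy feature matrix
y = np.array([0, 1, 0, 1, 0])    # toy labels aligned with X's rows

# Shuffle several arrays in unison: row i of X stays paired with y[i].
X_shuf, y_shuf = shuffle(X, y, random_state=0)

# A single array works too. Note that shuffle returns a copy -- it is
# NOT in-place, so the result must always be assigned.
X_only = shuffle(X, random_state=0)

# n_samples additionally draws a subsample without replacement (see Example 8).
X_sub, y_sub = shuffle(X, y, n_samples=3, random_state=0)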
Example 1: generate_feature
def generate_feature(in_file, dump=False, single_only=False, min_count=0):
    f = open(in_file, 'r')
    f.readline()
    training_data, tags = [], []
    total_features = {}
    for line in f.readlines():
        tokens = line.replace('\n', '').split(',')
        fs = [s for s in tokens[1:] if s.isdigit()]
        # ignore invalid data
        if len(fs) != 10:
            continue
        tags.append(tokens[0])
        features = get_feature_array(fs, single_only)
        update_total_features(total_features, features)
        training_data.append(features)
    training_data = transform_to_matrix(total_features, training_data)
    training_data = cut_off(training_data, min_count)
    training_data, tags = shuffle(training_data, tags)  # shuffle returns copies; the result must be assigned
    tags = np.array(tags)
    if dump:
        np.savetxt('preprocessing/dumpX.txt', training_data, fmt='%d', delimiter=',')
        np.savetxt('preprocessing/dumpY.txt', tags[np.newaxis].T, fmt='%s', delimiter=',')
    return total_features, training_data, tags
Author: joshua924 | Project: MachineLearningProject_Team509 | Lines: 25 | Source: feature_generation.py
Example 2: _subsample_data
def _subsample_data(self, X, Y, n=10000):
    if Y is not None:
        X, Y = shuffle(X, Y)
        return X[:n], Y[:n]
    else:
        X = shuffle(X)
        return X[:n]
Author: lazyprogrammer | Project: machine_learning_examples | Lines: 7 | Source: fake_neural_net.py
Example 3: main
def main(is_binary=True):
    train, test, word2idx = get_ptb_data()

    for t in train:
        add_idx_to_tree(t, 0)
    train = [tree2list(t, -1, is_binary) for t in train]
    if is_binary:
        train = [t for t in train if t[3][-1] >= 0]  # for filtering binary labels

    for t in test:
        add_idx_to_tree(t, 0)
    test = [tree2list(t, -1, is_binary) for t in test]
    if is_binary:
        test = [t for t in test if t[3][-1] >= 0]  # for filtering binary labels

    train = shuffle(train)
    train = train[:5000]
    # n_pos = sum(t[3][-1] for t in train)
    # print "n_pos train:", n_pos
    test = shuffle(test)
    test = test[:1000]
    # n_pos = sum(t[3][-1] for t in test)
    # print "n_pos test:", n_pos

    V = len(word2idx)
    print "vocab size:", V
    D = 20
    K = 2 if is_binary else 5

    model = RecursiveNN(V, D, K)
    model.fit(train)
    print "train accuracy:", model.score(train)
    print "test accuracy:", model.score(test)
    print "train f1:", model.f1_score(train)
    print "test f1:", model.f1_score(test)
Author: renjinghai | Project: machine_learning_examples | Lines: 35 | Source: rntn_theano.py
Example 4: splitIntoTrainingAndValidation
def splitIntoTrainingAndValidation(A, B):
    data1 = shuffle(sourceSets[A])  # Note this is a random shuffle, that's
    data2 = shuffle(sourceSets[B])  # why we need many iterations
    freqM = np.minimum(freqs[A], freqs[B])
    freq1tr = np.round(freqM * 0.8)  # Randomly selected 80% for the training set,
    freq1va = freqM - freq1tr        # and the remaining 20% for the validation set
    freq2tr = np.copy(freq1tr)
    freq2va = np.copy(freq1va)
    trainingSetSize = int(sum(freq1tr))  # 1/2 size actually
    validatnSetSize = int(sum(freq1va))
    testSet1size = len(data1) - trainingSetSize - validatnSetSize
    testSet2size = len(data2) - trainingSetSize - validatnSetSize
    X = np.zeros((trainingSetSize*2, numFeatures))
    Xv = np.zeros((validatnSetSize*2, numFeatures))
    Xt = np.zeros((testSet1size+testSet2size, numFeatures))
    y = np.ravel([([0]*trainingSetSize) + ([1]*trainingSetSize)])
    yv = np.ravel([([0]*validatnSetSize) + ([1]*validatnSetSize)])
    yt = np.ravel([([0]*testSet1size) + ([1]*testSet2size)])
    trnIdx = vldIdx = tstIdx = 0
    for item in data1:
        year = item[0]
        if freq1tr[year] > 0:
            X[trnIdx], trnIdx, freq1tr[year] = item[1:], trnIdx+1, freq1tr[year]-1
        elif freq1va[year] > 0:
            Xv[vldIdx], vldIdx, freq1va[year] = item[1:], vldIdx+1, freq1va[year]-1
        else:
            Xt[tstIdx], tstIdx = item[1:], tstIdx+1
    assert trnIdx==trainingSetSize and vldIdx==validatnSetSize and tstIdx==testSet1size
    for item in data2:
        year = item[0]
        if freq2tr[year] > 0:
            X[trnIdx], trnIdx, freq2tr[year] = item[1:], trnIdx+1, freq2tr[year]-1
        elif freq2va[year] > 0:
            Xv[vldIdx], vldIdx, freq2va[year] = item[1:], vldIdx+1, freq2va[year]-1
        else:
            Xt[tstIdx], tstIdx = item[1:], tstIdx+1
    assert trnIdx==trainingSetSize*2 and vldIdx==validatnSetSize*2 and tstIdx==testSet1size+testSet2size
    X, y = shuffle(X, y)  # Just in case... perhaps no reason to shuffle again here?
    fs = SelectKBest(f_classif, k=numFeatures)  # TODO: try other feature selection methods?
    fs.fit(np.concatenate((X, Xv)), np.concatenate((y, yv)))
    return X, y, Xv, yv, Xt, yt, testSet1size, testSet2size, fs.scores_
Author: lelou6666 | Project: WavesOfWhat | Lines: 35 | Source: Validation_and_testing.py
Example 5: compute_distances_and_pairs
def compute_distances_and_pairs(self, pdb_file, nr_contacts=None, nr_noncontacts=None):
    # distance and contacts
    self.features['pair']['Cbdist'] = pdb.distance_map(pdb_file, self.L)

    # mask positions that have too many gaps
    gap_freq = 1 - (self.Ni / self.neff)
    highly_gapped_pos = np.where(gap_freq > self.max_gap_percentage)[0]
    self.features['pair']['Cbdist'][:, highly_gapped_pos] = np.nan
    self.features['pair']['Cbdist'][highly_gapped_pos, :] = np.nan

    # if there are unresolved residues, there will be nan in the distance_map
    with np.errstate(invalid='ignore'):
        self.features['pair']['contact'] = (self.features['pair']['Cbdist'] <= self.contact_threshold) * 1
        self.features['pair']['nocontact'] = (self.features['pair']['Cbdist'] > self.non_contact_threshold) * 1

    indices_contact = np.where(np.triu(self.features['pair']['contact'], k=self.seq_separation))
    indices_contact = tuple(shuffle(indices_contact[0], indices_contact[1], random_state=0))
    if nr_contacts:
        indices_contact = indices_contact[0][:nr_contacts], indices_contact[1][:nr_contacts]

    indices_nocontact = np.where(np.triu(self.features['pair']['nocontact'], k=self.seq_separation))
    indices_nocontact = tuple(shuffle(indices_nocontact[0], indices_nocontact[1], random_state=0))
    if nr_noncontacts:
        indices_nocontact = indices_nocontact[0][:nr_noncontacts], indices_nocontact[1][:nr_noncontacts]

    # update indices of i<j for only relevant pairs
    self.ij_ind_upper = np.array(list(indices_contact[0]) + list(indices_nocontact[0])), np.array(list(indices_contact[1]) + list(indices_nocontact[1]))
Author: susannvorberg | Project: contact_prediction | Lines: 28 | Source: AlignmentFeatures.py
Example 6: get_aa_cross_val
def get_aa_cross_val(L, X, Y, AA, tsize=None, rstate=-1):
    """Get test data from dataset"""
    test_position = []
    aa_y = np.zeros(Y.shape)
    for i in xrange(len(Y)):
        if L[i][-1] == AA:
            aa_y[i] = 1
            test_position.append(i)
    if tsize:
        t_len = int(tsize * len(Y))
        # positions that are 0 without being the one for AA
        zero_pos = np.where(np.logical_and(Y == 0, aa_y == 0))[0]
        clen = t_len - len(test_position)
        if clen > 0:
            random_zero_pos = np.random.choice(zero_pos, clen, replace=False)
            test_position.extend(random_zero_pos)
    test_position = np.random.permutation(test_position)
    mask = np.ones(Y.shape, dtype=bool)
    mask[test_position] = False
    train_position = np.array(range(len(mask)))[mask]
    if rstate > 0:
        return shuffle(train_position, random_state=rstate), shuffle(test_position, random_state=rstate)
    # in this case, suppose we want only the train and test index
    else:
        return train_position, test_position
Author: UdeM-LBIT | Project: CoreTracker | Lines: 28 | Source: classifier.py
Example 7: generator3
def generator3(samples, batch_size=32):
    num_samples = len(samples)
    while 1:  # Loop forever so the generator never terminates
        samples = shuffle(samples)  # sklearn's shuffle returns a copy; the result must be assigned
        for offset in range(0, num_samples, batch_size):
            batch_samples = samples[offset:offset+batch_size]
            car_images = []
            steering_angles = []
            for batch_sample in batch_samples:
                img_center = cv2.imread(path+batch_sample[0].split('\\')[-1])
                img_left = cv2.imread(path+batch_sample[1].split('\\')[-1])
                img_right = cv2.imread(path+batch_sample[2].split('\\')[-1])
                correction = 0.3  # this is a parameter to tune
                steering_center = float(batch_sample[3])
                steering_left = steering_center + correction
                steering_right = steering_center - correction
                # add images and angles to data set
                car_images.extend([img_center, img_left, img_right])
                steering_angles.extend([steering_center, steering_left, steering_right])
            # trim image to only see section with road
            X_train = np.array(car_images)
            y_train = np.array(steering_angles)
            yield shuffle(X_train, y_train)
Author: chauvinj735 | Project: Behavior-Cloning | Lines: 28 | Source: model.py
Example 8: import_images
def import_images():
    # IMPLEMENT TIMER CUTOFF FOR FEAT EXT IF IT TAKES TOO LONG
    d_feats = {'orb': []}
    c_feats = {'orb': []}
    (cat_paths, dog_paths) = get_filenames(TRAINING_FOLDER)
    cat_train_pts = []
    dog_train_pts = []
    for image_fn in shuffle(dog_paths, n_samples=400, random_state=0):
        odesc_pts = extract_desc_pts(image_fn)
        try:
            for pt in odesc_pts:
                d_feats['orb'].append(pt)
        except TypeError:
            print image_fn
            continue
    for image_fn in shuffle(cat_paths, n_samples=400, random_state=0):
        odesc_pts = extract_desc_pts(image_fn)
        try:
            for pt in odesc_pts:
                c_feats['orb'].append(pt)
        except TypeError:
            print image_fn
            continue
    cat_k_means = KMeans(n_jobs=-1, n_clusters=200)
    cat_k_means.fit(c_feats['orb'])
    print 'dog calc'
    dog_k_means = KMeans(n_jobs=-1, n_clusters=200)
    dog_k_means.fit(d_feats['orb'])
    print 'saving....'
    with open('/home/max/CVD/d_o200c200s400.pickle', 'wb') as handle:
        pickle.dump(dog_k_means.cluster_centers_, handle)
    with open('/home/max/CVD/c_o200c200s400.pickle', 'wb') as handle:
        pickle.dump(cat_k_means.cluster_centers_, handle)
    return '\n\n\n DONE '
Author: Bingjiling | Project: Cat-VS-Dog | Lines: 34 | Source: CVD_feat.py
Example 9: generate_training_data
def generate_training_data(image_paths, angles, batch_size=128, validation_flag=False):
    '''
    Method for the model training data generator to load, process, and distort images, then yield them to
    the model. If 'validation_flag' is True, the image is not distorted. Also flips images with turning
    angle magnitudes greater than 0.33, to give them more weight and mitigate bias toward low and zero
    turning angles.
    '''
    image_paths, angles = shuffle(image_paths, angles)
    X, y = ([], [])
    while True:
        for i in range(len(angles)):
            img = cv2.imread(image_paths[i])
            angle = angles[i]
            img = preprocess_image(img)
            if not validation_flag:
                img, angle = random_distort(img, angle)
            X.append(img)
            y.append(angle)
            if len(X) == batch_size:
                yield (np.array(X), np.array(y))
                X, y = ([], [])
                image_paths, angles = shuffle(image_paths, angles)
            # flip horizontally and invert steer angle, if magnitude is > 0.33
            if abs(angle) > 0.33:
                img = cv2.flip(img, 1)
                angle *= -1
                X.append(img)
                y.append(angle)
                if len(X) == batch_size:
                    yield (np.array(X), np.array(y))
                    X, y = ([], [])
                    image_paths, angles = shuffle(image_paths, angles)
Author: Shtaiven | Project: CarND-Behavioral-Cloning-Project | Lines: 30 | Source: model.py
Example 10: splitIntoTrainingValidation
def splitIntoTrainingValidation(A, B):  # TODO: 3rd parameter: the desired value of (validatSet1size + validatSet2size)
    data1 = shuffle(sourceSets[A])  # Note this is a random shuffle, that's
    data2 = shuffle(sourceSets[B])  # why we need many iterations
    freq1 = np.minimum(freqs[A], freqs[B])
    if sum(freq1) > maxTrainSetSz:
        freq1 = np.round(freq1 * (maxTrainSetSz * 1.0 / sum(freq1)))
    trainingSetSize = int(sum(freq1))  # Half size actually. Approximately <= maxTrainSetSz
    validatSet1size = len(data1) - trainingSetSize
    validatSet2size = len(data2) - trainingSetSize
    X = np.zeros((trainingSetSize*2, numFeatures))
    Xv = np.zeros((validatSet1size+validatSet2size, numFeatures))
    y = np.ravel([([0]*trainingSetSize) + ([1]*trainingSetSize)])
    yv = np.ravel([([0]*validatSet1size) + ([1]*validatSet2size)])
    freq2 = np.copy(freq1)
    trnIdx = valIdx = 0
    for item in data1:
        year = item[0]
        if freq1[year] > 0:
            freq1[year] -= 1
            X[trnIdx] = item[1:]
            trnIdx += 1
        else:
            Xv[valIdx] = item[1:]
            valIdx += 1
    assert trnIdx==trainingSetSize and valIdx==validatSet1size
    for item in data2:
        year = item[0]
        if freq2[year] > 0:
            freq2[year] -= 1
            X[trnIdx] = item[1:]
            trnIdx += 1
        else:
            Xv[valIdx] = item[1:]
            valIdx += 1
    assert trnIdx==trainingSetSize*2 and valIdx==validatSet1size+validatSet2size
    return X, y, Xv, yv, validatSet1size, validatSet2size
Author: boris-k | Project: WavesOfWhat | Lines: 35 | Source: Classify.py
Example 11: cluster
def cluster(m, n_colors=32):
    from sklearn.utils import shuffle
    from sklearn.cluster import KMeans
    from sklearn.metrics import pairwise_distances_argmin

    def recreate_image(codebook, labels, w, h):
        """Recreate the (compressed) image from the code book & labels"""
        d = codebook.shape[1]
        image = np.zeros((w, h, d))
        label_idx = 0
        for i in range(w):
            for j in range(h):
                image[i][j] = codebook[labels[label_idx]]
                label_idx += 1
        return image

    # Load Image and transform to a 2D numpy array.
    w, h, d = original_shape = tuple(m.shape)
    image_array = np.reshape(m, (w * h, d))
    image_array_sample = shuffle(image_array, random_state=0)[:1000]
    kmeans = KMeans(n_clusters=n_colors).fit(image_array_sample)
    codebook_random = shuffle(image_array, random_state=0)[:n_colors + 1]
    labels_random = pairwise_distances_argmin(codebook_random, image_array, axis=0)
    return recreate_image(codebook_random, labels_random, w, h)
Author: salvador-dali | Project: hackerrank_ai | Lines: 26 | Source: p_7_rubiks_cube_investigation.py
Example 12: main
def main():
    train, test, word2idx = get_ptb_data()

    for t in train:
        add_idx_to_tree(t, 0)
    train = [tree2list(t, -1, True) for t in train]
    train = [t for t in train if t[3][-1] >= 0]  # for filtering binary labels

    for t in test:
        add_idx_to_tree(t, 0)
    test = [tree2list(t, -1, True) for t in test]
    test = [t for t in test if t[3][-1] >= 0]  # for filtering binary labels

    train = shuffle(train)
    train = train[:1000]
    # n_pos = sum(t[3][-1] for t in train)
    # print "n_pos train:", n_pos
    test = shuffle(test)
    test = test[:100]
    # n_pos = sum(t[3][-1] for t in test)
    # print "n_pos test:", n_pos

    V = len(word2idx)
    print "vocab size:", V
    D = 80
    K = 5

    model = RecursiveNN(V, D, K)
    model.fit(train, epochs=3, activation=T.nnet.relu)
    print "train accuracy:", model.score(train)
    print "test accuracy:", model.score(test)
    print "train f1:", model.f1_score(train)
    print "test f1:", model.f1_score(test)
Author: CesarChaMal | Project: machine_learning_examples | Lines: 33 | Source: rntn_theano.py
Example 13: process_data
def process_data():
    global num_classes, num_train, num_test
    X_train, Y_train = load_data('Train')
    X_test, Y_test = load_data('Test')
    X_train = X_train.astype(np.float64)
    X_test = X_test.astype(np.float64)
    num_train = X_train.shape[0]
    num_test = X_test.shape[0]
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_test -= mean_image
    X_train = X_train.reshape(-1, 1, img_dim, img_dim)
    Y_train -= 1
    X_train, Y_train = shuffle(X_train, Y_train)
    X_test = X_test.reshape(-1, 1, img_dim, img_dim)
    Y_test -= 1
    X_test, Y_test = shuffle(X_test, Y_test)
    print 'Training X shape :- ', X_train.shape
    print 'Training Y shape :- ', Y_train.shape
    print 'Testing X shape :- ', X_test.shape
    print 'Testing Y shape :- ', Y_test.shape
    return X_train, Y_train, X_test, Y_test
Author: PankajKataria | Project: BanglaReco | Lines: 28 | Source: solution.py
Example 14: frames2batch
def frames2batch(k=12, batch_size=1024, is_calib=False):
    pos = util.get_files(rootdir='F:\\train_data\\pos\\')
    neg = util.get_files(rootdir='F:\\train_data\\neg\\')
    pos = shuffle(pos)
    neg = shuffle(neg)
    total = pos + neg
    total = shuffle(total)
    batch = []
    c = 0
    bpath = 'F:\\train_data\\batch\\'
    for item_path in total:
        frame = fr.get_frame(item_path)
        frame_r = fr.resize_frame(frame, (k, k))
        if frame_r is None:  # 'is None' avoids an ambiguous elementwise comparison on arrays
            continue
        vec = fr.frame_to_vect(frame_r)
        label = 1 if item_path.split('\\')[-1].find('pos') > 0 else 0
        print(item_path, label)
        batch.append((vec, label))
        if len(batch) > 0 and len(batch) % batch_size == 0:
            batch = sp.array(batch)
            sp.savez(bpath + str(c) + '_' + str(k) + ('_' if not is_calib else '_calib-') + 'net', batch)
            batch = []
            c += 1
    if len(batch) > 0 and len(batch) % batch_size == 0:
        batch = sp.array(batch)
        sp.savez(bpath + str(c) + '_' + str(k) + ('_' if not is_calib else '_calib') + '-net', batch)
        batch = []
        c += 1
Author: gogolgrind | Project: Cascade-CNN-Face-Detection | Lines: 31 | Source: datasets.py
Example 15: getMNIST
def getMNIST():
    # data shape: train (50000, 784), test (10000, 784)
    # already scaled from 0..1 and converted to float32
    datadir = '../large_files/'
    if not os.path.exists(datadir):
        datadir = ''
    input_file = "%smnist.pkl.gz" % datadir
    if not os.path.exists(input_file):
        url = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        with open(input_file, "wb") as out:
            f = urllib2.urlopen(url)
            out.write(f.read())
            out.flush()
    with gzip.open(input_file) as f:
        train, valid, test = cPickle.load(f)
    Xtrain, Ytrain = train
    Xvalid, Yvalid = valid
    Xtest, Ytest = test

    Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
    Xtest, Ytest = shuffle(Xtest, Ytest)
    # try to take a smaller sample
    Xtrain = Xtrain[0:30000]
    Ytrain = Ytrain[0:30000]
    Xtest = Xtest[0:1000]
    Ytest = Ytest[0:1000]
    # build the indicator matrices after shuffling/subsampling so they stay aligned with the labels
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)
    return Xtrain.reshape(len(Xtrain), 1, 28, 28), Ytrain, Ytrain_ind, Xtest.reshape(len(Xtest), 1, 28, 28), Ytest, Ytest_ind
Author: CesarChaMal | Project: machine_learning_examples | Lines: 35 | Source: renet.py
Example 16: load_whale_data
def load_whale_data(train_file, test_file, nb_classes=447):
    print("loading whale data")

    # normalize train data
    print("--> loading training data")
    train_data = read_csv(train_file)
    X_train = train_data[:, 1:]
    X_train = X_train.astype(np.float32)
    X_train = X_train / 255
    y_train = np.vstack(train_data[:, 0])
    y_train = y_train.astype(np.uint16)
    X_train, y_train = shuffle(X_train, y_train, random_state=42)
    X_train = X_train.reshape(-1, 1, 96, 96)
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    print("--> training data loaded")

    # normalize test data
    print("--> loading test data")
    test_data = read_csv(test_file)
    X_test = test_data[:, 1:]
    X_test = X_test.astype(np.float32)
    X_test = X_test / 255
    y_test = np.vstack(test_data[:, 0])
    y_test = y_test.astype(np.uint16)
    X_test, y_test = shuffle(X_test, y_test, random_state=42)
    X_test = X_test.reshape(-1, 1, 96, 96)
    Y_test = np_utils.to_categorical(y_test, nb_classes)
    print("--> test data loaded")

    return (X_train, Y_train, X_test, Y_test)
Author: deerishi | Project: genetic-algorithm-for-cnn | Lines: 34 | Source: whale_test2_keras.py
Example 17: run_kmeans
def run_kmeans(inFile, n_colors):
    china = cv2.imread(inFile)
    china = np.array(china, dtype=np.float64) / 255
    w, h, d = original_shape = tuple(china.shape)
    assert d == 3
    image_array = np.reshape(china, (w * h, d))

    print("\tFitting model on a small sub-sample of the data")
    t0 = time()
    image_array_sample = shuffle(image_array, random_state=0)[:1000]
    kmeans = KMeans(n_clusters=n_colors, random_state=0).fit(image_array_sample)  # older sklearn releases spelled this k=n_colors
    print("\tdone in %0.3fs." % (time() - t0))

    # Get labels for all points
    print("\tPredicting color indices on the full image (k-means)")
    t0 = time()
    labels = kmeans.predict(image_array)
    print("\tdone in %0.3fs." % (time() - t0))

    codebook_random = shuffle(image_array, random_state=0)[:n_colors + 1]
    print("\tPredicting color indices on the full image (random)")
    t0 = time()
    dist = euclidean_distances(codebook_random, image_array, squared=True)
    labels_random = dist.argmin(axis=0)
    print("\tdone in %0.3fs." % (time() - t0))

    img_kmeans = recreate_image(kmeans.cluster_centers_, labels, w, h)
    img_random = recreate_image(codebook_random, labels_random, w, h)
    return china, img_kmeans, img_random
Author: AmirooR | Project: scripts | Lines: 29 | Source: run_kmeans.py
Example 18: getTrainTestData
def getTrainTestData():
    data = pickle.load(open('./data/60_unnormalized.p', "rb"))
    raw_meta = []
    raw_data = []
    for k, v in data.iteritems():
        for i in range(len(v)):
            _d = v[i]
            previous = [[0]*LOCATION_ID_MAX, [0]*LOCATION_ID_MAX]
            if i == 0:
                # previous date
                date_time = datetime.datetime.strptime(k, '%Y-%m-%d')
                previous_day = date_time - datetime.timedelta(1)
                str_previous_day = previous_day.strftime('%Y-%m-%d')
                if str_previous_day in data:
                    previous[0] = data[str_previous_day][-2]
                    previous[1] = data[str_previous_day][-1]
            elif i == 1:
                # previous date
                date_time = datetime.datetime.strptime(k, '%Y-%m-%d')
                previous_day = date_time - datetime.timedelta(1)
                str_previous_day = previous_day.strftime('%Y-%m-%d')
                previous[1] = v[i-1]
                if str_previous_day in data:
                    previous[0] = data[str_previous_day][-1]
            else:
                previous[0] = v[i-2]
                previous[1] = v[i-1]
            raw_meta.append({"date": k, "interval": i, "previous": previous})
            raw_data.append(_d)
    num = len(raw_data)
    train_meta_data = raw_meta[0:int(0.6*num)]
    valid_meta_data = raw_meta[int(0.6*num):int(0.8*num)]
    test_meta_data = raw_meta[int(0.8*num):]
    train_y = raw_data[0:int(0.6*num)]
    valid_y = raw_data[int(0.6*num):int(0.8*num)]
    test_y = raw_data[int(0.8*num):]
    train_X = getFeatures(train_meta_data)
    valid_X = getFeatures(valid_meta_data)
    test_X = getFeatures(test_meta_data)
    train_X = np.array(train_X, dtype=np.float32)
    valid_X = np.array(valid_X, dtype=np.float32)
    test_X = np.array(test_X, dtype=np.float32)
    train_y = np.array(train_y, dtype=np.float32)
    valid_y = np.array(valid_y, dtype=np.float32)
    test_y = np.array(test_y, dtype=np.float32)
    train_X, train_y = shuffle(train_X, train_y, random_state=0)
    valid_X, valid_y = shuffle(valid_X, valid_y, random_state=1)
    test_X, test_y = shuffle(test_X, test_y, random_state=2)
    return train_X, train_y, valid_X, valid_y, test_X, test_y
Author: Jerryzcn | Project: rnn_hack | Lines: 60 | Source: train_data3.py
Example 19: main
def main():
    train, test, word2idx = get_ptb_data()

    for t in train:
        add_idx_to_tree(t, 0)
    train = [tree2list(t, -1, is_binary=True) for t in train]
    train = [t for t in train if t[3][-1] >= 0]  # for filtering binary labels

    for t in test:
        add_idx_to_tree(t, 0)
    test = [tree2list(t, -1, is_binary=True) for t in test]
    test = [t for t in test if t[3][-1] >= 0]  # for filtering binary labels

    train = shuffle(train)
    train = train[:1000]
    test = shuffle(test)
    test = test[:500]

    V = len(word2idx)
    print "vocab size:", V
    D = 80
    K = 5

    model = RecursiveNN(V, D, K)
    model.fit(train, reg=0, activation=T.nnet.relu)
    print "train accuracy:", model.score(train)
    print "test accuracy:", model.score(test)
Author: CesarChaMal | Project: machine_learning_examples | Lines: 27 | Source: recursive_theano.py
Example 20: shuffle_data
def shuffle_data(X, y, no_lable_vs_lable):
    X, y = shuffle(X, y, random_state=0)
    # balance labels by subsampling:
    y_dict = defaultdict(list)
    for i, y_i in enumerate(y):
        y_dict[y_i[0]].append(i)
    # subsample
    X_sub = []
    y_sub = []
    y_set = set(y_dict)
    y_dict_len = [len(y_dict[y_set_i]) for y_set_i in sorted(list(y_set))]
    quotent = y_dict_len[0] / sum(y_dict_len)
    print 'length cutting'
    print str(len(X))
    # generalize over multiple classes:
    if quotent > no_lable_vs_lable:
        # decrease 0 class labels:
        newLen = int(2*y_dict_len[1]*no_lable_vs_lable)
        id_new = y_dict['0'][:newLen] + [y_dict[id] for id in y_set if not id in ['0']][0]
        X_sub = [X[id] for id in id_new]
        y_sub = [y[id] for id in id_new]
        print(str(newLen), 'new 0 class length: ', str(len(id_new)))
    else:
        newLen = int(y_dict_len[0]*(1-no_lable_vs_lable))
        id_new = y_dict['1'][:newLen] + [y_dict[id] for id in y_set if not id in ['0']][0]
        X_sub = [X[id] for id in id_new]
        y_sub = [y[id] for id in id_new]
        print(str(newLen), 'new 1 class length')
    X, y = shuffle(X_sub, y_sub, random_state=0)
    print str(len(X_sub))
    print '--------------'
    return X, y
Author: victorbergelin | Project: TQTM33 | Lines: 32 | Source: learning_working.py
Note: The sklearn.utils.shuffle examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors, and distribution and use are governed by each project's license. Do not republish without permission.