
Python utils.shuffle Function Code Examples


This article collects typical usage examples of the Python function sklearn.utils.shuffle. If you have been wondering what exactly shuffle does, how to call it, or what real-world uses look like, the curated code examples below should help.



Below are 20 code examples of the shuffle function, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help our system recommend better Python code examples.
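Before the collected examples, here is a minimal sketch of the basic API (the variable names are illustrative only, not taken from the projects below): sklearn.utils.shuffle accepts one or more indexable structures of the same length and returns consistently permuted copies; it does not shuffle in place.

import numpy as np
from sklearn.utils import shuffle

X = np.arange(10).reshape(5, 2)   # 5 samples, 2 features (illustrative data)
y = np.array([0, 1, 0, 1, 0])     # labels aligned with the rows of X

# Returns new, consistently permuted copies; rows of X stay aligned with y.
X_shuf, y_shuf = shuffle(X, y, random_state=0)

# A single argument yields a single shuffled copy (note: not in place).
X_only = shuffle(X, random_state=0)

# n_samples draws a random subsample without replacement.
X_sub, y_sub = shuffle(X, y, random_state=0, n_samples=3)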

Example 1: generate_feature

def generate_feature(in_file, dump=False, single_only=False, min_count=0):
  f = open(in_file, 'r')
  f.readline()
  training_data, tags = [], []
  total_features = {}

  for line in f.readlines():
    tokens = line.replace('\n', '').split(',')
    fs = [s for s in tokens[1:] if s.isdigit()]
    # ignore invalid data
    if len(fs) != 10:
      continue
    tags.append(tokens[0])
    features = get_feature_array(fs, single_only)
    update_total_features(total_features, features)
    training_data.append(features)

  training_data = transform_to_matrix(total_features, training_data)
  training_data = cut_off(training_data, min_count)
  # shuffle returns shuffled copies (it is not in-place); reassign to keep data and tags aligned
  training_data, tags = shuffle(training_data, tags)
  tags = np.array(tags)
  if dump:
    np.savetxt('preprocessing/dumpX.txt', training_data, fmt='%d', delimiter=',')
    np.savetxt('preprocessing/dumpY.txt', tags[np.newaxis].T, fmt='%s', delimiter=',')
  return total_features, training_data, tags
Developer: joshua924, Project: MachineLearningProject_Team509, Lines: 25, Source: feature_generation.py


Example 2: _subsample_data

 def _subsample_data(self, X, Y, n=10000):
   if Y is not None:
     X, Y = shuffle(X, Y)
     return X[:n], Y[:n]
   else:
     X = shuffle(X)
     return X[:n]
Developer: lazyprogrammer, Project: machine_learning_examples, Lines: 7, Source: fake_neural_net.py


Example 3: main

def main(is_binary=True):
    train, test, word2idx = get_ptb_data()

    for t in train:
        add_idx_to_tree(t, 0)
    train = [tree2list(t, -1, is_binary) for t in train]
    if is_binary:
        train = [t for t in train if t[3][-1] >= 0] # for filtering binary labels

    for t in test:
        add_idx_to_tree(t, 0)
    test = [tree2list(t, -1, is_binary) for t in test]
    if is_binary:
        test = [t for t in test if t[3][-1] >= 0] # for filtering binary labels

    train = shuffle(train)
    train = train[:5000]
    # n_pos = sum(t[3][-1] for t in train)
    # print "n_pos train:", n_pos
    test = shuffle(test)
    test = test[:1000]
    # n_pos = sum(t[3][-1] for t in test)
    # print "n_pos test:", n_pos

    V = len(word2idx)
    print "vocab size:", V
    D = 20
    K = 2 if is_binary else 5

    model = RecursiveNN(V, D, K)
    model.fit(train)
    print "train accuracy:", model.score(train)
    print "test accuracy:", model.score(test)
    print "train f1:", model.f1_score(train)
    print "test f1:", model.f1_score(test)
Developer: renjinghai, Project: machine_learning_examples, Lines: 35, Source: rntn_theano.py


Example 4: splitIntoTrainingAndValidation

def splitIntoTrainingAndValidation(A, B):
	data1 = shuffle(sourceSets[A])    # Note this is a random shuffle, that's
	data2 = shuffle(sourceSets[B])    #                                   why we need many iterations
	freqM = np.minimum(freqs[A], freqs[B])
	freq1tr = np.round(freqM * 0.8)        # Randomly selected 80% for the training set,
	freq1va = freqM - freq1tr              # and the remaining 20% for the validation set
	freq2tr = np.copy(freq1tr)
	freq2va = np.copy(freq1va)
	trainingSetSize = int(sum(freq1tr))  # 1/2 size actually
	validatnSetSize = int(sum(freq1va))
	testSet1size = len(data1) - trainingSetSize - validatnSetSize
	testSet2size = len(data2) - trainingSetSize - validatnSetSize
	X  = np.zeros((trainingSetSize*2,         numFeatures))
	Xv = np.zeros((validatnSetSize*2,         numFeatures))
	Xt = np.zeros((testSet1size+testSet2size, numFeatures))
	y  = np.ravel([([0]*trainingSetSize) + ([1]*trainingSetSize)])
	yv = np.ravel([([0]*validatnSetSize) + ([1]*validatnSetSize)])
	yt = np.ravel([([0]*testSet1size)    + ([1]*testSet2size)])
	trnIdx = vldIdx = tstIdx = 0
	for item in data1:
		year = item[0]
		if   freq1tr[year] > 0:   X[trnIdx], trnIdx, freq1tr[year]  =  item[1:],  trnIdx+1,  freq1tr[year]-1
		elif freq1va[year] > 0:  Xv[vldIdx], vldIdx, freq1va[year]  =  item[1:],  vldIdx+1,  freq1va[year]-1
		else:                    Xt[tstIdx], tstIdx                 =  item[1:],  tstIdx+1
	assert trnIdx==trainingSetSize   and vldIdx==validatnSetSize   and tstIdx==testSet1size
	for item in data2:
		year = item[0]
		if   freq2tr[year] > 0:   X[trnIdx], trnIdx, freq2tr[year]  =  item[1:],  trnIdx+1,  freq2tr[year]-1
		elif freq2va[year] > 0:  Xv[vldIdx], vldIdx, freq2va[year]  =  item[1:],  vldIdx+1,  freq2va[year]-1
		else:                    Xt[tstIdx], tstIdx                 =  item[1:],  tstIdx+1
	assert trnIdx==trainingSetSize*2 and vldIdx==validatnSetSize*2 and tstIdx==testSet1size+testSet2size
	X, y = shuffle(X, y)   # Just in case... perhaps no reason to shuffle again here?
	fs = SelectKBest(f_classif, k = numFeatures)   # TODO: try other feature selection methods?
	fs.fit(np.concatenate((X, Xv)), np.concatenate((y, yv)))
	return X, y, Xv, yv, Xt, yt, testSet1size, testSet2size, fs.scores_
Developer: lelou6666, Project: WavesOfWhat, Lines: 35, Source: Validation_and_testing.py


Example 5: compute_distances_and_pairs

    def compute_distances_and_pairs(self, pdb_file, nr_contacts=None, nr_noncontacts=None):
        #distance and contacts
        self.features['pair']['Cbdist'] = pdb.distance_map(pdb_file, self.L)

        #mask positions that have too many gaps
        gap_freq = 1 - (self.Ni / self.neff)
        highly_gapped_pos = np.where(gap_freq > self.max_gap_percentage)[0]
        self.features['pair']['Cbdist'][:,highly_gapped_pos] = np.nan
        self.features['pair']['Cbdist'][highly_gapped_pos, :] = np.nan

        #if there are unresolved residues, there will be nan in the distance_map
        with np.errstate(invalid='ignore'):
            self.features['pair']['contact'] = (self.features['pair']['Cbdist'] <= self.contact_threshold) * 1
            self.features['pair']['nocontact'] = (self.features['pair']['Cbdist'] > self.non_contact_threshold) * 1

        indices_contact = np.where(np.triu(self.features['pair']['contact'], k=self.seq_separation))
        indices_contact = tuple(shuffle(indices_contact[0],indices_contact[1], random_state=0))
        if nr_contacts:
            indices_contact = indices_contact[0][:nr_contacts], indices_contact[1][:nr_contacts]

        indices_nocontact = np.where(np.triu(self.features['pair']['nocontact'], k=self.seq_separation))
        indices_nocontact = tuple(shuffle(indices_nocontact[0],indices_nocontact[1], random_state=0))
        if nr_noncontacts:
            indices_nocontact = indices_nocontact[0][:nr_noncontacts], indices_nocontact[1][:nr_noncontacts]


        #update indices of i<j for only relevant pairs
        self.ij_ind_upper = np.array(list(indices_contact[0]) + list(indices_nocontact[0])), np.array(list(indices_contact[1]) + list(indices_nocontact[1]))
Developer: susannvorberg, Project: contact_prediction, Lines: 28, Source: AlignmentFeatures.py


Example 6: get_aa_cross_val

def get_aa_cross_val(L, X, Y, AA, tsize=None, rstate=-1):
    """Get test data from dataset"""
    test_position = []
    aa_y = np.zeros(Y.shape)
    for i in xrange(len(Y)):
        if L[i][-1] == AA:
            aa_y[i] = 1
            test_position.append(i)

    if tsize:
        t_len = int(tsize * len(Y))
        # positions that are 0 without being the one for AA
        zero_pos = np.where(np.logical_and(Y == 0, aa_y == 0))[0]
        clen = t_len - len(test_position)
        if clen > 0:
            random_zero_pos = np.random.choice(zero_pos, clen, replace=False)
            test_position.extend(random_zero_pos)

    test_position = np.random.permutation(test_position)
    mask = np.ones(Y.shape, dtype=bool)
    mask[test_position] = False
    train_position = np.array(range(len(mask)))[mask]

    if rstate > 0:
        return shuffle(train_position, random_state=rstate), shuffle(test_position, random_state=rstate)
    # in this case, suppose we want only the train and test index
    else:
        return train_position, test_position
Developer: UdeM-LBIT, Project: CoreTracker, Lines: 28, Source: classifier.py


Example 7: generator3

def generator3(samples, batch_size=32):
    num_samples = len(samples)
    
    while 1: # Loop forever so the generator never terminates
        samples = shuffle(samples)  # shuffle returns a copy; reassign so the shuffled order is used
        for offset in range(0, num_samples, batch_size):
            batch_samples = samples[offset:offset+batch_size]

            car_images = []
            steering_angles = []
            for batch_sample in batch_samples:
                img_center = cv2.imread(path+batch_sample[0].split('\\')[-1])
                img_left   = cv2.imread(path+batch_sample[1].split('\\')[-1])
                img_right  = cv2.imread(path+batch_sample[2].split('\\')[-1])
                
                correction = 0.3 # this is a parameter to tune
                steering_center = float(batch_sample[3])
                steering_left   = steering_center + correction
                steering_right  = steering_center - correction
                
                # add images and angles to data set
                car_images.extend([img_center, img_left, img_right])
                steering_angles.extend([steering_center, steering_left, steering_right])
                
            # trim image to only see section with road
            X_train = np.array(car_images)
            y_train = np.array(steering_angles)
            yield shuffle(X_train, y_train)
Developer: chauvinj735, Project: Behavior-Cloning, Lines: 28, Source: model.py


Example 8: import_images

def import_images():
	# TODO: implement a timer cutoff in case feature extraction takes too long
	d_feats = {'orb': []}
	c_feats = {'orb': []}
	(cat_paths, dog_paths) = get_filenames(TRAINING_FOLDER)
	cat_train_pts = []
	dog_train_pts = []
	for image_fn in shuffle(dog_paths, n_samples = 400, random_state=0):
		odesc_pts = extract_desc_pts(image_fn)
		try:
			for pt in odesc_pts:
				d_feats['orb'].append(pt)
		except TypeError:
			print image_fn
			continue
	for image_fn in shuffle(cat_paths, n_samples = 400, random_state=0):
		odesc_pts = extract_desc_pts(image_fn)
		try:
			for pt in odesc_pts:
				c_feats['orb'].append(pt)
		except TypeError:
			print image_fn
			continue
	cat_k_means = KMeans(n_jobs=-1, n_clusters=200)
	cat_k_means.fit(c_feats['orb'])
	print 'dog calc'
	dog_k_means = KMeans(n_jobs=-1, n_clusters=200)
	dog_k_means.fit(d_feats['orb'])
	print 'saving....'
	with open('/home/max/CVD/d_o200c200s400.pickle', 'wb') as handle:
		pickle.dump(dog_k_means.cluster_centers_, handle)
	with open('/home/max/CVD/c_o200c200s400.pickle', 'wb') as handle:
		pickle.dump(cat_k_means.cluster_centers_, handle)
	return '\n\n\n DONE   '	
Developer: Bingjiling, Project: Cat-VS-Dog, Lines: 34, Source: CVD_feat.py


Example 9: generate_training_data

def generate_training_data(image_paths, angles, batch_size=128, validation_flag=False):
    '''
    Generator for model training data: loads, processes, and distorts images,
    then yields them to the model in batches. If 'validation_flag' is True, the
    images are not distorted. Images with turning-angle magnitudes greater than
    0.33 are also flipped (with the steering angle inverted), to give them more
    weight and mitigate the bias toward low and zero turning angles.
    '''
    image_paths, angles = shuffle(image_paths, angles)
    X,y = ([],[])
    while True:       
        for i in range(len(angles)):
            img = cv2.imread(image_paths[i])
            angle = angles[i]
            img = preprocess_image(img)
            if not validation_flag:
                img, angle = random_distort(img, angle)
            X.append(img)
            y.append(angle)
            if len(X) == batch_size:
                yield (np.array(X), np.array(y))
                X, y = ([],[])
                image_paths, angles = shuffle(image_paths, angles)
            # flip horizontally and invert steer angle, if magnitude is > 0.33
            if abs(angle) > 0.33:
                img = cv2.flip(img, 1)
                angle *= -1
                X.append(img)
                y.append(angle)
                if len(X) == batch_size:
                    yield (np.array(X), np.array(y))
                    X, y = ([],[])
                    image_paths, angles = shuffle(image_paths, angles)
Developer: Shtaiven, Project: CarND-Behavioral-Cloning-Project, Lines: 30, Source: model.py


Example 10: splitIntoTrainingValidation

def splitIntoTrainingValidation(A, B):  # TODO: 3rd parameter: the desired value of (validatSet1size + validatSet2size)
	data1 = shuffle(sourceSets[A])    # Note this is a random shuffle, that's
	data2 = shuffle(sourceSets[B])    #                                   why we need many iterations
	freq1 = np.minimum(freqs[A], freqs[B])
	if sum(freq1) > maxTrainSetSz:  freq1 = np.round(freq1 * (maxTrainSetSz * 1.0 / sum(freq1)))
	trainingSetSize = int(sum(freq1))  # Half size actually.  Approximately <= maxTrainSetSz
	validatSet1size = len(data1) - trainingSetSize
	validatSet2size = len(data2) - trainingSetSize
	X  = np.zeros((trainingSetSize*2,               numFeatures))
	Xv = np.zeros((validatSet1size+validatSet2size, numFeatures))
	y  = np.ravel([([0]*trainingSetSize) + ([1]*trainingSetSize)])
	yv = np.ravel([([0]*validatSet1size) + ([1]*validatSet2size)])
	freq2  = np.copy(freq1)
	trnIdx = valIdx = 0
	for item in data1:
		year = item[0]
		if freq1[year] > 0:
			freq1[year] -= 1
			X[trnIdx] = item[1:]
			trnIdx += 1
		else:
			Xv[valIdx] = item[1:]
			valIdx += 1
	assert trnIdx==trainingSetSize and valIdx==validatSet1size
	for item in data2:
		year = item[0]
		if freq2[year] > 0:
			freq2[year] -= 1
			X[trnIdx] = item[1:]
			trnIdx += 1
		else:
			Xv[valIdx] = item[1:]
			valIdx += 1
	assert trnIdx==trainingSetSize*2 and valIdx==validatSet1size+validatSet2size
	return X, y, Xv, yv, validatSet1size, validatSet2size
Developer: boris-k, Project: WavesOfWhat, Lines: 35, Source: Classify.py


Example 11: cluster

def cluster(m, n_colors=32):
    from sklearn.utils import shuffle
    from sklearn.cluster import KMeans
    from sklearn.metrics import pairwise_distances_argmin

    def recreate_image(codebook, labels, w, h):
        """Recreate the (compressed) image from the code book & labels"""
        d = codebook.shape[1]
        image = np.zeros((w, h, d))
        label_idx = 0
        for i in range(w):
            for j in range(h):
                image[i][j] = codebook[labels[label_idx]]
                label_idx += 1
        return image

    # Load Image and transform to a 2D numpy array.
    w, h, d = original_shape = tuple(m.shape)
    image_array = np.reshape(m, (w * h, d))
    image_array_sample = shuffle(image_array, random_state=0)[:1000]
    kmeans = KMeans(n_clusters=n_colors).fit(image_array_sample)

    codebook_random = shuffle(image_array, random_state=0)[:n_colors + 1]
    labels_random = pairwise_distances_argmin(codebook_random, image_array, axis=0)

    return recreate_image(codebook_random, labels_random, w, h)
Developer: salvador-dali, Project: hackerrank_ai, Lines: 26, Source: p_7_rubiks_cube_investigation.py


Example 12: main

def main():
    train, test, word2idx = get_ptb_data()

    for t in train:
        add_idx_to_tree(t, 0)
    train = [tree2list(t, -1, True) for t in train]
    train = [t for t in train if t[3][-1] >= 0] # for filtering binary labels

    for t in test:
        add_idx_to_tree(t, 0)
    test = [tree2list(t, -1, True) for t in test]
    test = [t for t in test if t[3][-1] >= 0] # for filtering binary labels

    train = shuffle(train)
    train = train[:1000]
    # n_pos = sum(t[3][-1] for t in train)
    # print "n_pos train:", n_pos
    test = shuffle(test)
    test = test[:100]
    # n_pos = sum(t[3][-1] for t in test)
    # print "n_pos test:", n_pos

    V = len(word2idx)
    print "vocab size:", V
    D = 80
    K = 5

    model = RecursiveNN(V, D, K)
    model.fit(train, epochs=3, activation=T.nnet.relu)
    print "train accuracy:", model.score(train)
    print "test accuracy:", model.score(test)
    print "train f1:", model.f1_score(train)
    print "test f1:", model.f1_score(test)
Developer: CesarChaMal, Project: machine_learning_examples, Lines: 33, Source: rntn_theano.py


Example 13: process_data

def process_data():
  global num_classes, num_train, num_test

  X_train , Y_train = load_data('Train')
  X_test , Y_test = load_data('Test')
  X_train = X_train.astype(np.float64)
  X_test = X_test.astype(np.float64)
  num_train = X_train.shape[0]
  num_test = X_test.shape[0]

  mean_image = np.mean(X_train,axis=0)
  X_train -= mean_image
  X_test -= mean_image

  X_train = X_train.reshape(-1, 1, img_dim, img_dim)
  Y_train -= 1
  X_train , Y_train = shuffle(X_train, Y_train)

  X_test = X_test.reshape(-1, 1, img_dim, img_dim)
  Y_test -= 1
  X_test , Y_test = shuffle(X_test, Y_test)

  print 'Training X shape :- ', X_train.shape
  print 'Training Y shape :- ', Y_train.shape
  print 'Testing X shape :- ', X_test.shape
  print 'Testing Y shape :- ', Y_test.shape

  return X_train, Y_train, X_test, Y_test
Developer: PankajKataria, Project: BanglaReco, Lines: 28, Source: solution.py


Example 14: frames2batch

 def frames2batch(k = 12,batch_size = 1024, is_calib = False):
     pos = util.get_files(rootdir = 'F:\\train_data\\pos\\')
     neg = util.get_files(rootdir = 'F:\\train_data\\neg\\')
     pos = shuffle(pos)
     neg = shuffle(neg)
     total = pos + neg
     total  = shuffle(total)
     batch = []
     c = 0
     bpath = 'F:\\train_data\\batch\\'
     for item_path in total:
         
         frame = fr.get_frame(item_path)
         frame_r = fr.resize_frame(frame,(k,k))
         if frame_r is None:  # 'is None' avoids ambiguous elementwise comparison on arrays
             continue
         vec = fr.frame_to_vect(frame_r)
         label = 1 if item_path.split('\\')[-1].find('pos') > 0 else 0
         print(item_path,label)
         batch.append((vec,label))
         if len(batch) > 0 and len(batch) % batch_size == 0:
             batch = sp.array(batch)
             sp.savez(bpath + str(c) + '_' + str(k) + ('_' if not is_calib else '_calib-')  + 'net',batch)
             batch = []
             
             c += 1
     if len(batch) > 0 and len(batch) % batch_size == 0:
         batch = sp.array(batch)
         sp.savez(bpath + str(c) + '_' + str(k) + ('_' if not is_calib else '_calib')  + '-net',batch)
         batch = []
         c += 1
Developer: gogolgrind, Project: Cascade-CNN-Face-Detection, Lines: 31, Source: datasets.py


Example 15: getMNIST

def getMNIST():
    # data shape: train (50000, 784), test (10000, 784)
    # already scaled from 0..1 and converted to float32
    datadir = '../large_files/'
    if not os.path.exists(datadir):
        datadir = ''

    input_file = "%smnist.pkl.gz" % datadir
    if not os.path.exists(input_file):
        url = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        with open(input_file, "wb") as out:
            f = urllib2.urlopen(url)
            out.write(f.read())
            out.flush()

    with gzip.open(input_file) as f:
        train, valid, test = cPickle.load(f)

    Xtrain, Ytrain = train
    Xvalid, Yvalid = valid
    Xtest, Ytest = test

    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
    Xtest, Ytest = shuffle(Xtest, Ytest)

    # try to take a smaller sample
    Xtrain = Xtrain[0:30000]
    Ytrain = Ytrain[0:30000]
    Xtest = Xtest[0:1000]
    Ytest = Ytest[0:1000]

    return Xtrain.reshape(len(Xtrain), 1, 28, 28), Ytrain, Ytrain_ind, Xtest.reshape(len(Xtest), 1, 28, 28), Ytest, Ytest_ind
Developer: CesarChaMal, Project: machine_learning_examples, Lines: 35, Source: renet.py


Example 16: load_whale_data

def load_whale_data(train_file, test_file, nb_classes=447):
    print("loading whale data")

    # normalize train data
    print("--> loading training data")
    train_data = read_csv(train_file)
    X_train = train_data[:, 1:]
    X_train = X_train.astype(np.float32)
    X_train = X_train / 255

    y_train = np.vstack(train_data[:, 0])
    y_train = y_train.astype(np.uint16)

    X_train, y_train = shuffle(X_train, y_train, random_state=42)
    X_train = X_train.reshape(-1, 1, 96, 96)
    Y_train = np_utils.to_categorical(y_train, 447)
    print("--> training data loaded")

    # normalize test data
    print("--> loading test data")
    test_data = read_csv(test_file)
    X_test = test_data[:, 1:]
    X_test = X_test.astype(np.float32)
    X_test = X_test / 255

    y_test = np.vstack(test_data[:, 0])
    y_test = y_test.astype(np.uint16)

    X_test, y_test = shuffle(X_test, y_test, random_state=42)
    X_test = X_test.reshape(-1, 1, 96, 96)
    Y_test = np_utils.to_categorical(y_test, 447)
    print("--> test data loaded")

    return (X_train, Y_train, X_test, Y_test)
Developer: deerishi, Project: genetic-algorithm-for-cnn, Lines: 34, Source: whale_test2_keras.py


Example 17: run_kmeans

def run_kmeans(inFile,  n_colors):
	china = cv2.imread(inFile)
	china = np.array(china, dtype=np.float64) / 255
	w, h, d = original_shape = tuple(china.shape)
	assert d == 3
	image_array = np.reshape(china, (w * h, d))
	
	print("\tFitting model on a small sub-sample of the data")
	t0 = time()
	image_array_sample = shuffle(image_array, random_state=0)[:1000]
	kmeans = KMeans(n_clusters=n_colors, random_state=0).fit(image_array_sample)  # 'k=' was renamed to 'n_clusters' in modern scikit-learn
	print("\tdone in %0.3fs." % (time() - t0))
	
	# Get labels for all points
	print("\tPredicting color indices on the full image (k-means)")
	t0 = time()
	labels = kmeans.predict(image_array)
	print("\tdone in %0.3fs." % (time() - t0))
	
	codebook_random = shuffle(image_array, random_state=0)[:n_colors + 1]
	print("\tPredicting color indices on the full image (random)")
	t0 = time()
	dist = euclidean_distances(codebook_random, image_array, squared=True)
	labels_random = dist.argmin(axis=0)
	print("\tdone in %0.3fs." % (time() - t0))

	img_kmeans = recreate_image(kmeans.cluster_centers_, labels, w, h)
	img_random = recreate_image(codebook_random, labels_random, w, h)
	return china, img_kmeans, img_random
Developer: AmirooR, Project: scripts, Lines: 29, Source: run_kmeans.py


Example 18: getTrainTestData

def getTrainTestData():
    data = pickle.load(open('./data/60_unnormalized.p', "rb"))

    raw_meta = []
    raw_data = []
    for k,v in data.iteritems():
        for i in range(len(v)):

            _d = v[i]
            previous = [[0]*LOCATION_ID_MAX,[0]*LOCATION_ID_MAX]
            if i==0:
                # previous date
                date_time = datetime.datetime.strptime(k, '%Y-%m-%d')
                previous_day = date_time - datetime.timedelta(1)
                str_previous_day = previous_day.strftime('%Y-%m-%d')
                if str_previous_day in data:
                    previous[0]=data[str_previous_day][-2]
                    previous[1]=data[str_previous_day][-1]
            elif i==1:
                # previous date
                date_time = datetime.datetime.strptime(k, '%Y-%m-%d')
                previous_day = date_time - datetime.timedelta(1)
                str_previous_day = previous_day.strftime('%Y-%m-%d')
                previous[1]=v[i-1]
                if str_previous_day in data:
                    previous[0]=data[str_previous_day][-1]
            else:
                previous[0]=v[i-2]
                previous[1]=v[i-1]

            raw_meta.append({"date":k,"interval":i,"previous":previous})
            raw_data.append(_d)

    num = len(raw_data)

    train_meta_data = raw_meta[0:int(0.6*num)]
    valid_meta_data = raw_meta[int(0.6*num):int(0.8*num)]
    test_meta_data = raw_meta[int(0.8*num):]

    train_y = raw_data[0:int(0.6*num)]
    valid_y = raw_data[int(0.6*num):int(0.8*num)]
    test_y = raw_data[int(0.8*num):]

    train_X = getFeatures(train_meta_data)
    valid_X = getFeatures(valid_meta_data)
    test_X = getFeatures(test_meta_data)

    train_X = np.array(train_X, dtype=np.float32)
    valid_X = np.array(valid_X, dtype=np.float32)
    test_X = np.array(test_X, dtype=np.float32)

    train_y = np.array(train_y, dtype=np.float32)
    valid_y = np.array(valid_y, dtype=np.float32)
    test_y = np.array(test_y, dtype=np.float32)

    train_X, train_y = shuffle(train_X, train_y, random_state=0)
    valid_X, valid_y = shuffle(valid_X, valid_y, random_state=1)
    test_X, test_y = shuffle(test_X, test_y, random_state=2)

    return train_X, train_y, valid_X, valid_y, test_X, test_y
Developer: Jerryzcn, Project: rnn_hack, Lines: 60, Source: train_data3.py


Example 19: main

def main():
    train, test, word2idx = get_ptb_data()

    for t in train:
        add_idx_to_tree(t, 0)
    train = [tree2list(t, -1, is_binary=True) for t in train]
    train = [t for t in train if t[3][-1] >= 0] # for filtering binary labels

    for t in test:
        add_idx_to_tree(t, 0)
    test = [tree2list(t, -1, is_binary=True) for t in test]
    test = [t for t in test if t[3][-1] >= 0] # for filtering binary labels

    train = shuffle(train)
    train = train[:1000]
    test = shuffle(test)
    test = test[:500]

    V = len(word2idx)
    print "vocab size:", V
    D = 80
    K = 5

    model = RecursiveNN(V, D, K)
    model.fit(train, reg=0, activation=T.nnet.relu)
    print "train accuracy:", model.score(train)
    print "test accuracy:", model.score(test)
Developer: CesarChaMal, Project: machine_learning_examples, Lines: 27, Source: recursive_theano.py


Example 20: shuffle_data

def shuffle_data(X,y,no_lable_vs_lable):
	X, y = shuffle(X, y, random_state=0)
	# balance labels by subsampling:
	y_dict = defaultdict(list)
	for i, y_i in enumerate(y):
		y_dict[y_i[0]].append(i)
	# subsample
	X_sub = []
	y_sub = []
	y_set = set(y_dict)
	y_dict_len = [len(y_dict[y_set_i]) for y_set_i in sorted(list(y_set))]
	quotient = float(y_dict_len[0]) / sum(y_dict_len)  # float() guards against Python 2 integer division
	print 'length cutting'
	print str(len(X))
	# generalize over multiple classes: 
	if(quotient > no_lable_vs_lable):
		# decrease 0 class labels:
		newLen = int(2*y_dict_len[1]*no_lable_vs_lable)
		id_new = y_dict['0'][:newLen] + [y_dict[id] for id in y_set if not id in ['0']][0]
		X_sub = [X[id] for id in id_new]
		y_sub = [y[id] for id in id_new]
		print(str(newLen), 'new 0 class length: ', str(len(id_new)))
	else:
		newLen = int(y_dict_len[0]*(1-no_lable_vs_lable))
		id_new = y_dict['1'][:newLen] + [y_dict[id] for id in y_set if not id in ['0']][0]
		X_sub = [X[id] for id in id_new]
		y_sub = [y[id] for id in id_new]
		print(str(newLen), 'new 1 class length')
	X, y = shuffle(X_sub, y_sub, random_state=0)
	print str(len(X_sub))
	print '--------------'
	return X,y
Developer: victorbergelin, Project: TQTM33, Lines: 32, Source: learning_working.py



Note: the sklearn.utils.shuffle examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright remains with the original authors. Follow the corresponding project's license when distributing or using them, and do not reproduce this compilation without permission.

