This article collects typical usage examples of the Python function sklearn.datasets.fetch_mldata. If you have been wondering exactly how to use fetch_mldata, what it does, or what real-world calls look like, the hand-picked code examples below should help.
Below are 20 code examples of fetch_mldata, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the site recommend better Python examples.
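A note before diving in: mldata.org, the repository behind fetch_mldata, has been offline for years; scikit-learn deprecated the function in 0.20 and removed it in 0.22, so the examples below only run on older installations. On current scikit-learn the usual replacement is fetch_openml. A minimal sketch of the swap (the 'mnist_784' dataset name and the dtype handling follow OpenML's conventions, not the original fetch_mldata API):

import numpy as np
from sklearn.datasets import fetch_openml

# Rough equivalent of fetch_mldata('MNIST original') on scikit-learn >= 0.22
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X = mnist.data.astype(np.float32) / 255.0  # (70000, 784), scaled to [0, 1]
y = mnist.target.astype(np.int32)          # OpenML returns the labels as strings

Also note that fetch_openml returns MNIST unsorted, while fetch_mldata returned it sorted by label, so code below that slices the raw array relies on an ordering fetch_openml does not provide.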
Example 1: __init__
def __init__(self, use_mnist=False):
    self.use_mnist = use_mnist
    if self.use_mnist:
        # fetch_mldata has no `subset` argument; fetch once and apply the
        # conventional 60,000/10,000 MNIST train/test split instead
        mnist = fetch_mldata('MNIST original')
        self.mnist_digits_train = (mnist.data[:60000], mnist.target[:60000])
        self.mnist_digits_test = (mnist.data[60000:], mnist.target[60000:])
    else:
        self.digits = load_digits()
        self.X = self.digits.data
        self.y = self.digits.target
    self.best_f1_score = 0
    self.best_score = 0
Developer: bjkomer, Project: sklearn-sandbox, Lines: 13, Source: hyperopt_sklearn_sandbox.py
Example 2: testScript
def testScript():
    print "\n---> Started Logistic Regression - Iris dataset - Own function - k class...\n"
    attributes, outcomes = getDataFromFile("../Data/iriskc.data.shuffled")
    min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
    attributes, outcomes = min_max_scaler.fit_transform(np.array(attributes)), np.array(outcomes)
    accrValues, presValues, recallValues, fMeasValues = crossValidate(attributes, outcomes, 10, learningRate=0.01, iterCountMax=750, threshold=0.005, ownFunction=True)
    for itr in range(10):
        print "Fold %d: \tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f" % (itr+1, accrValues[itr], presValues[itr], recallValues[itr], fMeasValues[itr])
    print "\nMean values:\tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f\n" % (np.mean(accrValues), np.mean(presValues), np.mean(recallValues), np.mean(fMeasValues))

    print "---> Started Logistic Regression - Iris dataset - Inbuilt function - k class...\n"
    attributes, outcomes = getDataFromFile("../Data/iriskc.data.shuffled")
    min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
    attributes, outcomes = min_max_scaler.fit_transform(np.array(attributes)), np.array(outcomes)
    accrValues, presValues, recallValues, fMeasValues = crossValidate(attributes, outcomes, 10, learningRate=0.01, iterCountMax=750, threshold=0.005, ownFunction=False)
    for itr in range(10):
        print "Fold %d: \tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f" % (itr+1, accrValues[itr], presValues[itr], recallValues[itr], fMeasValues[itr])
    print "\nMean values:\tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f\n" % (np.mean(accrValues), np.mean(presValues), np.mean(recallValues), np.mean(fMeasValues))

    print "---> Started Logistic Regression - Digits dataset - Own function - k class...\n"
    mnist = datasets.fetch_mldata('MNIST original')
    X, y = mnist.data / 255., mnist.target
    attributes = X[:20000]
    outcomes = y[:20000]
    # ownFunction=True here to match the "Own function" banner above
    accrValues, presValues, recallValues, fMeasValues = crossValidate(attributes, outcomes, 10, learningRate=0.01, iterCountMax=100, threshold=0.005, ownFunction=True)
    for itr in range(10):
        print "Fold %d: \tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f" % (itr+1, accrValues[itr], presValues[itr], recallValues[itr], fMeasValues[itr])
    print "\nMean values:\tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f\n" % (np.mean(accrValues), np.mean(presValues), np.mean(recallValues), np.mean(fMeasValues))

    print "---> Started Logistic Regression - Digits dataset - Inbuilt function - k class...\n"
    mnist = datasets.fetch_mldata('MNIST original')
    X, y = mnist.data / 255., mnist.target
    attributes = X[:20000]
    outcomes = y[:20000]
    accrValues, presValues, recallValues, fMeasValues = crossValidate(attributes, outcomes, 10, learningRate=0.01, iterCountMax=100, threshold=0.005, ownFunction=False)
    for itr in range(10):
        print "Fold %d: \tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f" % (itr+1, accrValues[itr], presValues[itr], recallValues[itr], fMeasValues[itr])
    print "\nMean values:\tAccuracy: %f\tPrecision: %f\tRecall: %f\tF-Measure: %f\n" % (np.mean(accrValues), np.mean(presValues), np.mean(recallValues), np.mean(fMeasValues))
Developer: arajago6, Project: MachineLearningPython, Lines: 51, Source: 2_LogisticRegression_Kc.py
Example 3: main
def main():
    files = [
        join(SCRIPT_DIR, "train_x.npy"),
        join(SCRIPT_DIR, "train_y.npy"),
        join(SCRIPT_DIR, "validate_x.npy"),
        join(SCRIPT_DIR, "validate_y.npy"),
        join(SCRIPT_DIR, "test_x.npy"),
        join(SCRIPT_DIR, "test_y.npy")
    ]
    if all([exists(fname) and stat(fname).st_size > 100 for fname in files]):
        print("Already downloaded. Skipping")
    else:
        mnist = fetch_mldata('MNIST original')
        np.random.seed(1234)
        data = mnist.data
        target = mnist.target
        indices = np.arange(len(data))
        np.random.shuffle(indices)
        data = data[indices]
        target = target[indices]
        train_x, train_y = data[:-10000].astype(np.float32) / 255.0, target[:-10000].astype(np.int32)
        test_x, test_y = data[-10000:].astype(np.float32) / 255.0, target[-10000:].astype(np.int32)
        np.save(join(SCRIPT_DIR, "train_x.npy"), train_x[:int(0.9 * train_x.shape[0])])
        np.save(join(SCRIPT_DIR, "train_y.npy"), train_y[:int(0.9 * train_y.shape[0])])
        np.save(join(SCRIPT_DIR, "validate_x.npy"), train_x[int(0.9 * train_x.shape[0]):])
        np.save(join(SCRIPT_DIR, "validate_y.npy"), train_y[int(0.9 * train_y.shape[0]):])
        np.save(join(SCRIPT_DIR, "test_x.npy"), test_x)
        np.save(join(SCRIPT_DIR, "test_y.npy"), test_y)
    print("Done.")
Developer: dali-ml, Project: dali-examples, Lines: 34, Source: generate.py
Example 4: prepare_dataset
def prepare_dataset():
    print('load MNIST dataset')
    mnist = fetch_mldata('MNIST original')
    mnist['data'] = mnist['data'].astype(np.float32)
    mnist['data'] /= 255
    mnist['target'] = mnist['target'].astype(np.int32)
    return mnist
Developer: fukatani, Project: soinn, Lines: 7, Source: train_mnist.py
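One possible way to consume the Bunch returned above, assuming the conventional 60,000-image MNIST training split (the split point is an assumption, not part of the snippet):

import numpy as np

mnist = prepare_dataset()
x_train, x_test = np.split(mnist['data'], [60000])    # (60000, 784) / (10000, 784)
y_train, y_test = np.split(mnist['target'], [60000])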
Example 5: load
def load(config, test=False):
    """Load MNIST dataset using scikit-learn. Returns a dict with the
    following entries:
        - images: n x 28 x 28 array
        - data: n x 784 array
        - target: n array
    """
    dataset = fetch_mldata('mnist-original')
    X, y = dataset.data, dataset.target
    X = X.astype(np.float32) / 255.0
    if test:
        idx_start, idx_end = config['test_set']
    else:
        idx_start, idx_end = config['train_set']
    X, y = shuffle(X, y, random_state=42)
    X = X[idx_start:idx_end]
    y = y[idx_start:idx_end]
    return {
        'images': X.reshape(-1, 28, 28),
        'data': X,
        'target': y,
    }
Developer: RokIrt, Project: dsr-fall-2014, Lines: 26, Source: mnist.py
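A sketch of the config dict this loader expects; only the 'train_set'/'test_set' keys come from the snippet, the index ranges are illustrative:

config = {'train_set': (0, 60000), 'test_set': (60000, 70000)}
train = load(config)            # 60,000 shuffled training digits
test = load(config, test=True)  # the remaining 10,000 as a test set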
Example 6: get_mnist
def get_mnist(start=None, end=None, random=False, num=None):
    mnist = fetch_mldata('MNIST original', data_home='~/diss/mnist')
    if random and num is not None:  # was `random is not None`, which is always true for the False default
        idx = np.random.choice(range(mnist.data.shape[0]), num)
    elif start is not None and end is not None:
        idx = range(start, end)
    else:
        raise ValueError('pass random=True with num, or both start and end')
    return mnist.data[idx], mnist.target[idx]
Developer: ahmedassal, Project: ml-playground, Lines: 7, Source: utils.py
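Two possible invocations of the helper above (the argument values are illustrative):

X, y = get_mnist(start=0, end=1000)          # first 1,000 digits in dataset order
X_r, y_r = get_mnist(random=True, num=500)   # 500 digits sampled with replacement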
Example 7: main
def main(description, gpu, output):
    logging.basicConfig(level=logging.INFO)
    logging.info('fetch MNIST dataset')
    mnist = fetch_mldata(description)
    mnist.data = mnist.data.astype(numpy.float32)
    mnist.data /= 255
    mnist.target = mnist.target.astype(numpy.int32)
    data_train, data_test, target_train, target_test = train_test_split(mnist.data, mnist.target)
    data = data_train, data_test
    target = target_train, target_test
    start_time = time.time()
    if gpu >= 0:
        cuda.check_cuda_available()
        cuda.get_device(gpu).use()
        logging.info("Using gpu device {}".format(gpu))
    else:
        logging.info("Not using gpu device")
    mlp = MLP(data=data, target=target, gpu=gpu)
    mlp.train_and_test(n_epoch=1)
    end_time = time.time()
    logging.info("time = {} min".format((end_time - start_time) / 60.0))
    logging.info('saving trained mlp into {}'.format(output))
    with open(output, 'wb') as fp:
        pickle.dump(mlp, fp)
Developer: medinfo2, Project: deeplearning, Lines: 32, Source: mlp.py
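An illustrative invocation; the argument values are assumptions (gpu=-1 takes the CPU branch above):

main('MNIST original', gpu=-1, output='mlp.pkl')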
Example 8: test_classifier_chain_vs_independent_models
def test_classifier_chain_vs_independent_models():
    # Verify that an ensemble of classifier chains (each of length
    # N) can achieve a higher Jaccard similarity score than N independent
    # models
    yeast = fetch_mldata('yeast')
    X = yeast['data']
    Y = yeast['target'].transpose().toarray()
    X_train = X[:2000, :]
    X_test = X[2000:, :]
    Y_train = Y[:2000, :]
    Y_test = Y[2000:, :]

    ovr = OneVsRestClassifier(LogisticRegression())
    ovr.fit(X_train, Y_train)
    Y_pred_ovr = ovr.predict(X_test)

    chain = ClassifierChain(LogisticRegression(),
                            order=np.array([0, 2, 4, 6, 8, 10,
                                            12, 1, 3, 5, 7, 9,
                                            11, 13]))
    chain.fit(X_train, Y_train)
    Y_pred_chain = chain.predict(X_test)

    assert_greater(jaccard_similarity_score(Y_test, Y_pred_chain),
                   jaccard_similarity_score(Y_test, Y_pred_ovr))
Developer: fabionukui, Project: scikit-learn, Lines: 25, Source: test_multioutput.py
Example 9: load_script
def load_script(script_vars):
    def define(var_name, fun, overwrite=False):
        if script_vars.has_key(var_name) and not overwrite:
            print('%s is already defined' % var_name)
            return script_vars[var_name]
        else:
            print('computing variable %s' % var_name)
            value = fun()
            script_vars[var_name] = value
            globals()[var_name] = value
            return value

    print(globals().keys())
    custom_data_home = "/home/stefan2/mnistdata"
    define('mnist', lambda: fetch_mldata('MNIST original', data_home=custom_data_home))
    data = mnist.data.astype(float)  # convert to float; slice [0:100, :] to subsample
    labels = mnist.target
    n, m = data.shape
    print("num data points %s" % n)

    # run the method with successive orthogonalization
    for j in range(0, 50):
        print("iteration: " + str(j))
        res = find_dominant_directions(data)
        plot_vector_png("pattern_" + str(j), res)
        for i in range(0, n):
            v = data[i, :]
            proj = np.reshape(v, (1, m)).dot(np.reshape(res, (m, 1)))[0, 0]
            data[i, :] = v - proj * res
Developer: Remi-M34, Project: HumeurDeTweets, Lines: 31, Source: dominant_patterns.py
Example 10: test_configs
def test_configs():
    from sklearn import datasets
    import sys
    import os
    import logging

    log = logging.getLogger()
    handler = logging.StreamHandler(sys.stdout)
    fmt = logging.Formatter('%(asctime)s %(levelname)s: %(message)s', '%Y-%m-%d %H:%M:%S')
    handler.setFormatter(fmt)
    log.addHandler(handler)
    log.setLevel(logging.DEBUG)

    custom_data_home = os.getcwd() + '/sk_data'
    digits = datasets.fetch_mldata('MNIST original', data_home=custom_data_home)
    X = np.asarray(digits.data, 'float32')
    # images = [imresize(im.reshape(28, 28), (32, 32)) for im in X]
    # X = np.vstack([im.flatten() for im in images])
    X[X < 128] = 0
    X[X >= 128] = 1
    X /= 256.  # note: after binarizing, pixels end up as 0 or 1/256

    models = []
    for w_sigma in [.1, .5, 1, 2, 5]:
        for sparsity in [.001, .01, .05, .1, .5]:
            log.info('Building RBM_dl:\n w_sigma=%s\n sparsity=%s' % (w_sigma, sparsity))
            model = ConvRBM((28, 28), 40, w_size=11, n_iter=3, verbose=True, w_sigma=w_sigma, sparsity=sparsity)
            model.fit(X)
            models.append({
                'model': model,
                'w_sigma': w_sigma,
                'sparsity': sparsity,
            })
    log.info('Done')
    return models
Developer: rajendraranabhat, Project: S3Lab_Projects, Lines: 35, Source: crbm.py
Example 11: run
def run(data_path):
    print "Reading the dataset:", data_path
    ## http://continuum.io/blog/wiserf-use-cases-and-benchmarks
    mnist = fetch_mldata('MNIST original')

    # Define training and testing sets
    inds = arange(len(mnist.data))
    test_i = random.sample(xrange(len(inds)), int(0.1 * len(inds)))
    train_i = numpy.delete(inds, test_i)
    X_train = mnist.data[train_i].astype(numpy.double)
    y_train = mnist.target[train_i].astype(numpy.double)
    X_test = mnist.data[test_i].astype(numpy.double)
    y_test = mnist.target[test_i].astype(numpy.double)

    # Truncate the data
    X_digits, y_digits = shuffle(X_train, y_train)
    X_digits_train = X_digits[:1000]
    y_digits_train = y_digits[:1000]
    X_digits_valid = X_digits[1000:2000]
    y_digits_valid = y_digits[1000:2000]
    X_digits_test = X_digits[2000:3000]
    y_digits_test = y_digits[2000:3000]

    knn_digits = KNeighborsClassifier(n_neighbors=10)
    knn_digits.fit(X_digits_train, y_digits_train)
    print "KNN validation accuracy on MNIST digits: ",
    print knn_digits.score(X_digits_valid, y_digits_valid)
Developer: Sean1989, Project: draft-ml-workflow, Lines: 32, Source: mnist_knn.py
Example 12: main
def main():
    print '... get mnist data'
    mnist = fetch_mldata('MNIST original', data_home='.')
    fig, axes = plt.subplots(5, 3, figsize=(6, 8))
    data = mnist.data[[0, 7000, 14000, 21000, 28000]]
    print '... start training'
    for i, (axrow, img) in enumerate(zip(axes, data)):
        img = img.reshape(28, 28)
        img = (img >= 128).astype(int)
        corrupted = get_corrupted_input(img, 0.05)
        mrf = MRF(corrupted)
        if i == 0:
            axes[i][0].set_title('元画像')     # "original image"
            axes[i][1].set_title('ノイズあり')  # "with noise"
            axes[i][2].set_title('ノイズ除去')  # "denoised"
        axes[i][0].imshow(img, cmap=cm.Greys_r)
        axes[i][1].imshow(corrupted, cmap=cm.Greys_r)
        axes[i][2].imshow(mrf.denoised, cmap=cm.Greys_r)
        for ax in axrow:
            ax.xaxis.set_visible(False)
            ax.yaxis.set_visible(False)
    plt.show()
Developer: makora9143, Project: theano_tutorial, Lines: 27, Source: mrf.py
Example 13: load
def load(train_n, test_n):
    mnist = fetch_mldata('MNIST original', data_home='.')
    mnist.data = mnist.data.astype(np.float32) / 256.0
    mnist.target = mnist.target.astype(np.int32)
    N = len(mnist.data)
    order = np.random.permutation(N)
    train = {i: [] for i in range(10)}
    test = {i: [] for i in range(10)}
    train_m = math.ceil(train_n / 10)  # per-class quota
    train_sum = 0
    test_m = math.ceil(test_n / 10)
    test_sum = 0
    for i in range(N):
        x = mnist.data[order[i]]
        y = mnist.target[order[i]]
        if train_sum < train_n and len(train[y]) < train_m:
            train[y].append(x)
            train_sum += 1
        elif test_sum < test_n and len(test[y]) < test_m:  # elif, so a sample never lands in both splits
            test[y].append(x)
            test_sum += 1
    return train, test
Developer: cympfh, Project: simeji, Lines: 30, Source: load_mnist.py
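A possible call, assuming train_n and test_n divisible by 10 so every per-class quota fills exactly:

train, test = load(1000, 200)  # dicts mapping digit -> list of feature vectors
print(sum(len(v) for v in train.values()))  # 1000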
Example 14: download__by_category
def download__by_category():
    mnist = fetch_mldata('MNIST original')
    # mnist.data (70000, 784), mnist.target (70000, 1)
    trainX, trainY = mnist.data[:-10000], mnist.target[:-10000]
    testX, testY = mnist.data[-10000:], mnist.target[-10000:]

    if not exists('train'):
        os.makedirs('train')
    x = {i: [] for i in range(10)}
    for i in range(len(trainY)):
        x[trainY[i]].append(trainX[i])
    for i in range(10):
        cPickle.dump(x[i], open(join('train', '{}.pkl'.format(i)), 'w+'))

    if not exists('test'):
        os.makedirs('test')
    x = {i: [] for i in range(10)}
    for i in range(len(testY)):
        x[testY[i]].append(testX[i])
    for i in range(10):
        cPickle.dump(x[i], open(join('test', '{}.pkl'.format(i)), 'w+'))
Developer: katyprogrammer, Project: CNN_experiment_code, Lines: 26, Source: CNN.py
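Reading one class back, following the directory layout created above (Python 2 cPickle, as in the snippet):

import cPickle
from os.path import join

threes = cPickle.load(open(join('train', '3.pkl')))  # list of 784-dim vectors for digit 3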
Example 15: make_data
def make_data(N):
    print("fetch MNIST dataset")
    mnist = fetch_mldata('MNIST original', data_home='.')
    mnist.data = mnist.data.astype(np.float32)
    mnist.data /= 255
    mnist.target = mnist.target.astype(np.int32)  # fixed typo: was `mnist.taret`
    # one-hot encode the labels
    mnist_target = np.zeros((mnist.target.shape[0], 10))
    for index, num in enumerate(mnist.target):
        mnist_target[index][num] = 1.
    # shuffle data and labels together
    index = random.sample(range(mnist.target.shape[0]), mnist.target.shape[0])
    tmp_target = [mnist_target[i] for i in index]
    tmp_data = [mnist.data[i] for i in index]
    x_train, x_test = np.split(tmp_data, [N])
    y_train, y_test = np.split(tmp_target, [N])
    return [x_train, x_test, y_train, y_test]
Developer: kirInFPGA, Project: DBN, Lines: 28, Source: dbn.py
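A possible call; N is the size of the training split:

x_train, x_test, y_train, y_test = make_data(60000)  # 60k train / 10k test, labels one-hot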
Example 16: main
def main():
    """TODO: Docstring for main.
    :returns: TODO
    """
    alpha = 1.
    decay = 0.0006
    iter_num = 600
    finetune_iter = 220
    hyper_params = {
        'hidden_layers_sizes': [196, ], 'iter_nums': [400, ],
        'alphas': [1., ], 'decays': [0.003, ],
        'betas': [3, ], 'rhos': [0.1, ]
    }
    enc = OneHotEncoder(sparse=False)
    mnist = fetch_mldata('MNIST original', data_home='./')
    x_train, x_test, y_train, y_test = \
        train_test_split(scale(mnist.data.astype(float)).astype('float32'),
                         mnist.target.astype('float32'),
                         test_size=0.5, random_state=0)
    x_unlabeled = scale(mnist.data[mnist.target >= 5, :].astype(float)).astype('float32')
    y_train = enc.fit_transform(y_train.reshape(y_train.shape[0], 1)).astype('float32')

    t_x = T.matrix()
    params, extracted = pretrain_sae(x_unlabeled, hyper_params)
    extracted = function(inputs=[t_x], outputs=[sae_extract(t_x, params)])(x_train)[0]
    params.append(train_softmax(extracted, y_train, iter_num, alpha, decay))
    weights = finetune_sae(x_train, y_train, params, finetune_iter, alpha, decay)

    all_label = np.array(range(0, 10))
    pred = all_label[softmax2class_max(sae_predict(x_test, weights))]
    print accuracy_score(y_test, pred)
    print classification_report(y_test, pred)
    print confusion_matrix(y_test, pred)
Developer: ShiehShieh, Project: UFLDL-Solution, Lines: 35, Source: sae.py
Example 17: get_datasets
def get_datasets():
    mnist = fetch_mldata('MNIST original')
    data = mnist['data']
    target = mnist['target']
    data = data - data.mean(axis=0)
    std = data.std(axis=0)
    data[:, std > 0] /= std[std > 0]

    train_split = 60000
    output_size = 10
    train_ordered = data[:train_split]
    train_labels_ordered = target[:train_split]
    training_data = zip(train_ordered, train_labels_ordered)
    random.shuffle(training_data)
    train = np.array([p[0] for p in training_data])
    train_labels = np.array([p[1] for p in training_data])
    train_outs = np.array([one_hot(i, output_size) for i in train_labels])

    test = data[train_split:]
    test_labels = target[train_split:]
    test_outs = np.array([one_hot(i, output_size) for i in test_labels])
    return train, train_outs, test, test_outs
Developer: pajkossy, Project: nn, Lines: 27, Source: utils.py
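Typical use of the helper above; the labels come back as one-hot rows (a sketch):

train, train_outs, test, test_outs = get_datasets()
print(train.shape, train_outs.shape)  # (60000, 784) (60000, 10)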
Example 18: iris_binary
def iris_binary():
    iris = fetch_mldata('iris')
    X = iris.data
    y = iris.target
    idx = y < 3       # keep only two classes (the mask implies mldata's 1..3 label coding)
    y[y == 2] = -1    # relabel to {1, -1} for a binary problem
    return X[idx, :], y[idx]
Developer: giorgiop, Project: nips15exps, Lines: 7, Source: test_algorithm.py
Example 19: get_mnist
def get_mnist():
    np.random.seed(1234)  # set seed for deterministic ordering
    mnist = fetch_mldata('MNIST original', data_home='../../data')
    p = np.random.permutation(mnist.data.shape[0])
    X = mnist.data[p].astype(np.float32) * 0.02
    Y = mnist.target[p]
    return X, Y
Developer: nexcafe, Project: mxnet, Lines: 7, Source: data.py
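Calling it is a one-liner; note that pixels come back scaled by 0.02 rather than 1/255 (a choice made in the snippet itself):

X, Y = get_mnist()
print(X.shape, X.max())  # (70000, 784), 255 * 0.02 = 5.1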
Example 20: MNIST
def MNIST():
    add_fit_and_score(RegularizedNet)
    from sklearn.datasets import fetch_mldata
    mnist = fetch_mldata('MNIST original')
    X = numpy.asarray(mnist.data, dtype='float32')
    if SCALE:
        X /= 255.
    y = numpy.asarray(mnist.target, dtype='int32')
    print("Total dataset size:")
    print("n samples: %d" % X.shape[0])
    print("n features: %d" % X.shape[1])
    print("n classes: %d" % len(set(y)))

    from sklearn import cross_validation
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.2, random_state=42)
    dnn = RegularizedNet(numpy_rng=numpy.random.RandomState(123),
                         theano_rng=None,
                         n_ins=x_train.shape[1],
                         layers_types=[ReLU, ReLU, LogisticRegression],
                         layers_sizes=[200, 200],
                         n_outs=10,
                         rho=0.95,
                         eps=1.E-6,
                         max_norm=0.,
                         debugprint=False,
                         L1_reg=0.,
                         L2_reg=1. / x_train.shape[0])
    dnn.fit(x_train, y_train, max_epochs=60, method='adadelta', verbose=True, plot=False)
    test_error = dnn.score(x_test, y_test)
    print("score: %f" % (1. - test_error))
Developer: KayneWest, Project: Stuff, Lines: 31, Source: nesterov_dnn.py
Note: the sklearn.datasets.fetch_mldata examples in this article were compiled by 纯净天空 from open-source projects on GitHub, MSDocs, and similar code-hosting platforms. The snippets were selected from projects contributed by various developers, and copyright remains with the original authors; consult each project's license before redistributing or using the code, and do not republish without permission.