
Python datasets.load_digits Function Code Examples


This article collects and summarizes typical usage examples of the Python function sklearn.datasets.load_digits. If you have been wondering what exactly load_digits does, or how to use it in practice, the curated code examples below should help.



A total of 20 code examples of the load_digits function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
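Before the examples, here is a minimal orientation sketch (not taken from any of the projects below) of what load_digits itself returns; the n_class and return_X_y parameters shown are the ones several examples rely on:

from sklearn.datasets import load_digits

# Default call returns a Bunch with .data, .target and .images
digits = load_digits()
print(digits.data.shape)    # (1797, 64): 8x8 grayscale images, flattened
print(digits.target.shape)  # (1797,): integer labels 0-9

# return_X_y=True skips the Bunch and returns (data, target) directly
X, y = load_digits(return_X_y=True)

# n_class keeps only the digits 0 .. n_class-1
# (older sklearn versions also accepted it positionally, e.g. load_digits(2))
X01, y01 = load_digits(n_class=2, return_X_y=True)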

Example 1: test_training_continuation

    def test_training_continuation(self):
        digits_2class = load_digits(2)
        digits_5class = load_digits(5)

        X_2class = digits_2class['data']
        y_2class = digits_2class['target']

        X_5class = digits_5class['data']
        y_5class = digits_5class['target']

        dtrain_2class = xgb.DMatrix(X_2class, label=y_2class)
        dtrain_5class = xgb.DMatrix(X_5class, label=y_5class)

        gbdt_01 = xgb.train(self.xgb_params_01, dtrain_2class, num_boost_round=10)
        ntrees_01 = len(gbdt_01.get_dump())
        assert ntrees_01 == 10

        gbdt_02 = xgb.train(self.xgb_params_01, dtrain_2class, num_boost_round=0)
        gbdt_02.save_model('xgb_tc.model')

        gbdt_02a = xgb.train(self.xgb_params_01, dtrain_2class, num_boost_round=10, xgb_model=gbdt_02)
        gbdt_02b = xgb.train(self.xgb_params_01, dtrain_2class, num_boost_round=10, xgb_model="xgb_tc.model")
        ntrees_02a = len(gbdt_02a.get_dump())
        ntrees_02b = len(gbdt_02b.get_dump())
        assert ntrees_02a == 10
        assert ntrees_02b == 10
        assert mean_squared_error(y_2class, gbdt_01.predict(dtrain_2class)) == \
               mean_squared_error(y_2class, gbdt_02a.predict(dtrain_2class))
        assert mean_squared_error(y_2class, gbdt_01.predict(dtrain_2class)) == \
               mean_squared_error(y_2class, gbdt_02b.predict(dtrain_2class))

        gbdt_03 = xgb.train(self.xgb_params_01, dtrain_2class, num_boost_round=3)
        gbdt_03.save_model('xgb_tc.model')

        gbdt_03a = xgb.train(self.xgb_params_01, dtrain_2class, num_boost_round=7, xgb_model=gbdt_03)
        gbdt_03b = xgb.train(self.xgb_params_01, dtrain_2class, num_boost_round=7, xgb_model="xgb_tc.model")
        ntrees_03a = len(gbdt_03a.get_dump())
        ntrees_03b = len(gbdt_03b.get_dump())
        assert ntrees_03a == 10
        assert ntrees_03b == 10
        assert mean_squared_error(y_2class, gbdt_03a.predict(dtrain_2class)) == \
               mean_squared_error(y_2class, gbdt_03b.predict(dtrain_2class))

        gbdt_04 = xgb.train(self.xgb_params_02, dtrain_2class, num_boost_round=3)
        assert gbdt_04.best_ntree_limit == (gbdt_04.best_iteration + 1) * self.num_parallel_tree
        assert mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class)) == \
               mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class, ntree_limit=gbdt_04.best_ntree_limit))

        gbdt_04 = xgb.train(self.xgb_params_02, dtrain_2class, num_boost_round=7, xgb_model=gbdt_04)
        assert gbdt_04.best_ntree_limit == (gbdt_04.best_iteration + 1) * self.num_parallel_tree
        assert mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class)) == \
               mean_squared_error(y_2class, gbdt_04.predict(dtrain_2class, ntree_limit=gbdt_04.best_ntree_limit))

        gbdt_05 = xgb.train(self.xgb_params_03, dtrain_5class, num_boost_round=7)
        assert gbdt_05.best_ntree_limit == (gbdt_05.best_iteration + 1) * self.num_parallel_tree
        gbdt_05 = xgb.train(self.xgb_params_03, dtrain_5class, num_boost_round=3, xgb_model=gbdt_05)
        assert gbdt_05.best_ntree_limit == (gbdt_05.best_iteration + 1) * self.num_parallel_tree
        assert np.any(gbdt_05.predict(dtrain_5class) !=
                      gbdt_05.predict(dtrain_5class, ntree_limit=gbdt_05.best_ntree_limit)) == False
Developer: 000Nelson000 | Project: xgboost | Lines: 59 | Source: test_training_continuation.py


Example 2: load_data

import collections
import random

import pandas as pd
from sklearn import datasets, preprocessing

def load_data(digits=[]):
    """
        Loads data from sklearn's digits dataset
        (http://scikit-learn.org/stable/datasets/)
        and performs preprocessing.
        ----
        Note that the digits dataset has:
        d = 64   (dimensionality)
        m = ~180 (number of instances per class)
        z = 10   (number of classes)
        digits: An np array which has the digits you want to train on. The
                digits must be in the range of [0,9].
        Output: Returns a namedtuple with the train/test data and labels,
                plus the digit sets used for training and testing.
    """

    #Loads the data and the targets, resp.
    #Note they should be indexed the same way. So digits_data[n] corresponds
    #to digits_labels[n] for any n.
    digits_data = pd.DataFrame(datasets.load_digits().data)
    digits_labels = pd.Series(datasets.load_digits().target)

    #If the digits to train on are not specified, pick randomly
    if len(digits) == 0:
        r_digits = list(range(10))  #list() so the digits can be shuffled in Python 3
        random.shuffle(r_digits)
        #the first 7 of the 10 shuffled digits (70%) go to training, the rest to testing
        training_digits = set(r_digits[:7])
        testing_digits = set(r_digits[7:])
    else:
        #If they specify digits outside of the range, throw
        if max(digits) > 9 or min(digits) < 0:
            raise ValueError('The dataset only has digits 0-9. The parameter passed to load_data had a digit outside of that range')
        if len(digits) >= 10:
            raise ValueError('The dataset only has digits 0-9. You said to train on all of them, leaving no testing data')

        all_digits = set([0,1,2,3,4,5,6,7,8,9])
        training_digits = set(digits)
        testing_digits = all_digits - training_digits

    #Training data
    raw_train_labels = digits_labels[digits_labels.isin(training_digits)]
    training_data = digits_data.loc[raw_train_labels.index]
    #Maps the labels to 0...n
    training_labels = pd.DataFrame(preprocessing.LabelEncoder().fit_transform(raw_train_labels))

    #Testing data
    raw_test_labels = digits_labels[digits_labels.isin(testing_digits)]
    testing_data = digits_data.loc[raw_test_labels.index]
    #Maps the labels to 0...n
    testing_labels = pd.DataFrame(preprocessing.LabelEncoder().fit_transform(raw_test_labels))

    processed = collections.namedtuple('processed', ['training_data', 'training_labels', 'testing_data','testing_labels', 'training_digits', 'testing_digits'])
    return processed(training_data,training_labels,testing_data,testing_labels,training_digits,testing_digits)
Developer: aelkholy | Project: one-shot | Lines: 59 | Source: EmbarrassingZeroShot.py


Example 3: run

from sklearn import datasets, svm

def run(sc):
	iris = datasets.load_iris()
	digits = [datasets.load_digits(), datasets.load_digits()]


	def learn(x):
		clf = svm.SVC(gamma=0.001, C=100.)
		clf.fit(x.data[:-1], x.target[:-1] )
		return clf.predict(x.data[-1])

	return sc.parallelize(digits).map(learn).collect()
Developer: jinhoyoo | Project: spark_cluster_example | Lines: 11 | Source: scipy_example.py


Example 4: test_load_digits

def test_load_digits():
    digits = load_digits()
    assert_equal(digits.data.shape, (1797, 64))
    assert_equal(numpy.unique(digits.target).size, 10)

    # test return_X_y option
    X_y_tuple = load_digits(return_X_y=True)
    bunch = load_digits()
    assert_true(isinstance(X_y_tuple, tuple))
    assert_array_equal(X_y_tuple[0], bunch.data)
    assert_array_equal(X_y_tuple[1], bunch.target)
Developer: TaihuaLi | Project: scikit-learn | Lines: 11 | Source: test_base.py


Example 5: test_digits

def test_digits():
    
    from sklearn.cross_validation import train_test_split 
    from sklearn.datasets import load_digits
    from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
    from sklearn.preprocessing import LabelBinarizer
    
    digits = load_digits()
    X = digits.data
    y = digits.target   #labels
    X /= X.max()        #normalize to [0, 1]

    nn = NeuralNetwork([64,100,10],'logistic')  #8x8 input, 10 output
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
    labels_train = LabelBinarizer().fit_transform(y_train)  #convert number labels to one-hot vectors
    labels_test = LabelBinarizer().fit_transform(y_test)

    nn.fit(X_train,labels_train,epochs=100)
    predictions = []
    for i in range(X_test.shape[0]) :
        o = nn.predict(X_test[i])
        predictions.append(np.argmax(o))
    print(confusion_matrix(y_test, predictions))
    print(classification_report(y_test, predictions))
    print('accuracy at %0.3f' % accuracy_score(y_test, predictions))
Developer: akashdarak | Project: Neural-Networks | Lines: 25 | Source: Linear_Activation.py


Example 6: test_classification

def test_classification():
    from sklearn.datasets import load_digits
    from sklearn.cross_validation import KFold
    from sklearn.metrics import normalized_mutual_info_score
    digits = load_digits()
    X, y = digits.data, digits.target
    folds = 3
    cv = KFold(y.shape[0], folds)
    total = 0.0
    oo_score_bag = []
    for tr, te in cv:
        mlp = MLPClassifier(use_dropout=True, n_hidden=200, lr=1.)
        print(mlp)
        mlp.fit(X[tr], y[tr], max_epochs=100, staged_sample=X[te])
        t = normalized_mutual_info_score(mlp.predict(X[te]), y[te])
        print("Fold training accuracy: %f" % t)
        total += t
        this_score = []
        for i in mlp.oo_score:
            this_score.append(normalized_mutual_info_score(i, y[te]))
        oo_score_bag.append(this_score)
    from matplotlib import pyplot as plt
    plt.plot(oo_score_bag[0])
    plt.show()

    print("training accuracy: %f" % (total / float(folds)))
Developer: JakeMick | Project: sk-mlp | Lines: 26 | Source: mlp.py


Example 7: test_predict_probability

    def test_predict_probability(self):
        dataset = datasets.load_digits()
        x_train, x_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, train_size=0.7
        )

        x_train_before = x_train.copy()
        x_test_before = x_test.copy()
        y_train_before = y_train.copy()

        number_of_classes = len(np.unique(dataset.target))

        pnnet = algorithms.PNN(verbose=False, std=10)
        pnnet.train(x_train, y_train)
        result = pnnet.predict_prob(x_test)

        n_test_inputs = x_test.shape[0]
        self.assertEqual(result.shape, (n_test_inputs, number_of_classes))

        total_classes_prob = np.round(result.sum(axis=1), 10)
        np.testing.assert_array_equal(
            total_classes_prob,
            np.ones(n_test_inputs)
        )
        old_result = result.copy()

        # Test problem with variable links
        np.testing.assert_array_equal(x_train, x_train_before)
        np.testing.assert_array_equal(x_test, x_test_before)
        np.testing.assert_array_equal(y_train, y_train_before)

        x_train[:, :] = 0
        result = pnnet.predict_prob(x_test)
        total_classes_prob = np.round(result.sum(axis=1), 10)
        np.testing.assert_array_almost_equal(result, old_result)
Developer: Neocher | Project: neupy | Lines: 35 | Source: test_pnn.py


Example 8: test_multiclass_prediction_early_stopping

    def test_multiclass_prediction_early_stopping(self):
        X, y = load_digits(10, True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
        params = {
            'objective': 'multiclass',
            'metric': 'multi_logloss',
            'num_class': 10,
            'verbose': -1
        }
        lgb_train = lgb.Dataset(X_train, y_train, params=params)
        lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, params=params)
        evals_result = {}
        gbm = lgb.train(params, lgb_train,
                        num_boost_round=50,
                        valid_sets=lgb_eval,
                        verbose_eval=False,
                        evals_result=evals_result)

        pred_parameter = {"pred_early_stop": True, "pred_early_stop_freq": 5, "pred_early_stop_margin": 1.5}
        ret = multi_logloss(y_test, gbm.predict(X_test, pred_parameter=pred_parameter))
        self.assertLess(ret, 0.8)
        self.assertGreater(ret, 0.5)  # loss will be higher than when evaluating the full model

        pred_parameter = {"pred_early_stop": True, "pred_early_stop_freq": 5, "pred_early_stop_margin": 5.5}
        ret = multi_logloss(y_test, gbm.predict(X_test, pred_parameter=pred_parameter))
        self.assertLess(ret, 0.2)
Developer: hubei2626662 | Project: LightGBM | Lines: 26 | Source: test_engine.py


Example 9: ModelSelectionTest01

def ModelSelectionTest01():
	from sklearn import datasets, svm
	import numpy as np
	digits = datasets.load_digits()
	X_digits = digits.data
	Y_digits = digits.target
	svc = svm.SVC(C = 1, kernel = 'linear')
	score = svc.fit(X_digits[:-100], Y_digits[:-100]).score(X_digits[-100:], Y_digits[-100:])

	#print score

	X_folds = np.array_split(X_digits, 3)
	Y_folds = np.array_split(Y_digits, 3)

	#print len(X_folds[0])

	scores = list()

	for k in range(3):
		X_train = list(X_folds) #X_folds is a list with 3 elements; list() copies it so we can pop
		X_test = X_train.pop(k) #the test fold is the k-th element of the copy
		X_train = np.concatenate(X_train) #train on what is left after removing the test fold
		#print len(X_train)
		Y_train = list(Y_folds)
		Y_test = Y_train.pop(k)
		Y_train = np.concatenate(Y_train)

		scores.append(svc.fit(X_train, Y_train).score(X_test, Y_test))

	#print scores


	from sklearn import cross_validation
	k_fold = cross_validation.KFold(n = 6, n_folds = 3)
	for train_indices, test_indices in k_fold:
		print(train_indices, test_indices)

	k_fold = cross_validation.KFold(len(X_digits), n_folds = 3)
	scores = [svc.fit(X_digits[train], Y_digits[train]).score(X_digits[test], Y_digits[test]) for train , test in k_fold]

	#print scores

	scores = cross_validation.cross_val_score(svc, X_digits, Y_digits, cv = k_fold, n_jobs = 1)
	#print scores

	from sklearn.grid_search import GridSearchCV
	gammas = np.logspace(-6, -1, 10)
	clf = GridSearchCV(estimator = svc, param_grid = dict(gamma = gammas), n_jobs = 1)
	clf.fit(X_digits[:1000], Y_digits[:1000])
	print(clf.best_score_)
	print(clf.best_estimator_.gamma)

	from sklearn import linear_model, datasets
	lasso = linear_model.LassoCV()    #unlike Lasso, LassoCV chooses the regularization strength alpha via cross-validation
	diabetes = datasets.load_diabetes()
	X_diabetes = diabetes.data
	Y_diabetes = diabetes.target
	lasso.fit(X_diabetes, Y_diabetes)

	print(lasso.alpha_)
Developer: hyliu0302 | Project: scikit-learn-notes | Lines: 60 | Source: myScikitLearnFcns.py


Example 10: test_unsorted_indices

def test_unsorted_indices():
    # test that the result with sorted and unsorted indices in csr is the same
    # we use a subset of digits as iris, blobs or make_classification didn't
    # show the problem
    digits = load_digits()
    X, y = digits.data[:50], digits.target[:50]
    X_test = sparse.csr_matrix(digits.data[50:100])

    X_sparse = sparse.csr_matrix(X)
    coef_dense = svm.SVC(kernel='linear', probability=True,
                         random_state=0).fit(X, y).coef_
    sparse_svc = svm.SVC(kernel='linear', probability=True,
                         random_state=0).fit(X_sparse, y)
    coef_sorted = sparse_svc.coef_
    # make sure dense and sparse SVM give the same result
    assert_array_almost_equal(coef_dense, coef_sorted.toarray())

    X_sparse_unsorted = X_sparse[np.arange(X.shape[0])]
    X_test_unsorted = X_test[np.arange(X_test.shape[0])]

    # make sure we scramble the indices
    assert_false(X_sparse_unsorted.has_sorted_indices)
    assert_false(X_test_unsorted.has_sorted_indices)

    unsorted_svc = svm.SVC(kernel='linear', probability=True,
                           random_state=0).fit(X_sparse_unsorted, y)
    coef_unsorted = unsorted_svc.coef_
    # make sure unsorted indices give same result
    assert_array_almost_equal(coef_unsorted.toarray(), coef_sorted.toarray())
    assert_array_almost_equal(sparse_svc.predict_proba(X_test_unsorted),
                              sparse_svc.predict_proba(X_test))
Developer: as133 | Project: scikit-learn | Lines: 31 | Source: test_sparse.py


Example 11: test_tutorial

    def test_tutorial(self):
        """

        Verifies we can do what sklearn does here:
        http://scikit-learn.org/stable/tutorial/basic/tutorial.html

        """
        digits = datasets.load_digits()
        digits_data = digits.data
        # for now, we need a column vector rather than an array
        digits_target = digits.target

        p = Pipeline()

        # load data from a numpy dataset
        stage_data = NumpyRead(digits_data)
        stage_target = NumpyRead(digits_target)

        # train/test split
        stage_split_data = SplitTrainTest(2, test_size=1, random_state=0)

        # build a classifier
        stage_clf = wrap_and_make_instance(SVC, gamma=0.001, C=100.)

        # output to a csv
        stage_csv = CSVWrite(self._tmp_files('out.csv'))

        node_data, node_target, node_split, node_clf, node_csv = map(
            p.add, [
                stage_data, stage_target, stage_split_data, stage_clf,
                stage_csv])

        # connect the pipeline stages together
        node_data['output'] > node_split['input0']
        node_target['output'] > node_split['input1']
        node_split['train0'] > node_clf['X_train']
        node_split['train1'] > node_clf['y_train']
        node_split['test0'] > node_clf['X_test']
        node_clf['y_pred'] > node_csv['input']

        self.run_pipeline(p)
        
        result = self._tmp_files.csv_read('out.csv', True)

        # making sure we get the same result as sklearn
        clf = SVC(gamma=0.001, C=100.)
        # The tutorial just splits using array slicing, but we need to make
        #   sure that both UPSG and sklearn are splitting the same way, so we
        #   do something more sophisticated
        train_X, test_X, train_y, test_y = train_test_split(
            digits_data, digits_target, test_size=1, random_state=0)
        clf.fit(train_X, np.ravel(train_y))
        control = clf.predict(test_X)[0]

        self.assertAlmostEqual(result, control)

        # model persistence
        s = pickle.dumps(stage_clf)
        stage_clf2 = pickle.loads(s)
        self.assertEqual(stage_clf.get_params(), stage_clf2.get_params())
Developer: macressler | Project: UPSG | Lines: 60 | Source: test_sklearn_parity.py


Example 12: main

from sklearn.datasets import load_digits
from sklearn.manifold import MDS

def main():
    digits = load_digits()
    X = digits.data
    y = digits.target
    mds = MDS()
    X_mds = mds.fit_transform(X)
    plot_embedding(X_mds, y)  # plot_embedding is a project-local plotting helper
Developer: JackBass | Project: ml-algorithms-simple | Lines: 7 | Source: mds_sklearn_sample.py


Example 13: test_constraint_removal

def test_constraint_removal():
    digits = load_digits()
    X, y = digits.data, digits.target
    y = 2 * (y % 2) - 1  # even vs odd as +1 vs -1
    X = X / 16.
    pbl = BinarySVMModel(n_features=X.shape[1])
    clf_no_removal = OneSlackSSVM(model=pbl, max_iter=500, verbose=1, C=10,
                                  inactive_window=0, tol=0.01)
    clf_no_removal.fit(X, y)
    clf = OneSlackSSVM(model=pbl, max_iter=500, verbose=1, C=10, tol=0.01,
                       inactive_threshold=1e-8)
    clf.fit(X, y)

    # results are mostly equal
    # if we decrease tol, they will get more similar
    assert_less(np.mean(clf.predict(X) != clf_no_removal.predict(X)), 0.02)

    # without removal, have as many constraints as iterations
    # +1 for true y constraint
    assert_equal(len(clf_no_removal.objective_curve_) + 1,
                 len(clf_no_removal.constraints_))

    # with removal, there are fewer constraints than iterations
    assert_less(len(clf.constraints_),
                len(clf.objective_curve_))
Developer: hushell | Project: pystruct | Lines: 25 | Source: test_one_slack_ssvm.py


Example 14: main

def main():
    # load the digits data in; VW's one-against-all (oaa) expects labels starting at 1
    data = load_digits()

    X = Array2Dict().fit_transform(data.data)
    y = data.target + 1

    i = int(0.8 * len(X))
    X_train, X_test = X[:i], X[i:]
    y_train, y_test = y[:i], y[i:]

    # do the actual learning
    m = VW_Classifier(loss='logistic', moniker='example_sklearn', passes=10, silent=True, learning_rate=10, raw=True, oaa=10)
    m.fit(X_train, y_train)
    # print confusion matrix on test data
    y_est = m.predict_proba(X_test)
    lines = y_est
    #print y_est
    probs = []
    for i, line in enumerate(lines):
      line = line.split()
      labels, vs = zip(*[[float(x) for x in l.split(':')] for l in line[:]])
      probs__ = sigmoid(asarray(vs))
      probs_ = probs__/probs__.sum()
      probs.append(probs_)

    probs = np.asarray(probs)
    print(probs)
Developer: aboSamoor | Project: vowpal_porpoise | Lines: 28 | Source: example_predictproba.py


Example 15: main

def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # One-hot encoding of nominal y-values
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
        learning_rate=0.001, 
        loss=CrossEntropy,
        activation_function=Sigmoid)
    clf.fit(X_train, y_train)

    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Perceptron", accuracy=accuracy, legend_labels=np.unique(y))
Developer: PSEUDOBUBLAR | Project: ML-From-Scratch | Lines: 26 | Source: perceptron.py


Example 16: code

def code():
    digits = datasets.load_digits()
    X_digits = digits.data
    y_digits = digits.target
    """X_folds = np.array_split(X_digits, 3)
    y_folds = np.array_split(y_digits, 3)"""
    svc = svm.SVC(C=1, kernel='linear')
    """scores = list()


    for k in range(3):
        # We use 'list' to copy, in order to 'pop' later on
        X_train = list(X_folds)
        X_test = X_train.pop(k)
        X_train = np.concatenate(X_train)
        y_train = list(y_folds)
        y_test  = y_train.pop(k)
        y_train = np.concatenate(y_train)
        scores.append(svc.fit(X_train, y_train).score(X_test, y_test))

    print(scores)"""

    k_fold = cross_validation.KFold(len(X_digits), n_folds=3)
    """for train_indices, test_indices in k_fold:
        print('Train: %s | test: %s' % (train_indices, test_indices))"""


    #print([svc.fit(X_digits[train], y_digits[train]).score(X_digits[test], y_digits[test]) for train, test in k_fold])

    print(cross_validation.cross_val_score(svc, X_digits, y_digits, cv=k_fold,n_jobs=-1))
Developer: imaculate | Project: scikit-learn-tutorials | Lines: 30 | Source: KFoldScore.py


Example 17: test_backprop

def test_backprop():
    # loading data
    digits = load_digits()
    X = digits['data']
    y = digits['target']

    # dividing in training, validation, and test set
    nsamples = X.shape[0]
    end_train_idx = int(0.5 * nsamples)
    end_val_idx = int(0.7 * nsamples)
    perm = np.random.permutation(nsamples)
    Xtrain = X[perm[:end_train_idx]]
    Xval = X[perm[end_train_idx:end_val_idx]]
    Xtest = X[perm[end_val_idx:]]
    ytrain = y[perm[:end_train_idx]]
    yval = y[perm[end_train_idx:end_val_idx]]
    ytest = y[perm[end_val_idx:]]

    # data normalization
    mean = Xtrain.mean(0)
    std = Xtrain.std(0)
    std[std == 0] = 1
    Xtrain = (Xtrain - mean) / std
    Xval = (Xval - mean) / std
    Xtest = (Xtest - mean) / std

    # net params
    input_size = Xtrain.shape[1]
    hidden_size = 30
    output_size = np.unique(y).size

    net = Sigmoidal2LayerMLP_WithSoftmax(input_size,
                                         hidden_size,
                                         output_size,
                                         bias_init=0.0,
                                         lr=0.0001,
                                         momen_decay=0.0,
                                         l2=0.1)

    x = Xtrain[0]
    yi = y[0]
    net.forward(x)
    loss = net.backward(yi)
    Wih_grad = net.Wih_grad.copy()
    Who_grad = net.Who_grad.copy()
    hb_grad = net.hb_grad.copy()
    ob_grad = net.ob_grad.copy()
    e = 1e-6
    for i in range(net.Wih.shape[0]):
        for h in range(net.Wih.shape[1]):
            net.Wih[i, h] += e
            net.forward(x)
            loss1 = net.loss(yi)
            net.Wih[i, h] -= 2 * e
            net.forward(x)
            loss2 = net.loss(yi)
            print('estimated grad W%d_%d = %.4f' % (i, h,
                                                    (loss1 - loss2) / (2 * e)))
            print('backprop grad = %.4f' % Wih_grad[i, h])
            net.Wih[i, h] += e
Developer: cesarsalgado | Project: selfimprovingpy | Lines: 60 | Source: mlp.py


Example 18: main

def main():
    # parameters to cross-validate over
    parameters = {
        'l2': np.logspace(-5, 0, num=6),
    }

    # load the digits data in, make a binary decision problem out of it
    data = load_digits()

    X = Array2Dict().fit_transform(data.data)
    y = 2 * (data.target >= 5) - 1

    i = int(0.8 * len(X))
    X_train, X_test = X[:i], X[i:]
    y_train, y_test = y[:i], y[i:]

    # do the actual learning
    gs = GridSearchCV(
        VW_Classifier(loss='logistic', moniker='example_sklearn',
                      passes=10, silent=True, learning_rate=10),
        param_grid=parameters,
        score_func=f1_score,
        cv=StratifiedKFold(y_train),
    ).fit(X_train, y_train)

    # print out results from cross-validation
    estimator = gs.best_estimator_
    score = gs.best_score_
    print('Achieved a F1 score of %f using l2 == %f during cross-validation' % (score, estimator.l2))

    # print confusion matrix on test data
    y_est = estimator.fit(X_train, y_train).predict(X_test)
    print('Confusion Matrix:')
    print(confusion_matrix(y_test, y_est))
Developer: ChenglongChen | Project: vowpal_porpoise | Lines: 34 | Source: example_sklearn.py


Example 19: test_load_digits

def test_load_digits():
    digits = load_digits()
    assert_equal(digits.data.shape, (1797, 64))
    assert_equal(numpy.unique(digits.target).size, 10)

    # test return_X_y option
    check_return_X_y(digits, partial(load_digits))
Developer: AlexisMignon | Project: scikit-learn | Lines: 7 | Source: test_base.py


Example 20: test_sklearn_nfolds_cv

def test_sklearn_nfolds_cv():
    tm._skip_if_no_sklearn()
    from sklearn.datasets import load_digits
    from sklearn.model_selection import StratifiedKFold

    digits = load_digits(3)
    X = digits['data']
    y = digits['target']
    dm = xgb.DMatrix(X, label=y)

    params = {
        'max_depth': 2,
        'eta': 1,
        'silent': 1,
        'objective': 'multi:softprob',
        'num_class': 3
    }

    seed = 2016
    nfolds = 5
    skf = StratifiedKFold(n_splits=nfolds, shuffle=True, random_state=seed)

    cv1 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, seed=seed)
    cv2 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, folds=skf, seed=seed)
    cv3 = xgb.cv(params, dm, num_boost_round=10, nfold=nfolds, stratified=True, seed=seed)
    assert cv1.shape[0] == cv2.shape[0] and cv2.shape[0] == cv3.shape[0]
    assert cv2.iloc[-1, 0] == cv3.iloc[-1, 0]
Developer: ChangXiaodong | Project: xgboost-withcomments | Lines: 28 | Source: test_with_sklearn.py



Note: The sklearn.datasets.load_digits examples in this article were compiled by 纯净天空 from source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by many developers; copyright remains with the original authors. For distribution and use, please refer to the license of the corresponding project. Do not repost without permission.

