Python datasets.fetch_20newsgroups_vectorized Function Code Examples


This article collects typical usage examples of the Python function sklearn.datasets.fetch_20newsgroups_vectorized. If you have been wondering exactly how to call fetch_20newsgroups_vectorized, how it is used in practice, or what real-world examples look like, the hand-picked code samples below should help.



Twenty code examples of fetch_20newsgroups_vectorized are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples. As a warm-up, a minimal usage sketch follows before the collected examples.
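Here is a minimal sketch of calling the function directly. It assumes an internet connection for the first download; the return_X_y flag shown at the end exists only in more recent scikit-learn releases.

from sklearn.datasets import fetch_20newsgroups_vectorized

# The first call downloads the 20 newsgroups data and caches the vectorized
# features; later calls load them from the local scikit-learn data home.
bunch = fetch_20newsgroups_vectorized(subset='train')
print(bunch.data.shape)    # sparse CSR feature matrix, e.g. (11314, 130107)
print(bunch.target.shape)  # one integer label (one of 20 newsgroups) per post

# Newer scikit-learn releases can return the (data, target) pair directly:
X, y = fetch_20newsgroups_vectorized(subset='all', return_X_y=True)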

Example 1: test_20news_vectorized

def test_20news_vectorized():
    try:
        datasets.fetch_20newsgroups(subset='all',
                                    download_if_missing=False)
    except IOError:
        raise SkipTest("Download 20 newsgroups to run this test")

    # test subset = train
    bunch = datasets.fetch_20newsgroups_vectorized(subset="train")
    assert_true(sp.isspmatrix_csr(bunch.data))
    assert_equal(bunch.data.shape, (11314, 130107))
    assert_equal(bunch.target.shape[0], 11314)
    assert_equal(bunch.data.dtype, np.float64)

    # test subset = test
    bunch = datasets.fetch_20newsgroups_vectorized(subset="test")
    assert_true(sp.isspmatrix_csr(bunch.data))
    assert_equal(bunch.data.shape, (7532, 130107))
    assert_equal(bunch.target.shape[0], 7532)
    assert_equal(bunch.data.dtype, np.float64)

    # test return_X_y option
    fetch_func = partial(datasets.fetch_20newsgroups_vectorized, subset='test')
    check_return_X_y(bunch, fetch_func)

    # test subset = all
    bunch = datasets.fetch_20newsgroups_vectorized(subset='all')
    assert_true(sp.isspmatrix_csr(bunch.data))
    assert_equal(bunch.data.shape, (11314 + 7532, 130107))
    assert_equal(bunch.target.shape[0], 11314 + 7532)
    assert_equal(bunch.data.dtype, np.float64)
Author: AlexisMignon | Project: scikit-learn | Lines: 31 | Source: test_20news.py


Example 2: test_LogisticRegressionCV

def test_LogisticRegressionCV():
    bunch = fetch_20newsgroups_vectorized(subset="train")
    X = bunch.data
    y = bunch.target

    y[y < y.mean()] = -1
    y[y >= y.mean()] = 1
    Xt, Xh, yt, yh = cross_validation.train_test_split(
        X, y, test_size=.5, random_state=0)

    # compute the scores
    all_scores = []
    all_alphas = np.linspace(-12, 0, 5)
    for a in all_alphas:
        lr = linear_model.LogisticRegression(
            solver='lbfgs', C=np.exp(-a), fit_intercept=False, tol=1e-6,
            max_iter=100)
        lr.fit(Xt, yt)
        score_scv = linear_model.logistic._logistic_loss(
            lr.coef_.ravel(), Xh, yh, 0)
        all_scores.append(score_scv)
    all_scores = np.array(all_scores)

    best_alpha = all_alphas[np.argmin(all_scores)]

    clf = LogisticRegressionCV(max_iter=50)
    clf.fit(Xt, yt, Xh, yh)
    np.testing.assert_array_less(np.abs(clf.alpha_ - best_alpha), 0.5)
Author: dongzhixiang | Project: hoag | Lines: 28 | Source: tests.py


Example 3: generate_data

    def generate_data(case, sparse=False):
        # Generate regression / classification data. 
        bunch = None 
        if case == 'regression':
            bunch = datasets.load_boston()
        elif case == 'classification': 
            bunch = datasets.fetch_20newsgroups_vectorized(subset='all')
        X, y = shuffle(bunch.data, bunch.target)
        offset = int(X.shape[0] * 0.8) 
        X_train, y_train = X[:offset], y[:offset]
        X_test, y_test = X[offset:], y[offset:] 
        if sparse:
            X_train = csr_matrix(X_train)
            X_test = csr_matrix(X_test)
        else:
            X_train = np.array(X_train)
            X_test = np.array(X_test)
        y_test = np.array(y_test)
        y_train = np.array(y_train)
        data = {
            'X_train': X_train,
            'X_test': X_test,
            'y_train': y_train,
            'y_test': y_test,
        }

        return data 
Author: 0x0all | Project: machineLearning | Lines: 27 | Source: plot_model_complexity_influence.py


Example 4: test_20news_vectorized

def test_20news_vectorized():
    # This test is slow.
    raise SkipTest

    bunch = datasets.fetch_20newsgroups_vectorized(subset="train")
    assert_equal(bunch.data.shape, (11314, 107130))
    assert_equal(bunch.target.shape[0], 11314)
    assert_equal(bunch.data.dtype, np.float64)

    bunch = datasets.fetch_20newsgroups_vectorized(subset="test")
    assert_equal(bunch.data.shape, (7532, 107130))
    assert_equal(bunch.target.shape[0], 7532)
    assert_equal(bunch.data.dtype, np.float64)

    bunch = datasets.fetch_20newsgroups_vectorized(subset="all")
    assert_equal(bunch.data.shape, (11314 + 7532, 107130))
    assert_equal(bunch.target.shape[0], 11314 + 7532)
    assert_equal(bunch.data.dtype, np.float64)
Author: forkloop | Project: scikit-learn | Lines: 18 | Source: test_20news.py


Example 5: load_data

def load_data(name, partition_id, n_partitions):
    """load partition of data into global var `name`"""
    from sklearn.datasets import fetch_20newsgroups_vectorized
    from sklearn.utils import gen_even_slices
    dataset = fetch_20newsgroups_vectorized('test')
    size = dataset.data.shape[0]
    slices = list(gen_even_slices(size, n_partitions))
    
    part = dataset.data[slices[partition_id]]
    # put it in globals
    globals().update({name : part})
    return part.shape
Author: minrk | Project: pycon-pydata-sprint | Lines: 12 | Source: parallel_kmeans.py


Example 6: test_20news_vectorized

def test_20news_vectorized():
    # This test is slow.
    raise SkipTest("Test too slow.")

    bunch = datasets.fetch_20newsgroups_vectorized(subset="train")
    assert_true(sp.isspmatrix_csr(bunch.data))
    assert_equal(bunch.data.shape, (11314, 107428))
    assert_equal(bunch.target.shape[0], 11314)
    assert_equal(bunch.data.dtype, np.float64)

    bunch = datasets.fetch_20newsgroups_vectorized(subset="test")
    assert_true(sp.isspmatrix_csr(bunch.data))
    assert_equal(bunch.data.shape, (7532, 107428))
    assert_equal(bunch.target.shape[0], 7532)
    assert_equal(bunch.data.dtype, np.float64)

    bunch = datasets.fetch_20newsgroups_vectorized(subset="all")
    assert_true(sp.isspmatrix_csr(bunch.data))
    assert_equal(bunch.data.shape, (11314 + 7532, 107428))
    assert_equal(bunch.target.shape[0], 11314 + 7532)
    assert_equal(bunch.data.dtype, np.float64)
Author: 0664j35t3r | Project: scikit-learn | Lines: 21 | Source: test_20news.py


Example 7: create_plot_curve

def create_plot_curve():
    clients = parallel.Client()
    lview = clients.load_balanced_view()
    dview = clients[:]
    dview['data']= fetch_20newsgroups_vectorized(remove=('headers', 'footers', 'quotes'))
    lview.block = True
    alphas = [1E-4, 1E-3, 1E-2, 1E-1]

    with dview.sync_imports():
        import numpy
        from sklearn.naive_bayes import MultinomialNB
        from sklearn.cross_validation import cross_val_score

    res = lview.map(grid_search, alphas)
    return res
Author: miramzz | Project: machine_learning | Lines: 15 | Source: machine_l.py


Example 8: get_data

def get_data(dataset_name):
    print("Getting dataset: %s" % dataset_name)

    if dataset_name == 'lfw_people':
        X = fetch_lfw_people().data
    elif dataset_name == '20newsgroups':
        X = fetch_20newsgroups_vectorized().data[:, :100000]
    elif dataset_name == 'olivetti_faces':
        X = fetch_olivetti_faces().data
    elif dataset_name == 'rcv1':
        X = fetch_rcv1().data
    elif dataset_name == 'CIFAR':
        if handle_missing_dataset(CIFAR_FOLDER) == "skip":
            return
        X1 = [unpickle("%sdata_batch_%d" % (CIFAR_FOLDER, i + 1))
              for i in range(5)]
        X = np.vstack(X1)
        del X1
    elif dataset_name == 'SVHN':
        if handle_missing_dataset(SVHN_FOLDER) == 0:
            return
        X1 = sp.io.loadmat("%strain_32x32.mat" % SVHN_FOLDER)['X']
        X2 = [X1[:, :, :, i].reshape(32 * 32 * 3) for i in range(X1.shape[3])]
        X = np.vstack(X2)
        del X1
        del X2
    elif dataset_name == 'low rank matrix':
        X = make_low_rank_matrix(n_samples=500, n_features=np.int(1e4),
                                 effective_rank=100, tail_strength=.5,
                                 random_state=random_state)
    elif dataset_name == 'uncorrelated matrix':
        X, _ = make_sparse_uncorrelated(n_samples=500, n_features=10000,
                                        random_state=random_state)
    elif dataset_name == 'big sparse matrix':
        sparsity = np.int(1e6)
        size = np.int(1e6)
        small_size = np.int(1e4)
        data = np.random.normal(0, 1, np.int(sparsity/10))
        data = np.repeat(data, 10)
        row = np.random.uniform(0, small_size, sparsity)
        col = np.random.uniform(0, small_size, sparsity)
        X = sp.sparse.csr_matrix((data, (row, col)), shape=(size, small_size))
        del data
        del row
        del col
    else:
        X = fetch_mldata(dataset_name).data
    return X
Author: 0664j35t3r | Project: scikit-learn | Lines: 48 | Source: bench_plot_randomized_svd.py


Example 9: generate_data

def generate_data(case, sparse=False):
    """Generate regression/classification data."""
    bunch = None
    if case == "regression":
        bunch = datasets.load_boston()
    elif case == "classification":
        bunch = datasets.fetch_20newsgroups_vectorized(subset="all")
    X, y = shuffle(bunch.data, bunch.target)
    offset = int(X.shape[0] * 0.8)
    X_train, y_train = X[:offset], y[:offset]
    X_test, y_test = X[offset:], y[offset:]
    if sparse:
        X_train = csr_matrix(X_train)
        X_test = csr_matrix(X_test)
    else:
        X_train = np.array(X_train)
        X_test = np.array(X_test)
    y_test = np.array(y_test)
    y_train = np.array(y_train)
    data = {"X_train": X_train, "X_test": X_test, "y_train": y_train, "y_test": y_test}
    return data
Author: xuq | Project: Dash-Docset | Lines: 21 | Source: plot_model_complexity_influence.py


Example 10: get_results

def get_results():
    # get data
    data = fetch_20newsgroups_vectorized(remove=('headers',
                                                 'footers',
                                                 'quotes'))
    alphas = [1E-4, 1E-3, 1E-2, 1E-1]
    # set up dview for imports
    clients = parallel.Client()
    dview = clients[:]
    with dview.sync_imports():
        # doesn't seem to like import numpy as np, using numpy instead
        import numpy
        from sklearn.naive_bayes import MultinomialNB
        from sklearn.cross_validation import cross_val_score
    dview.block = True
    # send data to clients
    dview['data'] = data
    # set up load balanced view for parallel processing
    lview = clients.load_balanced_view()
    # set blocking to True to get all results once processing is done
    lview.block = True
    results = lview.map(get_single_result, alphas)
    return results
Author: MikeDelaney | Project: sentiment | Lines: 23 | Source: parallel.py


Example 11: evaluate

from sklearn.datasets import fetch_20newsgroups_vectorized
from sklearn.decomposition import PCA
import featurelearning

def evaluate(dataset_name, fl, ratio):
    print dataset_name, fl.__name__, ratio
    d = dataset.load_dataset(dataset_name)
    fea = d.data
    label = d.target
    fea = fl(fea)
    ss = StratifiedShuffleSplit(label, 3, test_size=(1-ratio), random_state=0)
    svc = LinearSVC()
    for train, test in ss:
        svc.fit(fea[train,:], label[train,:])
        predict = svc.predict(fea[test, :])
        acc = accuracy_score(label[test, :], predict)
        print acc

if __name__ == '__main__':
    pca = PCA()
    train = fetch_20newsgroups_vectorized('train')
    test = fetch_20newsgroups_vectorized('test')
    svc = LinearSVC()
    train_data = pca.fit_transform(train.data.toarray())
    svc.fit(train_data, train.target)
    test_data = pca.transform(test.data.toarray())
    predict = svc.predict(test_data)
    acc = accuracy_score(test.target, predict)
    print acc
    # evaluate('20newsgroups', featurelearning.TF_IDF, 0.1)
    # evaluate('20newsgroups', featurelearning.LDA, 0.1)
Author: shirc | Project: text-feature-learning | Lines: 31 | Source: evaluation.py


Example 12: zip

for n_samples, color in zip(n_samples_range, colors):
    min_n_components = johnson_lindenstrauss_min_dim(n_samples, eps=eps_range)
    plt.semilogy(eps_range, min_n_components, color=color)

plt.legend(["n_samples = %d" % n for n in n_samples_range], loc="upper right")
plt.xlabel("Distortion eps")
plt.ylabel("Minimum number of dimensions")
plt.title("Johnson-Lindenstrauss bounds:\nn_components vs eps")

# Part 2: perform sparse random projection of some digits images which are
# quite low dimensional and dense or documents of the 20 newsgroups dataset
# which is both high dimensional and sparse

if '--twenty-newsgroups' in sys.argv:
    # Need an internet connection hence not enabled by default
    data = fetch_20newsgroups_vectorized().data[:500]
else:
    data = load_digits().data[:500]

n_samples, n_features = data.shape
print("Embedding %d samples with dim %d using various random projections"
      % (n_samples, n_features))

n_components_range = np.array([300, 1000, 10000])
dists = euclidean_distances(data, squared=True).ravel()

# select only non-identical samples pairs
nonzero = dists != 0
dists = dists[nonzero]

for n_components in n_components_range:
Author: AlexanderFabisch | Project: scikit-learn | Lines: 31 | Source: plot_johnson_lindenstrauss_bound.py


Example 13: exp

def exp(solvers, penalties, single_target, n_samples=30000, max_iter=20,
        dataset='rcv1', n_jobs=1, skip_slow=False):
    mem = Memory(cachedir=expanduser('~/cache'), verbose=0)

    if dataset == 'rcv1':
        rcv1 = fetch_rcv1()

        lbin = LabelBinarizer()
        lbin.fit(rcv1.target_names)

        X = rcv1.data
        y = rcv1.target
        y = lbin.inverse_transform(y)
        le = LabelEncoder()
        y = le.fit_transform(y)
        if single_target:
            y_n = y.copy()
            y_n[y > 16] = 1
            y_n[y <= 16] = 0
            y = y_n

    elif dataset == 'digits':
        digits = load_digits()
        X, y = digits.data, digits.target
        if single_target:
            y_n = y.copy()
            y_n[y < 5] = 1
            y_n[y >= 5] = 0
            y = y_n
    elif dataset == 'iris':
        iris = load_iris()
        X, y = iris.data, iris.target
    elif dataset == '20newspaper':
        ng = fetch_20newsgroups_vectorized()
        X = ng.data
        y = ng.target
        if single_target:
            y_n = y.copy()
            y_n[y > 4] = 1
            y_n[y <= 16] = 0
            y = y_n

    X = X[:n_samples]
    y = y[:n_samples]

    cached_fit = mem.cache(fit_single)
    out = Parallel(n_jobs=n_jobs, mmap_mode=None)(
        delayed(cached_fit)(solver, X, y,
                            penalty=penalty, single_target=single_target,
                            C=1, max_iter=max_iter, skip_slow=skip_slow)
        for solver in solvers
        for penalty in penalties)

    res = []
    idx = 0
    for solver in solvers:
        for penalty in penalties:
            if not (skip_slow and solver == 'lightning' and penalty == 'l1'):
                lr, times, train_scores, test_scores, accuracies = out[idx]
                this_res = dict(solver=solver, penalty=penalty,
                                single_target=single_target,
                                times=times, train_scores=train_scores,
                                test_scores=test_scores,
                                accuracies=accuracies)
                res.append(this_res)
            idx += 1

    with open('bench_saga.json', 'w+') as f:
        json.dump(res, f)
Author: AlexandreAbraham | Project: scikit-learn | Lines: 69 | Source: bench_saga.py


Example 14: fetch_20newsgroups_vectorized

import sparse_interaction
from sklearn.datasets import fetch_20newsgroups_vectorized
from sklearn.cross_validation import StratifiedKFold
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import f1_score

dat = fetch_20newsgroups_vectorized()
X = dat.data
Y = dat.target
cv = StratifiedKFold(Y)

X = X[:, :20000]

si = sparse_interaction.SparseInteractionFeatures()
X_i = si.transform(X)

scores, scores_i = [], []

clf = SGDClassifier(penalty='l1', n_iter=10)

for train, test in cv:
    clf.fit(X[train], Y[train])
    scores.append(f1_score(Y[test], clf.predict(X[test]), average='macro', pos_label=None))
    clf.fit(X_i[train], Y[train])
    scores_i.append(f1_score(Y[test], clf.predict(X_i[test]), average='macro', pos_label=None))
print sum(scores), sum(scores_i)
Author: AWNystrom | Project: SparseInteraction | Lines: 26 | Source: compare.py


Example 15: calculate_result

analyze = tv.build_analyzer()
tv.get_feature_names()#statistical features/terms

# (precision * recall) / (precision + recall)
def calculate_result(actual,pred):
    m_precision = metrics.precision_score(actual,pred);
    m_recall = metrics.recall_score(actual,pred);
    print 'predict info:'
    print 'precision:{0:.3f}'.format(m_precision)
    print 'recall:{0:0.3f}'.format(m_recall);
    print 'f1-score:{0:.3f}'.format(metrics.f1_score(actual,pred));

# Alternatively, use the feature-extraction helper bundled with sklearn: fetch_20newsgroups_vectorized
print '*************************\nfetch_20newsgroups_vectorized\n*************************'
from sklearn.datasets import fetch_20newsgroups_vectorized
tfidf_train_3 = fetch_20newsgroups_vectorized(subset = 'train');
tfidf_test_3 = fetch_20newsgroups_vectorized(subset = 'test');
print "the shape of train is "+repr(tfidf_train_3.data.shape)
print "the shape of test is "+repr(tfidf_test_3.data.shape)

# Classification
######################################################
#Multinomial Naive Bayes Classifier
print '*************************\nNaive Bayes\n*************************'
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
newsgroups_test = fetch_20newsgroups(subset = 'test',
                                     categories = categories);
fea_test = vectorizer.fit_transform(newsgroups_test.data);
#create the Multinomial Naive Bayesian Classifier
clf = MultinomialNB(alpha = 0.01)
Author: GZJackCai | Project: machineLearnTest | Lines: 31 | Source: testScipy.py


Example 16: MultinomialNB

    "naive_bayes": MultinomialNB(),
    "adaboost": AdaBoostClassifier(n_estimators=10),
}


###############################################################################
# Data

if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--estimators', nargs="+", required=True,
                        choices=ESTIMATORS)
    args = vars(parser.parse_args())

    data_train = fetch_20newsgroups_vectorized(subset="train")
    data_test = fetch_20newsgroups_vectorized(subset="test")
    X_train = check_array(data_train.data, dtype=np.float32,
                          accept_sparse="csc")
    X_test = check_array(data_test.data, dtype=np.float32, accept_sparse="csr")
    y_train = data_train.target
    y_test = data_test.target

    print("20 newsgroups")
    print("=============")
    print("X_train.shape = {0}".format(X_train.shape))
    print("X_train.format = {0}".format(X_train.format))
    print("X_train.dtype = {0}".format(X_train.dtype))
    print("X_train density = {0}"
          "".format(X_train.nnz / np.product(X_train.shape)))
    print("y_train {0}".format(y_train.shape))
Author: 0664j35t3r | Project: scikit-learn | Lines: 31 | Source: bench_20newsgroups.py


Example 17: TruncatedSVD

import time
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
from sklearn.cross_validation import train_test_split
from sklearn.decomposition import TruncatedSVD
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

from GPSVI.core.GPClassifier import GPClassifier

np.random.seed(0)

data = datasets.fetch_20newsgroups_vectorized()

xTr, xTe, yTr, yTe = train_test_split(data.data, data.target, test_size=0.80)

svd = TruncatedSVD(algorithm='randomized', n_components=3, tol=0.0)
svd.fit(xTr)
x = svd.transform(xTr)
fig = plt.figure('Show data')
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x[:,0], x[:,1], x[:,2], c=yTr, cmap=matplotlib.cm.rainbow)


t0 = time.time()
clf_lr = LogisticRegression(C=2.0)
clf_lr.fit(xTr, yTr)
lr_score = clf_lr.score(xTe, yTe)
Author: AlchemicalChest | Project: Gaussian-Process-with-Stochastic-Variational-Inference | Lines: 31 | Source: test20newsgroups.py


Example 18: open

#!/usr/bin/env python
# -*- encoding: utf8 -*-
'''Train a svm model using 20newsgroup data.
'''

from sklearn import datasets, svm
import cPickle as pkl


__author__ = 'noahsark'


train = datasets.fetch_20newsgroups_vectorized(subset='train')
clf = svm.LinearSVC()
clf.fit(train.data, train.target)
with open('storm-starter/multilang/resources/svm_model.pkl', 'wb') as fp_:
    pkl.dump(clf, fp_)
Author: jimmylai | Project: slideshare | Lines: 17 | Source: train_model.py


Example 19: print

print(__doc__)
import numpy as np
from scipy.linalg import svd

from sklearn.datasets import fetch_20newsgroups_vectorized
from sklearn.datasets.samples_generator import make_classification
from sklearn.feature_selection import SelectKBest, chi2

from lightning.classification import FistaClassifier

def rank(M, eps=1e-9):
    U, s, V = svd(M, full_matrices=False)
    return np.sum(s > eps)


bunch = fetch_20newsgroups_vectorized(subset="train")
X_train = bunch.data
y_train = bunch.target

# Reduces dimensionality to make the example faster
ch2 = SelectKBest(chi2, k=5000)
X_train = ch2.fit_transform(X_train, y_train)

bunch = fetch_20newsgroups_vectorized(subset="test")
X_test = bunch.data
y_test = bunch.target
X_test = ch2.transform(X_test)

clf = FistaClassifier(C=1.0 / X_train.shape[0],
                      max_iter=200,
                      penalty="trace",
Author: DEVESHTARASIA | Project: lightning | Lines: 31 | Source: trace.py


Example 20: exp

def exp(solvers, penalty, single_target,
        n_samples=30000, max_iter=20,
        dataset='rcv1', n_jobs=1, skip_slow=False):
    dtypes_mapping = {
        "float64": np.float64,
        "float32": np.float32,
    }

    if dataset == 'rcv1':
        rcv1 = fetch_rcv1()

        lbin = LabelBinarizer()
        lbin.fit(rcv1.target_names)

        X = rcv1.data
        y = rcv1.target
        y = lbin.inverse_transform(y)
        le = LabelEncoder()
        y = le.fit_transform(y)
        if single_target:
            y_n = y.copy()
            y_n[y > 16] = 1
            y_n[y <= 16] = 0
            y = y_n

    elif dataset == 'digits':
        digits = load_digits()
        X, y = digits.data, digits.target
        if single_target:
            y_n = y.copy()
            y_n[y < 5] = 1
            y_n[y >= 5] = 0
            y = y_n
    elif dataset == 'iris':
        iris = load_iris()
        X, y = iris.data, iris.target
    elif dataset == '20newspaper':
        ng = fetch_20newsgroups_vectorized()
        X = ng.data
        y = ng.target
        if single_target:
            y_n = y.copy()
            y_n[y > 4] = 1
            y_n[y <= 16] = 0
            y = y_n

    X = X[:n_samples]
    y = y[:n_samples]

    out = Parallel(n_jobs=n_jobs, mmap_mode=None)(
        delayed(fit_single)(solver, X, y,
                            penalty=penalty, single_target=single_target,
                            dtype=dtype,
                            C=1, max_iter=max_iter, skip_slow=skip_slow)
        for solver in solvers
        for dtype in dtypes_mapping.values())

    res = []
    idx = 0
    for dtype_name in dtypes_mapping.keys():
        for solver in solvers:
            if not (skip_slow and
                    solver == 'lightning' and
                    penalty == 'l1'):
                lr, times, train_scores, test_scores, accuracies = out[idx]
                this_res = dict(solver=solver, penalty=penalty,
                                dtype=dtype_name,
                                single_target=single_target,
                                times=times, train_scores=train_scores,
                                test_scores=test_scores,
                                accuracies=accuracies)
                res.append(this_res)
            idx += 1

    with open('bench_saga.json', 'w+') as f:
        json.dump(res, f)
Author: allefpablo | Project: scikit-learn | Lines: 76 | Source: bench_saga.py



Note: The sklearn.datasets.fetch_20newsgroups_vectorized examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by many developers, and copyright remains with the original authors. For distribution and use, refer to the corresponding project's license; please do not republish without permission.

