• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python label.MultiLabelBinarizer类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中sklearn.preprocessing.label.MultiLabelBinarizer的典型用法代码示例。如果您正苦于以下问题:Python MultiLabelBinarizer类的具体用法?Python MultiLabelBinarizer怎么用?Python MultiLabelBinarizer使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了MultiLabelBinarizer类的17个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: test_multilabel_binarizer_unknown_class

def test_multilabel_binarizer_unknown_class():
    mlb = MultiLabelBinarizer()
    y = [[1, 2]]
    assert_raises(KeyError, mlb.fit(y).transform, [[0]])

    mlb = MultiLabelBinarizer(classes=[1, 2])
    assert_raises(KeyError, mlb.fit_transform, [[0]])
开发者ID:tguillemot,项目名称:scikit-learn,代码行数:7,代码来源:test_label.py


示例2: test_multilabel_binarizer_empty_sample

def test_multilabel_binarizer_empty_sample():
    mlb = MultiLabelBinarizer()
    y = [[1, 2], [1], []]
    Y = np.array([[1, 1],
                  [1, 0],
                  [0, 0]])
    assert_array_equal(mlb.fit_transform(y), Y)
开发者ID:manhhomienbienthuy,项目名称:scikit-learn,代码行数:7,代码来源:test_label.py


示例3: test_multilabel_binarizer_given_classes

def test_multilabel_binarizer_given_classes():
    inp = [(2, 3), (1,), (1, 2)]
    indicator_mat = np.array([[0, 1, 1],
                              [1, 0, 0],
                              [1, 0, 1]])
    # fit_transform()
    mlb = MultiLabelBinarizer(classes=[1, 3, 2])
    assert_array_equal(mlb.fit_transform(inp), indicator_mat)
    assert_array_equal(mlb.classes_, [1, 3, 2])

    # fit().transform()
    mlb = MultiLabelBinarizer(classes=[1, 3, 2])
    assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat)
    assert_array_equal(mlb.classes_, [1, 3, 2])

    # ensure works with extra class
    mlb = MultiLabelBinarizer(classes=[4, 1, 3, 2])
    assert_array_equal(mlb.fit_transform(inp),
                       np.hstack(([[0], [0], [0]], indicator_mat)))
    assert_array_equal(mlb.classes_, [4, 1, 3, 2])

    # ensure fit is no-op as iterable is not consumed
    inp = iter(inp)
    mlb = MultiLabelBinarizer(classes=[1, 3, 2])
    assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat)

    # ensure a ValueError is thrown if given duplicate classes
    err_msg = "The classes argument contains duplicate classes. Remove " \
              "these duplicates before passing them to MultiLabelBinarizer."
    mlb = MultiLabelBinarizer(classes=[1, 3, 2, 3])
    assert_raise_message(ValueError, err_msg, mlb.fit, inp)
开发者ID:manhhomienbienthuy,项目名称:scikit-learn,代码行数:31,代码来源:test_label.py


示例4: test_multilabel_binarizer_unknown_class

def test_multilabel_binarizer_unknown_class():
    mlb = MultiLabelBinarizer()
    y = [[1, 2]]
    Y = np.array([[1, 0], [0, 1]])
    w = 'unknown class(es) [0, 4] will be ignored'
    matrix = assert_warns_message(UserWarning, w,
                                  mlb.fit(y).transform, [[4, 1], [2, 0]])
    assert_array_equal(matrix, Y)

    Y = np.array([[1, 0, 0], [0, 1, 0]])
    mlb = MultiLabelBinarizer(classes=[1, 2, 3])
    matrix = assert_warns_message(UserWarning, w,
                                  mlb.fit(y).transform, [[4, 1], [2, 0]])
    assert_array_equal(matrix, Y)
开发者ID:manhhomienbienthuy,项目名称:scikit-learn,代码行数:14,代码来源:test_label.py


示例5: test_multilabel_binarizer_given_classes

def test_multilabel_binarizer_given_classes():
    inp = [(2, 3), (1,), (1, 2)]
    indicator_mat = np.array([[0, 1, 1],
                              [1, 0, 0],
                              [1, 0, 1]])
    # fit_transform()
    mlb = MultiLabelBinarizer(classes=[1, 3, 2])
    assert_array_equal(mlb.fit_transform(inp), indicator_mat)
    assert_array_equal(mlb.classes_, [1, 3, 2])

    # fit().transform()
    mlb = MultiLabelBinarizer(classes=[1, 3, 2])
    assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat)
    assert_array_equal(mlb.classes_, [1, 3, 2])

    # ensure works with extra class
    mlb = MultiLabelBinarizer(classes=[4, 1, 3, 2])
    assert_array_equal(mlb.fit_transform(inp),
                       np.hstack(([[0], [0], [0]], indicator_mat)))
    assert_array_equal(mlb.classes_, [4, 1, 3, 2])

    # ensure fit is no-op as iterable is not consumed
    inp = iter(inp)
    mlb = MultiLabelBinarizer(classes=[1, 3, 2])
    assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat)
开发者ID:BasilBeirouti,项目名称:scikit-learn,代码行数:25,代码来源:test_label.py


示例6: test_multilabel_binarizer_multiple_calls

def test_multilabel_binarizer_multiple_calls():
    inp = [(2, 3), (1,), (1, 2)]
    indicator_mat = np.array([[0, 1, 1],
                              [1, 0, 0],
                              [1, 0, 1]])

    indicator_mat2 = np.array([[0, 1, 1],
                               [1, 0, 0],
                               [1, 1, 0]])

    # first call
    mlb = MultiLabelBinarizer(classes=[1, 3, 2])
    assert_array_equal(mlb.fit_transform(inp), indicator_mat)
    # second call change class
    mlb.classes = [1, 2, 3]
    assert_array_equal(mlb.fit_transform(inp), indicator_mat2)
开发者ID:manhhomienbienthuy,项目名称:scikit-learn,代码行数:16,代码来源:test_label.py


示例7: fit_binarizers

def fit_binarizers(all_values):
    binarizers = {}
    for f in range(len(all_values[0])):
        cur_features = [context[f] for context in all_values]
        # only categorical values need to be binarized, ints/floats are left as they are
        if type(cur_features[0]) == str or type(cur_features[0]) == unicode:
            lb = LabelBinarizer()
            lb.fit(cur_features)
            binarizers[f] = lb
        elif type(cur_features[0]) == list:
            mlb = MultiLabelBinarizer()
            # default feature for unknown values
            cur_features.append(tuple(("__unk__",)))
            mlb.fit([tuple(x) for x in cur_features])
            binarizers[f] = mlb
    return binarizers
开发者ID:qe-team,项目名称:marmot,代码行数:16,代码来源:preprocessing_utils_old.py


示例8: test_multilabel_binarizer_non_integer_labels

def test_multilabel_binarizer_non_integer_labels():
    tuple_classes = np.empty(3, dtype=object)
    tuple_classes[:] = [(1,), (2,), (3,)]
    inputs = [
        ([('2', '3'), ('1',), ('1', '2')], ['1', '2', '3']),
        ([('b', 'c'), ('a',), ('a', 'b')], ['a', 'b', 'c']),
        ([((2,), (3,)), ((1,),), ((1,), (2,))], tuple_classes),
    ]
    indicator_mat = np.array([[0, 1, 1],
                              [1, 0, 0],
                              [1, 1, 0]])
    for inp, classes in inputs:
        # fit_transform()
        mlb = MultiLabelBinarizer()
        assert_array_equal(mlb.fit_transform(inp), indicator_mat)
        assert_array_equal(mlb.classes_, classes)
        assert_array_equal(mlb.inverse_transform(indicator_mat), inp)

        # fit().transform()
        mlb = MultiLabelBinarizer()
        assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat)
        assert_array_equal(mlb.classes_, classes)
        assert_array_equal(mlb.inverse_transform(indicator_mat), inp)

    mlb = MultiLabelBinarizer()
    assert_raises(TypeError, mlb.fit_transform, [({}), ({}, {'a': 'b'})])
开发者ID:manhhomienbienthuy,项目名称:scikit-learn,代码行数:26,代码来源:test_label.py


示例9: test_multilabel_binarizer

def test_multilabel_binarizer():
    # test input as iterable of iterables
    inputs = [
        lambda: [(2, 3), (1,), (1, 2)],
        lambda: ({2, 3}, {1}, {1, 2}),
        lambda: iter([iter((2, 3)), iter((1,)), {1, 2}]),
    ]
    indicator_mat = np.array([[0, 1, 1],
                              [1, 0, 0],
                              [1, 1, 0]])
    inverse = inputs[0]()
    for inp in inputs:
        # With fit_transform
        mlb = MultiLabelBinarizer()
        got = mlb.fit_transform(inp())
        assert_array_equal(indicator_mat, got)
        assert_array_equal([1, 2, 3], mlb.classes_)
        assert_equal(mlb.inverse_transform(got), inverse)

        # With fit
        mlb = MultiLabelBinarizer()
        got = mlb.fit(inp()).transform(inp())
        assert_array_equal(indicator_mat, got)
        assert_array_equal([1, 2, 3], mlb.classes_)
        assert_equal(mlb.inverse_transform(got), inverse)
开发者ID:manhhomienbienthuy,项目名称:scikit-learn,代码行数:25,代码来源:test_label.py


示例10: test_sparse_output_multilabel_binarizer

def test_sparse_output_multilabel_binarizer():
    # test input as iterable of iterables
    inputs = [
        lambda: [(2, 3), (1,), (1, 2)],
        lambda: (set([2, 3]), set([1]), set([1, 2])),
        lambda: iter([iter((2, 3)), iter((1,)), set([1, 2])]),
    ]
    indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]])

    inverse = inputs[0]()
    for sparse_output in [True, False]:
        for inp in inputs:
            # With fit_tranform
            mlb = MultiLabelBinarizer(sparse_output=sparse_output)
            got = mlb.fit_transform(inp())
            assert_equal(issparse(got), sparse_output)
            if sparse_output:
                got = got.toarray()
            assert_array_equal(indicator_mat, got)
            assert_array_equal([1, 2, 3], mlb.classes_)
            assert_equal(mlb.inverse_transform(got), inverse)

            # With fit
            mlb = MultiLabelBinarizer(sparse_output=sparse_output)
            got = mlb.fit(inp()).transform(inp())
            assert_equal(issparse(got), sparse_output)
            if sparse_output:
                got = got.toarray()
            assert_array_equal(indicator_mat, got)
            assert_array_equal([1, 2, 3], mlb.classes_)
            assert_equal(mlb.inverse_transform(got), inverse)

    assert_raises(ValueError, mlb.inverse_transform, csr_matrix(np.array([[0, 1, 1], [2, 0, 0], [1, 1, 0]])))
开发者ID:huyng,项目名称:scikit-learn,代码行数:33,代码来源:test_label.py


示例11: test_multilabel_binarizer_inverse_validation

def test_multilabel_binarizer_inverse_validation():
    inp = [(1, 1, 1, 0)]
    mlb = MultiLabelBinarizer()
    mlb.fit_transform(inp)
    # Not binary
    assert_raises(ValueError, mlb.inverse_transform, np.array([[1, 3]]))
    # The following binary cases are fine, however
    mlb.inverse_transform(np.array([[0, 0]]))
    mlb.inverse_transform(np.array([[1, 1]]))
    mlb.inverse_transform(np.array([[1, 0]]))

    # Wrong shape
    assert_raises(ValueError, mlb.inverse_transform, np.array([[1]]))
    assert_raises(ValueError, mlb.inverse_transform, np.array([[1, 1, 1]]))
开发者ID:manhhomienbienthuy,项目名称:scikit-learn,代码行数:14,代码来源:test_label.py


示例12: test_multilabel_binarizer_same_length_sequence

def test_multilabel_binarizer_same_length_sequence():
    # Ensure sequences of the same length are not interpreted as a 2-d array
    inp = [[1], [0], [2]]
    indicator_mat = np.array([[0, 1, 0], [1, 0, 0], [0, 0, 1]])
    # fit_transform()
    mlb = MultiLabelBinarizer()
    assert_array_equal(mlb.fit_transform(inp), indicator_mat)
    assert_array_equal(mlb.inverse_transform(indicator_mat), inp)

    # fit().transform()
    mlb = MultiLabelBinarizer()
    assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat)
    assert_array_equal(mlb.inverse_transform(indicator_mat), inp)
开发者ID:tguillemot,项目名称:scikit-learn,代码行数:13,代码来源:test_label.py


示例13: test_sparse_output_multilabel_binarizer

def test_sparse_output_multilabel_binarizer():
    # test input as iterable of iterables
    inputs = [
        lambda: [(2, 3), (1,), (1, 2)],
        lambda: ({2, 3}, {1}, {1, 2}),
        lambda: iter([iter((2, 3)), iter((1,)), {1, 2}]),
    ]
    indicator_mat = np.array([[0, 1, 1],
                              [1, 0, 0],
                              [1, 1, 0]])

    inverse = inputs[0]()
    for sparse_output in [True, False]:
        for inp in inputs:
            # With fit_transform
            mlb = MultiLabelBinarizer(sparse_output=sparse_output)
            got = mlb.fit_transform(inp())
            assert_equal(issparse(got), sparse_output)
            if sparse_output:
                # verify CSR assumption that indices and indptr have same dtype
                assert_equal(got.indices.dtype, got.indptr.dtype)
                got = got.toarray()
            assert_array_equal(indicator_mat, got)
            assert_array_equal([1, 2, 3], mlb.classes_)
            assert_equal(mlb.inverse_transform(got), inverse)

            # With fit
            mlb = MultiLabelBinarizer(sparse_output=sparse_output)
            got = mlb.fit(inp()).transform(inp())
            assert_equal(issparse(got), sparse_output)
            if sparse_output:
                # verify CSR assumption that indices and indptr have same dtype
                assert_equal(got.indices.dtype, got.indptr.dtype)
                got = got.toarray()
            assert_array_equal(indicator_mat, got)
            assert_array_equal([1, 2, 3], mlb.classes_)
            assert_equal(mlb.inverse_transform(got), inverse)

    assert_raises(ValueError, mlb.inverse_transform,
                  csr_matrix(np.array([[0, 1, 1],
                                       [2, 0, 0],
                                       [1, 1, 0]])))
开发者ID:manhhomienbienthuy,项目名称:scikit-learn,代码行数:42,代码来源:test_label.py


示例14: test_multilabel_binarizer_non_integer_labels

def test_multilabel_binarizer_non_integer_labels():
    tuple_classes = np.empty(3, dtype=object)
    tuple_classes[:] = [(1,), (2,), (3,)]
    inputs = [
        ([("2", "3"), ("1",), ("1", "2")], ["1", "2", "3"]),
        ([("b", "c"), ("a",), ("a", "b")], ["a", "b", "c"]),
        ([((2,), (3,)), ((1,),), ((1,), (2,))], tuple_classes),
    ]
    indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]])
    for inp, classes in inputs:
        # fit_transform()
        mlb = MultiLabelBinarizer()
        assert_array_equal(mlb.fit_transform(inp), indicator_mat)
        assert_array_equal(mlb.classes_, classes)
        assert_array_equal(mlb.inverse_transform(indicator_mat), inp)

        # fit().transform()
        mlb = MultiLabelBinarizer()
        assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat)
        assert_array_equal(mlb.classes_, classes)
        assert_array_equal(mlb.inverse_transform(indicator_mat), inp)

    mlb = MultiLabelBinarizer()
    assert_raises(TypeError, mlb.fit_transform, [({}), ({}, {"a": "b"})])
开发者ID:tguillemot,项目名称:scikit-learn,代码行数:24,代码来源:test_label.py


示例15: test_multilabel_binarizer_non_unique

def test_multilabel_binarizer_non_unique():
    inp = [(1, 1, 1, 0)]
    indicator_mat = np.array([[1, 1]])
    mlb = MultiLabelBinarizer()
    assert_array_equal(mlb.fit_transform(inp), indicator_mat)
开发者ID:manhhomienbienthuy,项目名称:scikit-learn,代码行数:5,代码来源:test_label.py


示例16: MultiLabelBinarizer

from nltk.corpus import reuters
from nltk.tokenize import word_tokenize
from sklearn.preprocessing.label import MultiLabelBinarizer

nltk.download('reuters')
nltk.download('punkt')

google_news_word2vec_model_location = 'data/GoogleNews-vectors-negative300.bin.gz'
doc2vec_model_location = 'model/doc2vec-model.bin'
doc2vec_dimensions = 300
classifier_model_location = 'model/classifier-model.bin'

doc2vec = Doc2Vec.load(doc2vec_model_location)

# Convert the categories to one hot encoded categories
labelBinarizer = MultiLabelBinarizer()
labelBinarizer.fit([reuters.categories(fileId) for fileId in reuters.fileids()])

# Convert load the articles with their corresponding categories
train_articles = [{'raw': reuters.raw(fileId), 'categories': reuters.categories(fileId)} for fileId in reuters.fileids() if fileId.startswith('training/')]
test_articles = [{'raw': reuters.raw(fileId), 'categories': reuters.categories(fileId)} for fileId in reuters.fileids() if fileId.startswith('test/')]
shuffle(train_articles)
shuffle(test_articles)

# Convert the articles to document vectors using the doc2vec model
train_data = [doc2vec.infer_vector(word_tokenize(article['raw'])) for article in train_articles]
test_data = [doc2vec.infer_vector(word_tokenize(article['raw'])) for article in test_articles]
train_labels = labelBinarizer.transform([article['categories'] for article in train_articles])
test_labels = labelBinarizer.transform([article['categories'] for article in test_articles])
train_data, test_data, train_labels, test_labels = numpy.asarray(train_data), numpy.asarray(test_data), numpy.asarray(train_labels), numpy.asarray(test_labels)
开发者ID:val-labs,项目名称:blog-text-classification,代码行数:30,代码来源:reuters-classifier-train.py


示例17: MultiLabelBinarizer

nltk.download('punkt')

doc2vec_model_location = 'model/doc2vec-model.bin'
classifier_model_location = 'model/classifier-model.bin'

# Use the doc2vec model created in reuters-classifier-train.py
doc2vec = Doc2Vec.load(doc2vec_model_location)

# Load the test articles and convert it to doc vectors
test_articles = [{'raw': reuters.raw(fileId), 'categories': reuters.categories(fileId)} for fileId in reuters.fileids() if fileId.startswith('test/')]
test_data = [doc2vec.infer_vector(word_tokenize(article['raw'])) for article in test_articles]

# Initialize the neural network
model=load_model(classifier_model_location)

# Make predictions
predictions = model.predict(numpy.asarray(test_data))

# Convert the prediction with gives a value between 0 and 1 to exactly 0 or 1 with a threshold
predictions[predictions<0.5] = 0
predictions[predictions>=0.5] = 1

# Convert predicted classes back to category names
labelBinarizer = MultiLabelBinarizer()
labelBinarizer.fit([reuters.categories(fileId) for fileId in reuters.fileids()])
predicted_labels = labelBinarizer.inverse_transform(predictions)

for predicted_label, test_article in zip(predicted_labels, test_articles):
    print('title: {}'.format(test_article['raw'].splitlines()[0]))
    print('predicted: {} - actual: {}'.format(list(predicted_label), test_article['categories']))
    print('')
开发者ID:val-labs,项目名称:blog-text-classification,代码行数:31,代码来源:reuters-doc2vec-predict.py



注:本文中的sklearn.preprocessing.label.MultiLabelBinarizer类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python qda.QDA类代码示例发布时间:2022-05-27
下一篇:
Python label.LabelEncoder类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap