
Python preprocessing.MultiLabelBinarizer Class Code Examples


This article collects typical usage examples of Python's sklearn.preprocessing.MultiLabelBinarizer class. If you have been wondering what the MultiLabelBinarizer class is for, how to use it, or what working code looks like, the curated class examples below may help.



The following shows 20 code examples of the MultiLabelBinarizer class, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
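Before diving into the collected examples, here is a minimal, self-contained sketch of the typical round trip (the label values are invented for illustration): fit_transform learns the label set and returns a binary indicator matrix, classes_ gives the column order, and inverse_transform maps indicator rows back to label tuples.

from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer()
# Each sample is an iterable of labels; the result has one column per label.
Y = mlb.fit_transform([['sci-fi', 'thriller'], ['comedy'], ['comedy', 'sci-fi']])
print(mlb.classes_)              # ['comedy' 'sci-fi' 'thriller'] (sorted)
print(Y)                         # [[0 1 1] [1 0 0] [1 1 0]]
print(mlb.inverse_transform(Y))  # [('sci-fi', 'thriller'), ('comedy',), ('comedy', 'sci-fi')]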

Example 1: fit_images

def fit_images():
    client = pymongo.MongoClient('localhost', 27017)
    db = client['image_annotation']
    responses = db['mapped_responses'].find()
    no_labels = db['labels_binary'].find()
    numbers = []
    for i in no_labels:
        numbers.append(set([int(i["number"])]))
    train_data = []
    labels = []
    indexes = {}
    mlb = MultiLabelBinarizer()
    mlb.fit(numbers)
    for index, instance in enumerate(responses):
        t_data = instance['hist']['0']
        indexes[index] = instance['image_no']
        train_data.append(t_data)
        label = instance['binary_results']
        new_labels = []
        for key, value in enumerate(label):
            value1 = int(value)
            new_labels.append(set([value1]))
        new_labels = mlb.transform(new_labels)
        labels.append(new_labels)
    classifier = KNeighborsClassifier(n_neighbors = 5, weights='uniform')
    classifier.fit(train_data, labels)
    build_dir = getBuildDir()
    pickle.dump(classifier, open(join(build_dir, 'model.data'), 'wb'), protocol=1)
    client.close()
Author: sreeram-boyapati, Project: image-annotation, Lines: 29, Source: classifier.py


Example 2: evaluate_solution

def evaluate_solution(users, urecovered, observed_index, xs=None, E=None,
                      hidden_edges=None):
    """Evaluate the quality of the recovered user profile"""
    mse = mean_squared_error(users[observed_index, :],
                             urecovered[observed_index, :])
    if hidden_edges is None or len(hidden_edges) < 1:
        return mse, None
    labeler = MultiLabelBinarizer(classes=np.arange(xs.shape[1]))
    gold = labeler.fit_transform([E[e] for e in sorted(hidden_edges)])
    # gold = np.array([E[e] for e in sorted(hidden_edges)])
    eh = sorted(hidden_edges)
    heads, tails = zip(*eh)
    Cr = np.dot(urecovered, xs.T)
    Dr = np.abs(Cr[heads, :] - Cr[tails, :])
    # TODO: prediction here could be better: instead of predicting the k best
    # directions every time, look at the revealed edges to compute a similarity
    # threshold (i.e. replace the hard-coded 0.05)
    best_dirs = np.argsort(Dr, 1).astype(int)[:, :2]
    pred = []
    for all_dir, suggestion in zip(Dr, best_dirs):
        my_pred = [suggestion[0]]
        if all_dir[suggestion[1]] < 0.05:
            my_pred.append(suggestion[1])
        pred.append(my_pred)
    pred = labeler.fit_transform(pred)
    return mse, f1_score(gold, pred, average='samples')
Author: daureg, Project: magnet, Lines: 26, Source: synth.py
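A note on the classes= parameter used in Example 2: pinning the class list fixes the number and order of the output columns, which is what makes the two separate fit_transform calls on gold and pred comparable even when some labels are absent from one of them. A minimal sketch with invented label indices:

import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer

labeler = MultiLabelBinarizer(classes=np.arange(4))
# Columns are always 0..3, whether or not every label occurs in the data.
gold = labeler.fit_transform([[0, 2], [1]])
pred = labeler.fit_transform([[0], [1, 3]])
print(gold)  # [[1 0 1 0] [0 1 0 0]]
print(pred)  # [[1 0 0 0] [0 1 0 1]]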


Example 3: __init__

    def __init__(self, inter_filePath = "inter/technology_companies_of_the_united_states/"):
        # [[cat,cat...]...]
        self.m = Word2Vec.load_word2vec_format("vectors/technology_companies_of_the_united_states/cat_train_neg5size400min_count5", binary=True) 
        self.dim = 400

        (correct_categories_train, context_categories_train) = self.load_category_page(inter_filePath + "category_page.txt")  
        (correct_categories_test, context_categories_test) = self.load_category_page(inter_filePath + "category_page_test.txt")
        ## ----  By mean ---
        Xvectors = np.array(self.predict_vector_by_mean(context_categories_train))
        Xvectors_test = np.array(self.predict_vector_by_mean(context_categories_test))


        ## ----  By mean --- *

        ## ----  By SVM ---
        corpus_train = [" ".join(i) for i in context_categories_train]
        corpus_test = [" ".join(i) for i in context_categories_test]
        cv = CountVectorizer(min_df = 1)
        X = cv.fit_transform(corpus_train)
        ##TFIDF
        transformer = TfidfTransformer()
        X_tfidf = transformer.fit_transform(X)
        #Labels
        mlb = MultiLabelBinarizer()
        mlb.fit(correct_categories_train + correct_categories_test)
        Y = mlb.transform(correct_categories_train) ###Transform to multilabel indicator
        #predict test labels
        X_test = cv.transform(corpus_test)
        Y_test = mlb.transform(correct_categories_test)
        #Y_predict_ovr = self.ovrSVM(X, Y, X_test)
        Y_predict_ovr = self.ovrSVM(Xvectors, Y, Xvectors_test)
        #Y_predict_ovo = self.ovoSVM(X, Y, X_test)
        print "---One versus rest---"
        print "Macro F-1:", f1_score(Y_test, Y_predict_ovr, average='macro')
        print "Micro F-1:", f1_score(Y_test, Y_predict_ovr, average='micro')
Author: pkumusic, Project: HCE, Lines: 35, Source: SVMs.py


Example 4: run_classifier

def run_classifier(sentences, labels, test_docs):
	import numpy as np

	train_matrix, tfidf = tf_idf_fit_transform(sentences)

	test_sentences = doc2sentences(test_docs)
	sentence_matrix = tfidf.transform(test_sentences)
	print("Shape of sentence matrix : ", sentence_matrix.shape)

	from sklearn.preprocessing import MultiLabelBinarizer
	mlb = MultiLabelBinarizer()
	label_matrix = mlb.fit_transform(labels)

	from sklearn.multiclass import OneVsRestClassifier
	from sklearn.svm import LinearSVC
	# estimator = SVC(kernel='linear')
	estimator = LinearSVC()
	classifier = OneVsRestClassifier(estimator, n_jobs=-1)
	classifier.fit(train_matrix, label_matrix)
	predictions = classifier.predict(sentence_matrix)

	import csv
	with open("classified.csv", "w") as fl:
		writer = csv.writer(fl)
		for i in range(len(test_sentences)):
			curr_pred = [mlb.classes_[x] for x in range(predictions.shape[1]) if predictions[i][x]==1]
			writer.writerow((test_sentences[i], curr_pred))
Author: sarath1, Project: EventExtraction, Lines: 27, Source: sentence_classifier.py


Example 5: test_multilabel_classification_report

def test_multilabel_classification_report():
    n_classes = 4
    n_samples = 50
    make_ml = make_multilabel_classification
    _, y_true_ll = make_ml(n_features=1, n_classes=n_classes, random_state=0,
                           n_samples=n_samples)
    _, y_pred_ll = make_ml(n_features=1, n_classes=n_classes, random_state=1,
                           n_samples=n_samples)

    expected_report = """\
             precision    recall  f1-score   support

          0       0.50      0.67      0.57        24
          1       0.51      0.74      0.61        27
          2       0.29      0.08      0.12        26
          3       0.52      0.56      0.54        27

avg / total       0.45      0.51      0.46       104
"""

    lb = MultiLabelBinarizer()
    lb.fit([range(4)])
    y_true_bi = lb.transform(y_true_ll)
    y_pred_bi = lb.transform(y_pred_ll)

    for y_true, y_pred in [(y_true_ll, y_pred_ll), (y_true_bi, y_pred_bi)]:
        report = classification_report(y_true, y_pred)
        assert_equal(report, expected_report)
Author: nateyoder, Project: scikit-learn, Lines: 28, Source: test_classification.py


Example 6: ACMClassificator

class ACMClassificator(BaseACMClassificator):
    def __init__(self):
        self.vectorizer = CountVectorizer(min_df=0.05, max_df=0.45, tokenizer=tokenize)
        self.mlb = MultiLabelBinarizer()
        self.classificator = OneVsRestClassifier(ExtraTreeClassifier(criterion="gini",
                                                                     max_depth=None,
                                                                     min_samples_split=2,
                                                                     min_samples_leaf=1,
                                                                     min_weight_fraction_leaf=0.,
                                                                     max_features="auto",
                                                                     max_leaf_nodes=None,
                                                                     class_weight=None),
                                                 n_jobs=-1
                                                 )

    def _prepare_problems(self, problems):
        return self.vectorizer.transform([p.statement for p in problems])

    def fit(self, problems, tags):
        nltk.download('punkt', quiet=True)
        self.vectorizer.fit([p.statement for p in problems])
        mat = self._prepare_problems(problems)
        self.mlb = self.mlb.fit(tags)
        self.classificator.fit(mat.toarray(), self.mlb.transform(tags))

    def predict(self, problems):
        mat = self._prepare_problems(problems)
        predicted = self.classificator.predict(mat.toarray())
        return self.mlb.inverse_transform(predicted)
Author: morojenoe, Project: classificator, Lines: 29, Source: one_vs_rest_tree.py


Example 7: read_all_data

def read_all_data(p):
    img_src = "images/"

    df = pd.read_pickle("frame_no_stem.pkl")
    images = __read_all_images(img_src) 
    print("Finished reading images")

    x_images = []
    x_desc = []
    y_category = []
    all_categories = set()

    for asin in df.index.values:
        if asin in images:
            data = images[asin]
            x_images.append(data)

            item = df.loc[asin]
            x_desc.append(item.description)
            cate = item.categories
            y_category.append(cate)
            for c in cate:
                all_categories.add(c)

    print("Finished reading dataframe")
    mlb = MultiLabelBinarizer()
    y_total = mlb.fit_transform(y_category)
    x_images = np.array(x_images)
    x_desc = np.array(x_desc)

    
    return x_images, x_desc, y_total
Author: jeffwiroj, Project: ml_proj, Lines: 32, Source: image_classifier.py


Example 8: main

def main():
    #Explore the data for how many class labels
    reviewsDict = {}
    with open("/Users/huzefa/Workspace/College-Fall-2015/Search/Dataset/Task2/reviewUsefulDict.pickle") as f:
        reviewsDict = pickle.load(f)
    print "Reviews Dictionary loaded .. "
    '''
    usefulCountDict = {}
    for key, value in reviewsDict.iteritems():
        if value not in usefulCountDict:
            usefulCountDict[value] = 1
        else:
            usefulCountDict[value] = usefulCountDict[value]+1
    pprint(usefulCountDict)
    '''
    corpus, target = DictToList(reviewsDict)
    
    vectorizer = TfidfVectorizer(stop_words="english", max_df=0.5, sublinear_tf=True)
    XAll = vectorizer.fit_transform(corpus)
    mlb = MultiLabelBinarizer()
    yAll = mlb.fit_transform(target)
    
    with open("/Users/huzefa/Workspace/College-Fall-2015/Search/Dataset/Task2/Onlyreviews.fv", 'w') as f:
        pickle.dump(XAll, f)
    with open("/Users/huzefa/Workspace/College-Fall-2015/Search/Dataset/Task2/Onlyreviews.target2", 'w') as f:
        pickle.dump(yAll, f)
    with open("/Users/huzefa/Workspace/College-Fall-2015/Search/Dataset/Task2/Onlyreviews.mlb", 'w') as f:
        pickle.dump(mlb, f)
    
    print "Dumped featrue vectors .... "
Author: yangyang861115, Project: Yelp-Project, Lines: 30, Source: createFeatureVectorsBinary.py


Example 9: get_training_data

def get_training_data(window_size_ms, train_time_sec=30):
	#loop until empty input is detected
	X = []
	y = []

	print "Training time for each key is {} seconds".format(train_time_sec)
	i = 0
	while True:
		s = input('Press <enter> to begin training key {} or q-<enter> to quit'.format(i))
		if s: break

		j = 0
		while j < train_time_sec:
			j += (window_size_ms / float(1000))
			freq_spect = read_spectral_data_for_time(window_size_ms)
			X.append(freq_spect)
			y.append([i])

		#increment key counter
		i += 1

	mb = MultiLabelBinarizer()
	y = mb.fit_transform(y)

	X = np.asarray(X)
	y = np.asarray(y)
	return X, y
Author: johncava, Project: HackAZ_2016, Lines: 27, Source: serialize_training_data.py
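Each target appended in Example 9 is a one-element list ([i]), so the binarizer effectively one-hot encodes a single key per sample. Under that reading, sklearn's LabelBinarizer applied to the raw key indices yields the same matrix, as this sketch (not part of the project) shows:

from sklearn.preprocessing import LabelBinarizer, MultiLabelBinarizer

keys = [0, 1, 2, 1]
y_multi = MultiLabelBinarizer().fit_transform([[k] for k in keys])
y_single = LabelBinarizer().fit_transform(keys)
# True with three or more distinct keys; with exactly two classes,
# LabelBinarizer collapses the output to a single column instead.
print((y_multi == y_single).all())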


Example 10: load_data

def load_data(config={}):
    """
    Load the Reuters dataset.

    Returns
    -------
    data : dict
        with keys 'x_train', 'x_test', 'y_train', 'y_test', 'labels'
    """
    stop_words = stopwords.words("english")
    vectorizer = TfidfVectorizer(stop_words=stop_words)
    mlb = MultiLabelBinarizer()

    documents = reuters.fileids()
    test = [d for d in documents if d.startswith('test/')]
    train = [d for d in documents if d.startswith('training/')]

    docs = {}
    docs['train'] = [reuters.raw(doc_id) for doc_id in train]
    docs['test'] = [reuters.raw(doc_id) for doc_id in test]
    xs = {'train': [], 'test': []}
    xs['train'] = vectorizer.fit_transform(docs['train']).toarray()
    xs['test'] = vectorizer.transform(docs['test']).toarray()
    ys = {'train': [], 'test': []}
    ys['train'] = mlb.fit_transform([reuters.categories(doc_id)
                                     for doc_id in train])
    ys['test'] = mlb.transform([reuters.categories(doc_id)
                                for doc_id in test])
    data = {'x_train': xs['train'], 'y_train': ys['train'],
            'x_test': xs['test'], 'y_test': ys['test'],
            'labels': globals()["labels"]}
    return data
Author: MartinThoma, Project: algorithms, Lines: 32, Source: reuters.py


Example 11: generateTrainFeatures

def generateTrainFeatures(L):
    """
    This function generates the training data features and its target labels.
    Input: L : The number of training data
    Output: trainX -> a (L * 2000) numpy matrix representing the 2000 features for each of the
                        L training samples
            trainY -> (L * 185) numpy matrix representing the target class of the training samples
    Logic:
    The input text is read, preprocessed to remove stop words, and is appended to a list.
    Similarly, each of the target class values are read into a list.
    Sklearn package TFIDF vectorizer is used for generating TFIDF matrix for the 2000 frequent
    words. 
    The multi-label classification algorithms require a target Y variable of the form,
    (nsamples * nclasses), multilabel binarizer is used for converting the list of classes
    to a matrix form.
    """
    global classOrder
    X = []
    Y = []
    # read the input
    for i in range(L):
        categories = input()
        target = [int(y) for y in categories.split(" ")]
        del target[0]
        meaningfulWords = readInput()
        Y.append(target)
        X.append(meaningfulWords)
    # construct TF-IDF matrix representing the features
    trainX = vectorizer.fit_transform(X).toarray()
    # convert the target label list to a suitable matrix form
    mlb = MultiLabelBinarizer()
    trainY = mlb.fit_transform(Y)
    # for representing the order of the classes
    classOrder = mlb.classes_
    return (trainX, trainY)
Author: hpam1, Project: Machine-Learning, Lines: 35, Source: labeler.py


Example 12: __init__

class VectorizedData:
    """ Simple container that holds the input dataset
    in a sklearn-friendly form, with X, y numpy vectors.

    TODO: we ignore # of matches for each fbpath """
    def __init__(self, data, Xdict=None, Ydict=None):
        fdict = [q_to_fdict(q) for q in data]
        lset = [q_to_lset(q) for q in data]

        if Xdict is None:
            self.Xdict = DictVectorizer()
            self.X = self.Xdict.fit_transform(fdict)
        else:
            self.Xdict = Xdict
            self.X = self.Xdict.transform(fdict)

        if Ydict is None:
            self.Ydict = MultiLabelBinarizer()
            self.Y = self.Ydict.fit_transform(lset)
        else:
            self.Ydict = Ydict

            # Filter out data with unknown labels, MultiLabelBinarizer() cannot
            # handle this
            known_lset = [set([label for label in ls if label in self.Ydict.classes_]) for ls in lset]
            lset_n = sum([len(ls) for ls in lset])
            known_lset_n = sum([len(ls) for ls in known_lset])
            if known_lset_n < lset_n:
                print('dropped %d out of %d labels (not in training set)' % (lset_n - known_lset_n, lset_n), file=sys.stderr)

            self.Y = self.Ydict.transform(known_lset)

    def cfier_score(self, cfier, scorer):
        """ Measure cfier performance on this dataset.

        scorer -> lambda cfier, X: cfier.predict_proba(X)
        (or decision_function when probabilities not predicted) """
        skl_score = cfier.score(self.X.toarray(), self.Y)

        # XXX: Matched paths might/could be weighted by their nMatches too...

        # Measure prediction performance
        Ypred = cfier.predict(self.X.toarray())
        n_q = float(np.size(self.Y, axis=0))
        # number of questions where all correct paths have been recalled
        recall_all = np.sum(np.sum(self.Y, axis=1) == np.sum(Ypred * self.Y, axis=1)) / n_q
        # number of questions where at least one correct path has been recalled
        recall_any = np.sum((np.sum(self.Y, axis=1) != 0) == (np.sum(Ypred * self.Y, axis=1) != 0)) / n_q
        # number of *PATHS* (not q.) that were correct
        precision = np.sum((Ypred + self.Y) == 2) / float(np.sum(Ypred))

        # Measure scoring performance
        Yscores = scorer(cfier, self.X.toarray())
        # MRR of first correct path
        mrr = mrr_by_score(self.Y, Yscores)
        # number of questions where at least one correct path has been recalled in top N paths
        # TODO

        return {'sklScore': skl_score, 'qRecallAll': recall_all, 'qRecallAny': recall_any, 'pPrec': precision, 'qScoreMRR': mrr}
Author: AmitShah, Project: yodaqa, Lines: 59, Source: fbpathtrain.py
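Example 12 filters out labels unseen at fit time before calling transform. The reason, as a hedged note rather than anything stated in the project: depending on the scikit-learn version, MultiLabelBinarizer.transform on unknown labels either raises a KeyError or drops them with an "unknown class(es) ... will be ignored" warning. A minimal sketch of the failure mode:

from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer().fit([['a', 'b']])
# The unseen label 'c' either raises a KeyError or is silently dropped
# with a warning, depending on the scikit-learn version.
print(mlb.transform([['a', 'c']]))  # [[1 0]] when 'c' is ignored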


Example 13: perform_train_test_split

def perform_train_test_split(db_name=ds.DEFAULT_DB_NAME,
                                        train_size=ds.DEFAULT_TRAININGSET_SIZE):
    
    """
    Get all document_ids of given database and split's it according to given
    train_size.
    The tricky part is that we n
    
    :param db_name: Name of database to split documents (default DEFAULT_DB_NAME)
    :param train_size: Size in percentage [0,1] of the training set.
    :return splitted_dataset - List of lists 
                    [[DEFAULT_DATASET_LIST_INDEX_TRAINING], 
                    [DEFAULT_DATASET_LIST_INDEX_TEST]]
    """
    
    database = db.couch_database(db_name)
    all_docs = database.getAllDocumentsFromDatabase()
    
    doc_ids_list = []
    all_tag_list = []
    
    i = 0
    
    for row in all_docs.rows:
        
        document = row.doc
        #append the document id to doc_ids_list
        doc_ids_list.append(document[cp.COUCHDB_DOCUMENT_FIELD_ID])
        
        tag_list = []
        
        #if document has tags than split and add them
        if pp.STACKEXCHANGE_TAGS_COLUM in document.keys():
            
            document_tags = document[pp.STACKEXCHANGE_TAGS_COLUM]
            
            tags_list = document_tags.split(sep=dtm_provider.TAG_SPLIT_separator)
            
            for tag in tags_list:
                
                #remove the closing tag (last item)
                tag_list.append(tag[:-1])
        #append the list of document tags to all_tag_list        
        all_tag_list.append(tag_list)
        
        i += 1
        
        if i > 10000:
            break
    
    mlb = MultiLabelBinarizer()
    tags_encoded = mlb.fit_transform(all_tag_list)

    
    print(len(doc_ids_list))
    
    splitted_dataset = cross_validation.train_test_split(doc_ids_list, tags_encoded,
                                               train_size=train_size, random_state=42,
                                               stratify=tags_encoded)

    return splitted_dataset
Author: davcem, Project: stackexchange_text_classification, Lines: 59, Source: classifier_inspect_splits.py


Example 14: createDataMatrix

def createDataMatrix(ngram_features, character_gram_features,tweetText, pos, pos_features, different_pos_tags, pos_text, voca_clusters, categories):
    tokenizer_case_preserve = Tokenizer(preserve_case=True)
    tokenizer = Tokenizer(preserve_case=False)
    handmade_features, cll, cll2 = [], [], []
    for tweet in tweetText:
        feat = []
        feat.append(exclamations(tweet))
        feat.append(questions(tweet))
        feat.append(questions_and_exclamation(tweet))
        feat.append(emoticon_negative(tweet))
        feat.append(emoticon_positive(tweet))
        words = tokenizer_case_preserve.tokenize(tweet) #preserving casing
        feat.append(allCaps(words))
        feat.append(elongated(words))
        feat.append(questions_and_exclamation(words[-1]))
        handmade_features.append(np.array(feat))
        words = tokenizer.tokenize(tweet)
        words = [word.strip("_NEG") for word in words]
        cll.append(getClusters(voca_clusters, words))
        #cll2.append(getClusters(voca_handmade, words))


    bl = csr_matrix(bing_lius(tweetText, pos, different_pos_tags, pos_text))
    nrc_emo = csr_matrix(nrc_emotion(tweetText, pos, different_pos_tags, pos_text ))
    mpqa_feat = csr_matrix(mpqa(tweetText,pos, different_pos_tags, pos_text))
    handmade_features = np.array(handmade_features)
    mlb = MultiLabelBinarizer(sparse_output=True, classes = list(set(voca_clusters.values())))
    cluster_memberships_binarized = csr_matrix(mlb.fit_transform(cll))
    #mlb = MultiLabelBinarizer(sparse_output=True, classes = list(set(voca_handmade.values())))
    #cluster_memberships_binarized_2 = csr_matrix(mlb.fit_transform(cll2))
    
    hasht = csr_matrix(sent140aff(tweetText, pos, different_pos_tags, pos_text, '../lexicons/HashtagSentimentAffLexNegLex/HS-AFFLEX-NEGLEX-unigrams.txt'))
#    sent140aff_data = csr_matrix(sent140aff(tweetText, pos, different_pos_tags, pos_text, '../../lexicons/Sentiment140AffLexNegLex/S140-AFFLEX-NEGLEX-unigrams.txt'))
    hasht_bigrams=csr_matrix(sent140aff_bigrams(tweetText, pos, different_pos_tags, pos_text, '../lexicons/HashtagSentimentAffLexNegLex/HS-AFFLEX-NEGLEX-bigrams.txt'))
#    sent140affBigrams=csr_matrix(sent140aff_bigrams(tweetText, pos, different_pos_tags, pos_text, '../../lexicons/Sentiment140AffLexNegLex/S140-AFFLEX-NEGLEX-bigrams.txt'))
    sentQ = csr_matrix(get_sentiwordnet(pos_text, pos))
    pos_features = csr_matrix(pos_features)
    handmade_features = csr_matrix(handmade_features)
    # ffeatures = scipy.sparse.hstack((ngram_features, character_gram_features, cluster_memberships_binarized, handmade_features, pos_features, 
#                             sent140affBigrams, hasht_bigrams, hasht, sent140aff_data, bl, mpqa_feat, nrc_emo), dtype=float)
#    ffeatures = scipy.sparse.hstack((ngram_features, character_gram_features, cluster_memberships_binarized, handmade_features, pos_features, sent140affBigrams, hasht_bigrams, hasht, sent140aff_data, bl, mpqa_feat, nrc_emo), dtype=float)
    ffeatures = scipy.sparse.hstack((ngram_features, character_gram_features, sentQ, handmade_features, pos_features, cluster_memberships_binarized, bl, mpqa_feat, nrc_emo, hasht, hasht_bigrams ), dtype=float)

#     print ngram_features.shape, character_gram_features.shape, cluster_memberships_binarized.shape, handmade_features.shape, pos_features.shape, 
#     sent140affBigrams.shape, hasht_bigrams, hasht.shape, sent140aff_data.shape, bl.shape, mpqa_feat.shape, nrc_emo.shape
    y=[]
    for i in categories:
        if i=='positive':
            y.append(1)
        elif i == 'negative':
            y.append(-1)
        elif i == 'UNKNOWN':
            y.append(0)
        else:
            print(i)
    ffeatures = normalize(ffeatures)
#     ffeatures, y = shuffle(ffeatures,y)
    return ffeatures, y
Author: balikasg, Project: SemEval2016-Twitter_Sentiment_Evaluation, Lines: 58, Source: my_utils.py


Example 15: xval

def xval(clf, x, y, train_index, test_index):
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(x_train, y_train)
    mlb = MultiLabelBinarizer()
    y_pred = clf.predict_proba(x_test)
    mse = mean_squared_error(mlb.fit_transform(label_binarize(y_test, classes=clf.classes_)), y_pred)
    acc = accuracy_score(y_test, y_pred.argmax(axis=1))
    evals = clf.get_num_evals()
    return mse, acc, evals
Author: shehzadqureshi, Project: NeuralNetDynamicOSI, Lines: 10, Source: test_basic_4bit_cv.py


Example 16: print_report

def print_report(name_classificator, testing_problems, testing_tags, predicted_problems, predicted_tags):
    predicted_problems, predicted_tags = make_right_order(testing_problems, predicted_problems, predicted_tags)
    mlb = MultiLabelBinarizer().fit(testing_tags + predicted_tags)
    testing_tags = mlb.transform(testing_tags)
    predicted_tags = mlb.transform(predicted_tags)
    print(name_classificator)
    print(classification_report(testing_tags, predicted_tags, target_names=mlb.classes_))
    print('label ranking average precision score =',
          label_ranking_average_precision_score(testing_tags, predicted_tags))
    print('\n', ('#'*100), '\n')
Author: morojenoe, Project: classificator, Lines: 10, Source: report.py


Example 17: test_BRKnna_predict_dense

    def test_BRKnna_predict_dense(self):
        data = csr.csr_matrix([[0, 1], [1, 1], [1, 1.1], [0.5, 1]])
        train_ids = [['lid0', 'lid1'], ['lid2', 'lid3'], ['lid4', 'lid3'], ['lid4', 'lid5']]
        mlb = MultiLabelBinarizer()
        y = mlb.fit_transform(train_ids)

        knn = BRKNeighborsClassifier(threshold=0.5, n_neighbors=3, mode='a')
        knn.fit(data, y)

        pred = knn.predict(csr.csr_matrix([[1.1, 1.1]])).todense()
        np.testing.assert_array_equal([[0, 0, 0, 1, 1, 0]], pred)
Author: quadflor, Project: Quadflor, Lines: 11, Source: test_BRKNN.py


Example 18: test_BRKnna_no_labels_take_closest

    def test_BRKnna_no_labels_take_closest(self):
        data = csr.csr_matrix([[0, 1], [1, 1], [1, 1.1], [0, 1]])
        train_ids = [['lid0', 'lid1'], ['lid2', 'lid3'], ['lid2', 'lid3'], ['lid0', 'lid5']]
        mlb = MultiLabelBinarizer(sparse_output=True)
        y = mlb.fit_transform(train_ids)
        knn = BRKNeighborsClassifier(n_neighbors=2, threshold=0.6, mode='a')
        knn.fit(data, y)

        pred = knn.predict(csr.csr_matrix([[0, 1]])).todense()
        print(pred)
        np.testing.assert_array_equal([[1, 0, 0, 0, 0]], pred)
Author: quadflor, Project: Quadflor, Lines: 11, Source: test_BRKNN.py
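Example 18 above (and Example 19 below) construct the target with sparse_output=True, which makes fit_transform return a SciPy CSR matrix rather than a dense ndarray; that is why the predictions are densified with .todense() before comparison. A minimal sketch:

from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer(sparse_output=True)
y = mlb.fit_transform([['lid0', 'lid1'], ['lid2']])
print(y.__class__.__name__)  # csr_matrix
print(y.todense())           # [[1 1 0] [0 0 1]]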


Example 19: test_BRKnnb_predict_two_samples

    def test_BRKnnb_predict_two_samples(self):
        data = csr.csr_matrix([[0, 1], [1, 1.1], [1, 1], [0.5, 1]])
        train_ids = [['lid0', 'lid1'], ['lid0', 'lid1'], ['lid4', 'lid5'], ['lid4', 'lid5']]
        mlb = MultiLabelBinarizer(sparse_output=True)
        y = mlb.fit_transform(train_ids)

        knn = BRKNeighborsClassifier(mode='b', n_neighbors=3)
        knn.fit(data, y)

        pred = knn.predict(csr.csr_matrix([[0, 1], [2, 2]])).todense()
        np.testing.assert_array_equal([[1, 1, 0, 0], [0, 0, 1, 1]], pred)
Author: quadflor, Project: Quadflor, Lines: 11, Source: test_BRKNN.py


Example 20: run_classifierAccuracy

def run_classifierAccuracy(trainSentences, trainLabels, testSentences, testLabels):
	all_labels = ["Drought", "Earthquake", "Flood", "Epidemic", "Hurricane", \
			"Rebellion", "Terrorism", "Tornado", "Tsunami", "displaced_people_and_evacuations", \
			"donation_needs_or_offers_or_volunteering_services", "infrastructure_and_utilities_damage", \
			"injured_or_dead_people", "missing_trapped_or_found_people"]
	disaster_labels = ["Drought", "Earthquake", "Flood", "Hurricane", \
			"Tornado", "Tsunami", "displaced_people_and_evacuations", \
			"donation_needs_or_offers_or_volunteering_services", "infrastructure_and_utilities_damage", \
			"injured_or_dead_people", "missing_trapped_or_found_people"]
	health_labels = ["Epidemic", "displaced_people_and_evacuations", \
			"donation_needs_or_offers_or_volunteering_services", \
			"injured_or_dead_people"]
	conflict_labels = ["Rebellion", "Terrorism", "displaced_people_and_evacuations", \
			"infrastructure_and_utilities_damage", \
			"injured_or_dead_people", "missing_trapped_or_found_people"]
	import numpy as np
	curr_labels = all_labels

	trainLabels = [list(set(l).intersection(curr_labels)) for l in trainLabels]
	testLabels = [list(set(l).intersection(curr_labels)) for l in testLabels]

	from sklearn.preprocessing import MultiLabelBinarizer
	mlb = MultiLabelBinarizer(classes=curr_labels)
	mlb.fit(trainLabels)
	print("Labels : ", mlb.classes_)
	train_label_matrix = mlb.transform(trainLabels)
	test_label_matrix = mlb.transform(testLabels)
	print("Shape of label matrix : ", test_label_matrix.shape)

	train_matrix, tfidf = tf_idf_fit_transform(trainSentences)
	test_matrix = tfidf.transform(testSentences)
	print("Shape of sentence matrix : ", test_matrix.shape)


	from sklearn.multiclass import OneVsRestClassifier
	from sklearn.svm import LinearSVC
	from sklearn.ensemble import RandomForestClassifier
	# estimator = LinearSVC()
	estimator = RandomForestClassifier(n_estimators=50, max_depth=None, min_samples_split=2, random_state=0, n_jobs = -1)
	classifier = OneVsRestClassifier(estimator, n_jobs=-1)
	classifier.fit(train_matrix, train_label_matrix)
	predictions = classifier.predict(test_matrix)

	from sklearn.metrics import f1_score, precision_score, recall_score
	print("Micro-Precision", precision_score(test_label_matrix, predictions, average='micro'))
	print("Micro-Recall", recall_score(test_label_matrix, predictions, average='micro'))
	print("Micro-F1", f1_score(test_label_matrix, predictions, average='micro'))
	print("Macro-Precision", precision_score(test_label_matrix, predictions, average='macro'))
	print("Macro-Recall", recall_score(test_label_matrix, predictions, average='macro'))
	print("Macro-F1", f1_score(test_label_matrix, predictions, average='macro'))
	print("Macro-Precision", precision_score(test_label_matrix, predictions, average=None))
	print("Macro-Recall", recall_score(test_label_matrix, predictions, average=None))
	print("Macro-F1", f1_score(test_label_matrix, predictions, average=None))
Author: sarath1, Project: EventExtraction, Lines: 53, Source: sentence_classifier.py



Note: The sklearn.preprocessing.MultiLabelBinarizer class examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their original authors; copyright remains with those authors, and any use or redistribution should follow the corresponding project's license. Do not reproduce without permission.

