This article collects typical usage examples of the Python class sklearn.preprocessing.LabelBinarizer. If you have been wondering what exactly the LabelBinarizer class does, how to use it, or what real code that uses it looks like, the hand-picked class examples below may help.
A total of 20 LabelBinarizer code examples are shown, sorted by popularity by default. You can upvote the ones you like or find useful; your feedback helps the system recommend better Python code examples.
Example 1: BinaryRelevanceClassifier
class BinaryRelevanceClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, estimator):
        self.estimator = estimator

    def fit(self, X, Y):
        # binarize labels
        self.bl = LabelBinarizer()
        Y = self.bl.fit_transform(Y)
        self.classes_ = self.bl.classes_
        # create an estimator for each label
        self.estimators_ = []
        for i in xrange(self.bl.classes_.shape[0]):
            estimator = clone(self.estimator)
            estimator.fit(X, Y[:, i])
            self.estimators_.append(estimator)

    def predict(self, X):
        self._check_is_fitted()
        X = np.atleast_2d(X)
        Y = np.empty((X.shape[0], self.classes_.shape[0]))
        for i, estimator in enumerate(self.estimators_):
            Y[:, i] = estimator.predict(X).T
        return self.bl.inverse_transform(Y)

    def _check_is_fitted(self):
        if not hasattr(self, "estimators_"):
            raise ValueError("The object hasn't been fitted yet!")
Developer: sunnyrjuneja, Project: ai_tidbits, Lines: 30, Source: binary_relevance_classifier.py
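As a quick, standalone illustration of the round trip this wrapper relies on (a sketch, not part of the project above): LabelBinarizer turns a class label vector into one indicator column per class, each of which can be fitted by a separate estimator, and inverse_transform maps indicator (or score) columns back to the original labels.

import numpy as np
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer()
Y = lb.fit_transform(["cat", "dog", "bird", "cat"])   # shape (4, 3), one column per class
print(lb.classes_)              # ['bird' 'cat' 'dog']
print(Y)
# Each column is a binary target for one class; inverse_transform maps the
# indicator matrix back to the original label strings.
print(lb.inverse_transform(Y))  # ['cat' 'dog' 'bird' 'cat']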
Example 2: GBClassifier
class GBClassifier(_BaseGB, ClassifierMixin):
    def __init__(self, estimator, n_estimators=100,
                 step_size="line_search", learning_rate=0.1,
                 loss="squared_hinge", subsample=1.0,
                 callback=None, random_state=None):
        self.estimator = estimator
        self.n_estimators = n_estimators
        self.step_size = step_size
        self.learning_rate = learning_rate
        self.loss = loss
        self.subsample = subsample
        self.callback = callback
        self.random_state = random_state

    def _get_loss(self):
        losses = dict(squared_hinge=_SquaredHingeLoss(),
                      log=_LogLoss())
        return losses[self.loss]

    def fit(self, X, y):
        self._lb = LabelBinarizer(neg_label=-1)
        Y = self._lb.fit_transform(y)
        return super(GBClassifier, self).fit(X, Y)

    def predict(self, X):
        pred = self.decision_function(X)
        return self._lb.inverse_transform(pred)
Developer: 0x0all, Project: ivalice, Lines: 28, Source: gradient_boosting.py
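A minimal sketch (not taken from the project above) of the two LabelBinarizer features this classifier uses: neg_label=-1 encodes targets as -1/+1, which margin-based losses expect, and inverse_transform applied to real-valued decision scores picks the highest-scoring column per row and returns the original labels.

import numpy as np
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer(neg_label=-1)
y = np.array([0, 2, 1, 2])
Y = lb.fit_transform(y)        # entries are -1/+1 instead of 0/1
print(Y)

# Made-up decision scores with the same column layout; inverse_transform
# chooses the column with the largest score in each row.
scores = np.array([[ 0.9, -0.2, -0.5],
                   [-0.8, -0.1,  0.7],
                   [-0.3,  0.6, -0.2],
                   [-0.9,  0.1,  0.4]])
print(lb.inverse_transform(scores))   # [0 2 1 2]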
Example 3: test_multinomial_loss_ground_truth
def test_multinomial_loss_ground_truth():
    # n_samples, n_features, n_classes = 4, 2, 3
    n_classes = 3
    X = np.array([[1.1, 2.2], [2.2, -4.4], [3.3, -2.2], [1.1, 1.1]])
    y = np.array([0, 1, 2, 0])
    lbin = LabelBinarizer()
    Y_bin = lbin.fit_transform(y)

    weights = np.array([[0.1, 0.2, 0.3], [1.1, 1.2, -1.3]])
    intercept = np.array([1., 0, -.2])
    sample_weights = np.array([0.8, 1, 1, 0.8])

    prediction = np.dot(X, weights) + intercept
    logsumexp_prediction = logsumexp(prediction, axis=1)
    p = prediction - logsumexp_prediction[:, np.newaxis]
    loss_1 = -(sample_weights[:, np.newaxis] * p * Y_bin).sum()
    diff = sample_weights[:, np.newaxis] * (np.exp(p) - Y_bin)
    grad_1 = np.dot(X.T, diff)

    weights_intercept = np.vstack((weights, intercept)).T.ravel()
    loss_2, grad_2, _ = _multinomial_loss_grad(weights_intercept, X, Y_bin,
                                               0.0, sample_weights)
    grad_2 = grad_2.reshape(n_classes, -1)
    grad_2 = grad_2[:, :-1].T

    assert_almost_equal(loss_1, loss_2)
    assert_array_almost_equal(grad_1, grad_2)

    # ground truth
    loss_gt = 11.680360354325961
    grad_gt = np.array([[-0.557487, -1.619151, +2.176638],
                        [-0.903942, +5.258745, -4.354803]])
    assert_almost_equal(loss_1, loss_gt)
    assert_array_almost_equal(grad_1, grad_gt)
Developer: AlexisMignon, Project: scikit-learn, Lines: 34, Source: test_sag.py
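For reference, this is what the binarization step in the test produces (a sketch outside the test itself): fit_transform on y = [0, 1, 2, 0] yields the one-hot indicator that the multinomial loss is written against.

import numpy as np
from sklearn.preprocessing import LabelBinarizer

y = np.array([0, 1, 2, 0])
Y_bin = LabelBinarizer().fit_transform(y)
print(Y_bin)
# [[1 0 0]
#  [0 1 0]
#  [0 0 1]
#  [1 0 0]]
# Row i has a single 1 in the column of class y[i], so the term (p * Y_bin)
# in the test selects the log-probability of the true class for each sample.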
Example 4: Encoding
def Encoding(data, general_matrix=None):
    encoder = LabelBinarizer()
    count = 0
    # encoding
    for i in range(data.shape[1]):
        if type(data[0, i]) == str:
            count += 1
            col = data[:, i]
            unique = np.unique(col if general_matrix is None else general_matrix[:, i])

            try:
                encoder.fit(unique)
            except:
                pass

            new_col = encoder.transform(col)

            # split at i and i + 1
            before, removed, after = np.hsplit(data, [i, i + 1])
            # concatenate
            data = np.concatenate((before, new_col, after), axis=1)
            before, removed, after = np.hsplit(general_matrix, [i, i + 1])
            general_matrix = np.concatenate((before, encoder.transform(general_matrix[:, i]), after), axis=1)

    print "count : %d" % count
    # return data
    return data
Developer: nhanloukiala, Project: AppsOfDataAnalysis, Lines: 27, Source: cyber_attack_classification.py
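One detail worth knowing when splicing binarized columns back into a matrix like this (an illustrative sketch, not from the project above): for two classes LabelBinarizer emits a single 0/1 column, while for three or more classes it emits one column per class, so the width of new_col varies from feature to feature.

import numpy as np
from sklearn.preprocessing import LabelBinarizer

two = LabelBinarizer().fit_transform(np.array(["tcp", "udp", "tcp"]))
many = LabelBinarizer().fit_transform(np.array(["http", "ftp", "smtp"]))
print(two.shape)    # (3, 1)  -> a two-class feature becomes one column
print(many.shape)   # (3, 3)  -> a three-class feature becomes three columns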
Example 5: display_image_predictions
def display_image_predictions(features, labels, predictions):
    n_classes = 10
    label_names = _load_label_names()
    label_binarizer = LabelBinarizer()
    label_binarizer.fit(range(n_classes))
    label_ids = label_binarizer.inverse_transform(np.array(labels))

    fig, axies = plt.subplots(nrows=4, ncols=2)
    fig.tight_layout()
    fig.suptitle('Softmax Predictions', fontsize=20, y=1.1)

    n_predictions = 3
    margin = 0.05
    ind = np.arange(n_predictions)
    width = (1. - 2. * margin) / n_predictions

    for image_i, (feature, label_id, pred_indicies, pred_values) in enumerate(zip(features, label_ids, predictions.indices, predictions.values)):
        pred_names = [label_names[pred_i] for pred_i in pred_indicies]
        correct_name = label_names[label_id]

        axies[image_i][0].imshow(feature * 255)
        axies[image_i][0].set_title(correct_name)
        axies[image_i][0].set_axis_off()

        axies[image_i][1].barh(ind + margin, pred_values[::-1], width)
        axies[image_i][1].set_yticks(ind + margin)
        axies[image_i][1].set_yticklabels(pred_names[::-1])
        axies[image_i][1].set_xticks([0, 0.5, 1.0])
Developer: lpalum, Project: machine-learning, Lines: 28, Source: helper.py
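The pattern used here, fitting on range(n_classes) and then calling inverse_transform on one-hot label vectors, recovers integer class ids from encoded labels. A standalone sketch of that step, assuming one-hot rows like the ones this helper receives:

import numpy as np
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer()
lb.fit(range(10))                       # classes_ becomes [0, 1, ..., 9]
one_hot = np.zeros((2, 10))
one_hot[0, 3] = 1                       # encodes class 3
one_hot[1, 7] = 1                       # encodes class 7
print(lb.inverse_transform(one_hot))    # [3 7]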
Example 6: transform
def transform(self, data_dict):
    listOfUnits = ["kilogram", "kg", "gram", "[GMgmkK]?Hz", "liter", "ml",
                   "cup", "cm", "foot", "inch", "meter", "mg", "gallon", "milliliter", "[MGTmgtKk]B"]
    regex = "[\d]+\.[\d]+(" + "[\b/,-]|".join(listOfUnits) + ")"
    data = data_dict[self.key].str.extract(regex, flags=re.IGNORECASE, expand=False).str.lower()
    lb = LabelBinarizer()
    return lb.fit_transform(data.fillna(""))
Developer: zero0nee, Project: UnspscClassifier, Lines: 7, Source: transformers.py
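After the regex extraction, rows with no recognizable unit become NaN and are filled with an empty string, which LabelBinarizer simply treats as one more class. A sketch of that behaviour with made-up values (not from the project above):

from sklearn.preprocessing import LabelBinarizer

units = ["kg", "", "ml", "kg", ""]       # "" stands in for rows with no regex match
lb = LabelBinarizer()
print(lb.fit_transform(units))           # shape (5, 3), one column per class
print(lb.classes_)                       # ['' 'kg' 'ml']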
Example 7: bio_classification_report
def bio_classification_report(y_true, y_pred):
    lb = LabelBinarizer()
    y_true_combined = 1 - lb.fit_transform(list(chain.from_iterable(y_true)))
    y_pred_combined = list(chain.from_iterable(y_pred))

    tagset = set(lb.classes_) - {'O'}
    tagset = sorted(tagset, key=lambda tag: tag.split('-', 1)[::-1])
    class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}

    print 'True sum %d Pred sum %d Len %d' % (sum(y_true_combined), sum(y_pred_combined), len(y_pred_combined))
    print "AUC\tP-R: %.4f\tROC: %.4f" % (average_precision_score(y_true_combined, y_pred_combined, average=None),
                                         roc_auc_score(y_true_combined, y_pred_combined, average=None))
    # plt.figure()
    # fpr, tpr, thr = roc_curve(y_true_combined, y_pred_combined)
    # area = auc(fpr, tpr)
    # plt.plot(fpr, tpr, label='{area:.3f}'.format(area=area))
    # plt.legend(loc=4)
    # plt.savefig('sub3.jpg')

    return classification_report(
        1 - y_true_combined,
        [0 if v > 0.1 else 1 for v in y_pred_combined],
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset,
    )
Developer: lmxl, Project: hal, Lines: 25, Source: base_learner.py
Example 8: CategoricalToNumerical
class CategoricalToNumerical(object):
    def __init__(self, dimensionality_reducer=None, verify=True):
        """Takes in a dimensionality reducer in order to convert categorical features into numerical."""
        if dimensionality_reducer is None:
            dimensionality_reducer = RandomizedPCA(1)
        self.dimensionality_reducer = dimensionality_reducer
        self.verify = verify
        self.binarizer = LabelBinarizer()

    def fit(self, X, y=None):
        self._verify(X, self.verify)
        binarized = self.binarizer.fit_transform(X)
        self.dimensionality_reducer.fit(binarized)

    def transform(self, X):
        self._verify(X, False)
        binarized = self.binarizer.transform(X)
        result = self.dimensionality_reducer.transform(binarized).flatten()
        assert X.shape == result.shape
        return result

    def fit_transform(self, X, y=None):
        self.fit(X)
        return self.transform(X)

    def _verify(self, X, verify):
        if verify:
            assert is_categorical(X)
        else:
            assert isinstance(X, np.ndarray)
            assert len(X.shape) == 1
Developer: Diviyan-Kalainathan, Project: causal-humans, Lines: 34, Source: convert.py
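The idea of the class — binarize a categorical column, then project the indicator matrix down to a single numeric column — can be sketched directly with current scikit-learn pieces. Note this uses PCA rather than the class's RandomizedPCA, since RandomizedPCA was removed from recent scikit-learn releases in favour of PCA(svd_solver='randomized'); the values below are made up.

import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelBinarizer

X = np.array(["red", "green", "blue", "green", "red"])
binarized = LabelBinarizer().fit_transform(X)          # shape (5, 3)
reducer = PCA(n_components=1, svd_solver="randomized")
numeric = reducer.fit_transform(binarized).flatten()   # one numeric value per sample
print(numeric.shape)                                   # (5,)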
Example 9: iris_demo
def iris_demo():
    # load the iris dataset
    iris = load_iris()
    X = iris['data']
    y_labels = iris['target']

    lb = LabelBinarizer()
    y = lb.fit_transform(y_labels)

    # split into training, validation and test datasets
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.25,
                                                        random_state=RANDOM_STATE)
    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train,
                                                          test_size=0.25,
                                                          random_state=RANDOM_STATE)

    # train the neural net
    print("Building logistic regression classifier to classify iris data")
    nn = pynn.ArtificialNeuralNet([X_train.shape[1], 20, y_train.shape[1]])
    print("Training")
    nn.fit(X_train, y_train, X_valid, y_valid,
           batch_size=20, n_epochs=20, learning_rate=0.05,
           random_state=RANDOM_STATE)

    y_pred = nn.predict(X_test)
    print("iris accuracy: {}%".format(
        accuracy_score(y_test.argmax(1), y_pred.argmax(1)) * 100))
Developer: benjamin-croker, Project: pynn, Lines: 30, Source: example.py
Example 10: get_dataset2
def get_dataset2(test_fraction):
    """
    @:param: test_fraction used to split train and test
    Vectorizes the features and labels into categorical values and randomly splits into train and test set
    :return: X_train, X_test, y_train, y_test
    """
    data = []
    with open('labels.csv', 'r') as datafile:
        csv_reader = csv.reader(datafile, delimiter=',', quotechar='|')
        for row in csv_reader:
            data.append(row)

    data = numpy.asarray(data)
    X = data[:, 0:data.shape[1] - 1]
    y = data[:, data.shape[1] - 1]

    # X,y = get_tabledata()

    vec = DictVectorizer()
    feature_dict = [dict(enumerate(x)) for x in X.tolist()]
    X = vec.fit_transform(feature_dict).toarray()
    joblib.dump(vec, 'vectorizer.pkl')

    lb = LabelBinarizer()
    y = lb.fit_transform(y)
    joblib.dump(lb, 'binarizer.pkl')

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_fraction)
    return X_train, X_test, y_train, y_test
Developer: spatial-computing, Project: strabo-learning-ocr-transformation-hpc, Lines: 28, Source: Data.py
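Persisting the fitted binarizer with joblib, as above, lets the prediction side decode model outputs back into the original label strings. A minimal sketch of that counterpart, assuming the 'binarizer.pkl' file written by get_dataset2 and a standalone joblib install; the probability rows are made up:

import numpy as np
import joblib

lb = joblib.load('binarizer.pkl')         # the LabelBinarizer fitted in get_dataset2
probs = np.array([[0.1, 0.7, 0.2],        # e.g. softmax outputs, one column per class
                  [0.8, 0.1, 0.1]])
labels = lb.inverse_transform(probs)      # picks the highest-scoring class per row
print(labels)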
Example 11: bio_classification_report
def bio_classification_report(y_true, y_pred):
    """Evaluates entity extraction accuracy.

    Classification report for a list of BIO-encoded sequences.
    It computes token-level metrics and discards "O" labels.

    Note that it requires scikit-learn 0.15+ (or a version from github master)
    to calculate averages properly!
    Taken from https://github.com/scrapinghub/python-crfsuite/blob/master/examples/CoNLL%202002.ipynb
    """
    from sklearn.preprocessing import LabelBinarizer
    from itertools import chain
    from sklearn.metrics import classification_report

    lb = LabelBinarizer()
    y_true_combined = lb.fit_transform(list(chain.from_iterable(y_true)))
    y_pred_combined = lb.transform(list(chain.from_iterable(y_pred)))

    tagset = set(lb.classes_) - {'O'}
    tagset = sorted(tagset, key=lambda tag: tag.split('-', 1)[::-1])
    class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}

    return classification_report(
        y_true_combined,
        y_pred_combined,
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset,
    )
Developer: dhpollack, Project: rasa_nlu, Lines: 27, Source: crf_entity_extractor.py
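To see what the binarizer receives here, the nested tag sequences are flattened with chain.from_iterable before fitting. A small sketch with made-up BIO tags:

from itertools import chain
from sklearn.preprocessing import LabelBinarizer

y_true = [["B-PER", "I-PER", "O"], ["B-LOC", "O"]]      # one tag sequence per sentence
flat = list(chain.from_iterable(y_true))                # ['B-PER', 'I-PER', 'O', 'B-LOC', 'O']
lb = LabelBinarizer()
Y = lb.fit_transform(flat)                              # one indicator column per tag
print(lb.classes_)                                      # ['B-LOC' 'B-PER' 'I-PER' 'O']
print(Y.shape)                                          # (5, 4)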
Example 12: scorer_auc
def scorer_auc(y_true, y_pred):
    """Dedicated to 2class probabilistic outputs"""
    from sklearn.metrics import roc_auc_score
    from sklearn.preprocessing import LabelBinarizer
    le = LabelBinarizer()
    y_true = le.fit_transform(y_true)
    return roc_auc_score(y_true, y_pred)
Developer: SherazKhan, Project: Paris_orientation-decoding, Lines: 7, Source: base.py
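For two classes, fit_transform returns a single 0/1 column, which pairs directly with the predicted probabilities that roc_auc_score expects. A standalone sketch with made-up scores (not from the project above):

import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelBinarizer

y_true = np.array(["target", "standard", "target", "standard"])
y_score = np.array([0.9, 0.2, 0.6, 0.4])            # made-up P(class == 'target')
y_bin = LabelBinarizer().fit_transform(y_true)      # shape (4, 1): a single 0/1 column
print(roc_auc_score(y_bin.ravel(), y_score))        # 1.0 for this toy example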
Example 13: binarize_label_columns
def binarize_label_columns(df, columns, two_classes_as='single'):
    '''
    Inputs:
        df: Pandas dataframe object.
        columns: Columns to binarize.
        two_classes_as: How to handle two classes, as 'single' or 'multiple' columns.
    Returns a tuple with the following items:
        df: Pandas dataframe object with new columns.
        binlabel_names: Names of the newly created binary variables.
        lb_objects: a dictionary with columns as keys and sklearn.LabelBinarizer
            objects as values.
    '''
    binlabel_names = []
    lb_objects = {}
    for col in columns:
        if len(df[col].unique()) > 1:
            rows_notnull = df[col].notnull()  # Use only valid feature observations
            lb = LabelBinarizer()
            binclass = lb.fit_transform(df[col][rows_notnull])  # Fit & transform on valid observations
            if len(lb.classes_) == 2 and two_classes_as == 'multiple':
                binclass = np.hstack((1 - binclass, binclass))
            lb_objects[col] = lb
            if len(lb.classes_) > 2 or two_classes_as == 'multiple':
                col_binlabel_names = [col + '_' + str(c) for c in lb.classes_]
                binlabel_names += col_binlabel_names  # Names for the binarized classes
                for n in col_binlabel_names: df[n] = np.NaN  # Initialize columns
                df.loc[rows_notnull, col_binlabel_names] = binclass  # Merge binarized data
            elif two_classes_as == 'single':
                binlabel_names.append(col + '_bin')  # Names for the binarized classes
                df[col + '_bin'] = np.NaN  # Initialize columns
                df.loc[rows_notnull, col + '_bin'] = binclass  # Merge binarized data
    return df, binlabel_names, lb_objects
Developer: jonathan1296, Project: Data-Science-Lectures, Lines: 32, Source: start_here.py
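The np.hstack((1 - binclass, binclass)) trick above expands the single column that LabelBinarizer returns for a two-class feature into an explicit two-column one-hot encoding. A small sketch:

import numpy as np
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer()
binclass = lb.fit_transform(["yes", "no", "yes"])   # shape (3, 1) for two classes
both_cols = np.hstack((1 - binclass, binclass))     # shape (3, 2), one column per class
print(lb.classes_)       # ['no' 'yes']
print(both_cols)
# [[0 1]
#  [1 0]
#  [0 1]]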
Example 14: one_hot_encoding
def one_hot_encoding(y_train, y_test):
    labelBinarizer = LabelBinarizer()
    labelBinarizer.fit(y_train)

    y_train_one_hot = labelBinarizer.transform(y_train)
    y_test_one_hot = labelBinarizer.transform(y_test)
    return y_train_one_hot, y_test_one_hot
Developer: dzungcamlang, Project: Traffic-Signs, Lines: 7, Source: util.py
Example 15: full_matrix
def full_matrix(dropped):
    # create initial matrix
    print('starting with m0')
    lb = LabelBinarizer(sparse_output=True)
    # m = lb.fit_transform(dropped.restaurant_id)
    m = lb.fit_transform(dropped.user_name)
    print(m.shape)

    # build matrix
    # making nan its own category for categorical
    print("adding categorical to matrix")
    m = add_categorical_to_matrix(m, dropped, ['review_stars', 'user_name', 'restaurant_stars', 'restaurant_attributes_ages_allowed', 'restaurant_attributes_alcohol', 'restaurant_attributes_attire', 'restaurant_attributes_byob_corkage', 'restaurant_attributes_noise_level', 'restaurant_attributes_smoking', 'restaurant_attributes_wifi', 'restaurant_city', 'restaurant_hours_friday_close', 'restaurant_hours_friday_open', 'restaurant_hours_monday_close', 'restaurant_hours_monday_open', 'restaurant_hours_saturday_close', 'restaurant_hours_saturday_open', 'restaurant_hours_sunday_close', 'restaurant_hours_sunday_open', 'restaurant_hours_thursday_close', 'restaurant_hours_thursday_open', 'restaurant_hours_tuesday_close', 'restaurant_hours_tuesday_open', 'restaurant_hours_wednesday_close', 'restaurant_hours_wednesday_open', 'restaurant_ambience', 'restaurant_music', 'restaurant_parking', 'restaurant_street', 'restaurant_zipcode', 'inspection_year', 'inspection_month', 'inspection_day', 'inspection_dayofweek', 'inspection_quarter',])
    print(m.shape)

    print("adding bool to matrix")
    m = add_categorical_to_matrix(m, dropped, ['restaurant_attributes_accepts_credit_cards', 'restaurant_attributes_byob', 'restaurant_attributes_caters', 'restaurant_attributes_coat_check', 'restaurant_attributes_corkage', 'restaurant_attributes_delivery', 'restaurant_attributes_dietary_restrictions_dairy_free', 'restaurant_attributes_dietary_restrictions_gluten_free', 'restaurant_attributes_dietary_restrictions_halal', 'restaurant_attributes_dietary_restrictions_kosher', 'restaurant_attributes_dietary_restrictions_soy_free', 'restaurant_attributes_dietary_restrictions_vegan', 'restaurant_attributes_dietary_restrictions_vegetarian', 'restaurant_attributes_dogs_allowed', 'restaurant_attributes_drive_thr', 'restaurant_attributes_good_for_dancing', 'restaurant_attributes_good_for_groups', 'restaurant_attributes_good_for_breakfast', 'restaurant_attributes_good_for_brunch', 'restaurant_attributes_good_for_dessert', 'restaurant_attributes_good_for_dinner', 'restaurant_attributes_good_for_latenight', 'restaurant_attributes_good_for_lunch', 'restaurant_attributes_good_for_kids', 'restaurant_attributes_happy_hour', 'restaurant_attributes_has_tv', 'restaurant_attributes_open_24_hours', 'restaurant_attributes_order_at_counter', 'restaurant_attributes_outdoor_seating', 'restaurant_attributes_payment_types_amex', 'restaurant_attributes_payment_types_cash_only', 'restaurant_attributes_payment_types_discover', 'restaurant_attributes_payment_types_mastercard', 'restaurant_attributes_payment_types_visa', 'restaurant_attributes_take_out', 'restaurant_attributes_takes_reservations', 'restaurant_attributes_waiter_service', 'restaurant_attributes_wheelchair_accessible', ])
    print(m.shape)

    m = add_numerical_to_matrix(m, dropped, ['review_votes_cool', 'review_votes_funny', 'review_votes_useful', 'user_average_stars', 'user_compliments_cool', 'user_compliments_cute', 'user_compliments_funny', 'user_compliments_hot', 'user_compliments_list', 'user_compliments_more', 'user_compliments_note', 'user_compliments_photos', 'user_compliments_plain', 'user_compliments_profile', 'user_compliments_writer', 'user_fans', 'user_review_count', 'user_votes_cool', 'user_votes_funny', 'user_votes_useful', 'restaurant_attributes_price_range', 'restaurant_latitude', 'restaurant_longitude', 'restaurant_review_count', 'checkin_counts', 'review_delta', 'previous_inspection_delta', 'polarity', 'subjectivity', 'neg', 'neu', 'pos', 'compound', 'user_yelping_since_delta','manager', 'supervisor', 'training', 'safety', 'disease', 'ill', 'sick', 'poisoning', 'hygiene', 'raw', 'undercooked', 'cold', 'clean', 'sanitary', 'wash', 'jaundice', 'yellow', 'hazard', 'inspection', 'violation', 'gloves', 'hairnet', 'nails', 'jewelry', 'sneeze', 'cough', 'runny', 'illegal', 'rotten', 'dirty', 'mouse', 'cockroach', 'contaminated', 'gross', 'disgusting', 'stink', 'old', 'parasite', 'reheat', 'frozen', 'broken', 'drip', 'bathroom', 'toilet', 'leak', 'trash', 'dark', 'lights', 'dust', 'puddle', 'pesticide', 'bugs', 'mold'])
    print(m.shape)

    print("adding restaurant categories to matrix")
    cats = ['restaurant_category_1', 'restaurant_category_2', 'restaurant_category_3', 'restaurant_category_4', 'restaurant_category_5', 'restaurant_category_6', 'restaurant_category_7']
    m = special_categories_to_matrix(m, dropped, cats)
    print(m.shape)

    print("adding restaurant neighborhoods to matrix")
    cats = ['restaurant_neighborhood_1', 'restaurant_neighborhood_2', 'restaurant_neighborhood_3']
    m = special_categories_to_matrix(m, dropped, cats)
    print(m.shape)

    print("matrix shape of {}".format(m.shape))
    joblib.dump(m, 'pickle_jar/full_matrix')
Developer: potatochip, Project: kojak, Lines: 32, Source: blue_pill.py
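The sparse_output=True option returns a SciPy CSR matrix, which keeps memory manageable when many high-cardinality columns are binarized and stacked side by side, as this function does. A sketch of that stacking pattern with scipy.sparse.hstack (the project-specific helpers such as add_categorical_to_matrix are not shown and the data below is made up):

import numpy as np
import scipy.sparse as sp
from sklearn.preprocessing import LabelBinarizer

names = np.array(["alice", "bob", "carol", "alice"])
cities = np.array(["boston", "boston", "nyc", "philly"])

m = LabelBinarizer(sparse_output=True).fit_transform(names)       # CSR matrix, shape (4, 3)
m = sp.hstack([m, LabelBinarizer(sparse_output=True).fit_transform(cities)]).tocsr()
print(m.shape)                                                    # (4, 6)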
Example 16: conv_demo
def conv_demo():
    # load the digits dataset
    digits = load_digits()
    X = digits['data']
    y_labels = digits['target']

    lb = LabelBinarizer()
    y = lb.fit_transform(y_labels)

    # split into training, validation and test datasets
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.25,
                                                        random_state=RANDOM_STATE)
    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train,
                                                          test_size=0.25,
                                                          random_state=RANDOM_STATE)

    # train the neural net
    print("Building neural net to classify digits")
    conv_net = pynn.ConvNet(digits['images'][0].shape, 1, y.shape[1],
                            random_state=RANDOM_STATE)
    print("Training")
    conv_net.fit(X_train, y_train, X_valid, y_valid,
                 batch_size=20, n_epochs=20, learning_rate=0.05)

    y_pred = conv_net.predict(X_test)
    print("digits accuracy: {}%".format(
        accuracy_score(y_test.argmax(1), y_pred.argmax(1)) * 100))
Developer: benjamin-croker, Project: pynn, Lines: 30, Source: example.py
Example 17: just_categorical
def just_categorical(dropped):
    # create initial matrix
    print('starting with m0')
    lb = LabelBinarizer(sparse_output=True)
    m = lb.fit_transform(dropped.restaurant_id)
    print(m.shape)

    # build matrix
    # making nan its own category for categorical
    print("adding categorical to matrix")
    m = add_categorical_to_matrix(m, dropped, ['review_stars', 'user_name', 'restaurant_stars', 'restaurant_attributes_ages_allowed', 'restaurant_attributes_alcohol', 'restaurant_attributes_attire', 'restaurant_attributes_byob_corkage', 'restaurant_attributes_noise_level', 'restaurant_attributes_smoking', 'restaurant_attributes_wifi', 'restaurant_city', 'restaurant_hours_friday_close', 'restaurant_hours_friday_open', 'restaurant_hours_monday_close', 'restaurant_hours_monday_open', 'restaurant_hours_saturday_close', 'restaurant_hours_saturday_open', 'restaurant_hours_sunday_close', 'restaurant_hours_sunday_open', 'restaurant_hours_thursday_close', 'restaurant_hours_thursday_open', 'restaurant_hours_tuesday_close', 'restaurant_hours_tuesday_open', 'restaurant_hours_wednesday_close', 'restaurant_hours_wednesday_open', 'restaurant_ambience', 'restaurant_music', 'restaurant_parking', 'restaurant_street', 'restaurant_zipcode', 'inspection_year', 'inspection_month', 'inspection_day', 'inspection_dayofweek', 'inspection_quarter',])
    print(m.shape)

    print("adding bool to matrix")
    m = add_categorical_to_matrix(m, dropped, ['restaurant_attributes_accepts_credit_cards', 'restaurant_attributes_byob', 'restaurant_attributes_caters', 'restaurant_attributes_coat_check', 'restaurant_attributes_corkage', 'restaurant_attributes_delivery', 'restaurant_attributes_dietary_restrictions_dairy_free', 'restaurant_attributes_dietary_restrictions_gluten_free', 'restaurant_attributes_dietary_restrictions_halal', 'restaurant_attributes_dietary_restrictions_kosher', 'restaurant_attributes_dietary_restrictions_soy_free', 'restaurant_attributes_dietary_restrictions_vegan', 'restaurant_attributes_dietary_restrictions_vegetarian', 'restaurant_attributes_dogs_allowed', 'restaurant_attributes_drive_thr', 'restaurant_attributes_good_for_dancing', 'restaurant_attributes_good_for_groups', 'restaurant_attributes_good_for_breakfast', 'restaurant_attributes_good_for_brunch', 'restaurant_attributes_good_for_dessert', 'restaurant_attributes_good_for_dinner', 'restaurant_attributes_good_for_latenight', 'restaurant_attributes_good_for_lunch', 'restaurant_attributes_good_for_kids', 'restaurant_attributes_happy_hour', 'restaurant_attributes_has_tv', 'restaurant_attributes_open_24_hours', 'restaurant_attributes_order_at_counter', 'restaurant_attributes_outdoor_seating', 'restaurant_attributes_payment_types_amex', 'restaurant_attributes_payment_types_cash_only', 'restaurant_attributes_payment_types_discover', 'restaurant_attributes_payment_types_mastercard', 'restaurant_attributes_payment_types_visa', 'restaurant_attributes_take_out', 'restaurant_attributes_takes_reservations', 'restaurant_attributes_waiter_service', 'restaurant_attributes_wheelchair_accessible', ])
    print(m.shape)

    print("adding restaurant categories to matrix")
    cats = ['restaurant_category_1', 'restaurant_category_2', 'restaurant_category_3', 'restaurant_category_4', 'restaurant_category_5', 'restaurant_category_6', 'restaurant_category_7']
    m = special_categories_to_matrix(m, dropped, cats)
    print(m.shape)

    print("adding restaurant neighborhoods to matrix")
    cats = ['restaurant_neighborhood_1', 'restaurant_neighborhood_2', 'restaurant_neighborhood_3']
    m = special_categories_to_matrix(m, dropped, cats)
    print(m.shape)

    print("matrix shape of {}".format(m.shape))
    joblib.dump(m, 'pickle_jar/categorical_matrix')
Developer: potatochip, Project: kojak, Lines: 30, Source: blue_pill.py
Example 18: BaseSGD
class BaseSGD(object):
    def _get_loss(self):
        losses = {
            "modified_huber": ModifiedHuber(),
            "hinge": Hinge(1.0),
            "perceptron": Hinge(0.0),
            "log": Log(),
            "sparse_log": SparseLog(),
            "squared": SquaredLoss(),
            "huber": Huber(self.epsilon),
            "epsilon_insensitive": EpsilonInsensitive(self.epsilon),
        }
        return losses[self.loss]

    def _get_learning_rate(self):
        learning_rates = {"constant": 1, "pegasos": 2, "invscaling": 3}
        return learning_rates[self.learning_rate]

    def _set_label_transformers(self, y):
        if self.multiclass == "natural":
            self.label_encoder_ = LabelEncoder()
            y = self.label_encoder_.fit_transform(y).astype(np.float64)

        self.label_binarizer_ = LabelBinarizer(neg_label=-1, pos_label=1)
        self.label_binarizer_.fit(y)
        self.classes_ = self.label_binarizer_.classes_.astype(np.int32)
        n_classes = len(self.label_binarizer_.classes_)
        n_vectors = 1 if n_classes <= 2 else n_classes

        return n_classes, n_vectors
Developer: Raz0r, Project: lightning, Lines: 29, Source: sgd.py
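The rule n_vectors = 1 if n_classes <= 2 else n_classes mirrors how LabelBinarizer shapes its output: a two-class problem yields a single -1/+1 column (so one weight vector suffices), while k >= 3 classes yield k columns. A standalone sketch (not from the lightning code itself):

from sklearn.preprocessing import LabelBinarizer

binary = LabelBinarizer(neg_label=-1, pos_label=1).fit_transform([0, 1, 1, 0])
multi = LabelBinarizer(neg_label=-1, pos_label=1).fit_transform([0, 1, 2, 0])
print(binary.shape)   # (4, 1) -> a single one-vs-rest weight vector
print(multi.shape)    # (4, 3) -> one weight vector per class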
Example 19: binarize_seqfeature
def binarize_seqfeature(X):
    """
    Binarizes the sequence features into 1s and 0s.

    Parameters:
    ===========
    - X: (pandas DataFrame) the sequence feature matrix without drug resistance values.

    Returns:
    ========
    - binarized: (pandas DataFrame) a binarized sequence feature matrix with columns corresponding to particular amino acids at each position.
    - binarizers: (dictionary) a dictionary of binarizer objects for each position.
    """
    binarized = pd.DataFrame()
    binarizers = dict()
    for col in X.columns:
        lb = LabelBinarizer()
        binarized_cols = lb.fit_transform(X[col])
        if len(lb.classes_) == 2:
            binarized[col] = pd.Series(binarized_cols[:, 0])
        else:
            for i, c in enumerate(lb.classes_):
                binarized[col + "_" + c] = binarized_cols[:, i]
        binarizers[col] = lb

    return binarized, binarizers
Developer: nickleshill, Project: systems-microbiology-hiv, Lines: 26, Source: custom_funcs.py
Example 20: run
def run():
    # Load and preprocess data
    label_to_unique_instance = load_data()
    X, Y = preprocess_data(label_to_unique_instance)

    # Encode labels
    label_binarizer = LabelBinarizer()
    transformed_Y = label_binarizer.fit_transform(Y)

    # Cross validation
    cross_validation_iterator = StratifiedShuffleSplit(Y, n_iter=1, test_size=0.4, random_state=0)
    for train_index, test_index in cross_validation_iterator:
        break

    # Init model
    model = init_model(raw_feature_dim=X.shape[-1], unique_lable_num=len(label_binarizer.classes_))

    # Training procedure
    model.fit(X[train_index], transformed_Y[train_index],
              batch_size=BATCH_SIZE, nb_epoch=MAXIMUM_EPOCH_NUM,
              validation_data=(X[test_index], transformed_Y[test_index]),
              callbacks=[TensorBoard(log_dir="/tmp/Sequence Classification")],
              verbose=2)

    print("All done!")
Developer: nixingyang, Project: Miscellaneous-Projects, Lines: 25, Source: solution.py
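After training, the same fitted binarizer can translate the network's per-class probabilities back into the original labels; len(label_binarizer.classes_) is also what sizes the output layer above. A sketch of that decoding step, with made-up labels and probability rows:

import numpy as np
from sklearn.preprocessing import LabelBinarizer

label_binarizer = LabelBinarizer()
label_binarizer.fit_transform(["walk", "run", "swim", "walk"])
print(len(label_binarizer.classes_))              # 3 -> size of the softmax output layer

probs = np.array([[0.2, 0.1, 0.7],                # hypothetical model.predict output
                  [0.6, 0.3, 0.1]])
print(label_binarizer.inverse_transform(probs))   # highest-probability class per row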
Note: the sklearn.preprocessing.LabelBinarizer class examples in this article were collected by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and distribution and use should follow each project's license. Do not reproduce without permission.