This article compiles typical usage examples of Python's sklearn.tree.DecisionTreeClassifier class. If you are wondering what the DecisionTreeClassifier class does, how to use it, or want to see it in practice, the curated class code examples below may help.
The article presents 20 code examples of the DecisionTreeClassifier class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps our system recommend better Python code examples.
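Before diving into the collected examples, a minimal self-contained sketch of the canonical fit/score workflow may help orient readers; the toy dataset and parameter choices below are illustrative only and not taken from any example on this page.

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = DecisionTreeClassifier(max_depth=3, random_state=0)  # a shallow tree limits overfitting
clf.fit(X_train, y_train)
print("test accuracy:", clf.score(X_test, y_test))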
Example 1: decision_tree_entropy
def decision_tree_entropy(training_data):
    clf = DecisionTreeClassifier(criterion="entropy", random_state=0)
    clf.fit(training_data[0], training_data[1])
    # with open("/media/deeksha/e/Deeksha/Dropbox/Coursework/MachineLearning/HW3/entropy.dot", 'w') as f:
    #     f = tree.export_graphviz(clf, out_file=f)
    print("entropy: number of nodes", clf.tree_.node_count)
    return clf
Author: deekshachugh, Project: MachineLearning, Lines: 7, Source: DecisionTreeUsingGiniand+Entropy.py
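Example 1 reads the node count off the fitted estimator's tree_ attribute. For a human-readable dump of the learned rules, scikit-learn 0.21+ also provides export_text; a short sketch, assuming clf is the fitted tree returned above:

from sklearn.tree import export_text

print(clf.tree_.node_count)  # total number of nodes in the fitted tree
print(clf.tree_.max_depth)   # depth the tree actually reached
print(export_text(clf))      # the learned if/else rules, one line per split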
Example 2: __init__
class Transformer:
    def __init__(self, use_PCA=True):
        self._clf = DecisionTreeClassifier(min_samples_leaf=10)
        self._idx = None
        self._scaler = StandardScaler()
        self._trans = PCA('mle')
        self._use_PCA = use_PCA

    def fit(self, X, y):
        X = np.array(X)
        self._clf.fit(X, y)
        # keep only the features the tree actually used (non-zero importance)
        self._idx = [i for i in range(len(self._clf.feature_importances_))
                     if self._clf.feature_importances_[i] > 0]
        new_set = [X[i][self._idx] for i in range(len(X))]
        # new_set = self._scaler.fit_transform(new_set)
        if self._use_PCA:
            new_set = self._trans.fit_transform(new_set)
        return new_set

    def transform(self, features):
        features = features[self._idx]
        # features = self._scaler.transform(features.astype(float))
        if self._use_PCA:
            features = self._trans.transform(features)
        return features
Author: ItsLastDay, Project: Opinion-mining-from-reviews, Lines: 29, Source: solution.py
Example 3: quize1
def quize1(data):
    # 1. Load the dataset from titanic.csv using the Pandas package.
    # 2. Keep four features: passenger class (Pclass), ticket fare (Fare),
    #    passenger age (Age) and sex (Sex).
    # 3. Note that the Sex feature holds string values.
    # 4. Extract the target variable, stored in the Survived column.
    # 5. The data contains missing values: for some passengers, for example,
    #    the age is unknown, and such entries are read into pandas as NaN.
    #    Find all rows with missing features and drop them from the dataset.
    # 6. Train a decision tree with random_state=241 and all other parameters
    #    at their defaults.
    # 7. Compute the feature importances and find the two most important
    #    features. Their names are the answer to this task (list them
    #    separated by a comma or a space; order does not matter).
    dataF = data[['Pclass', 'Fare', 'Age', 'Sex', 'Survived']]
    dataF = dataF.dropna()
    Y = dataF['Survived']
    dataF = dataF[['Pclass', 'Fare', 'Age', 'Sex']]
    clf = DecisionTreeClassifier(random_state=241)
    dataF.loc[dataF['Sex'] != 'male', 'Sex'] = 0
    dataF.loc[dataF['Sex'] == 'male', 'Sex'] = 1
    print(dataF)
    clf.fit(dataF, Y)
    importances = clf.feature_importances_
    print(importances)
    # d = zip(dataF.columns, clf.feature_importances_)
    # print(d)
    return
Author: BlinJin, Project: Machine-Learning, Lines: 26, Source: decision_trees.py
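The commented-out lines above gesture at pairing column names with importances. To actually answer the quiz, the importances need to be ranked; a small follow-up sketch reusing dataF and clf from the function above:

top_two = sorted(zip(dataF.columns, clf.feature_importances_),
                 key=lambda pair: pair[1], reverse=True)[:2]
print(", ".join(name for name, _ in top_two))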
Example 4: evaluateDecisionTree
def evaluateDecisionTree(train_x, train_y, test_x, test_y):
    clf = DecisionTreeClassifier(criterion='entropy', min_samples_leaf=5, max_depth=20)
    clf.fit(train_x, train_y)
    p = clf.predict_proba(test_x)[:, 1]  # probability of the positive class
    auc = roc_auc_score(test_y, p)
    plotAUC(test_y, p, 'DT')
    return auc
Author: ds-ga-1001-final, Project: project, Lines: 7, Source: decision_tree.py
Example 5: decision_tree
def decision_tree(train_bow, train_labels, test_bow, test_labels, bow_indexes):
    print("Training decision tree")
    dt_classifier = DecisionTreeClassifier()
    dt_classifier.fit(train_bow, train_labels)
    print("Testing decision tree")
    test(dt_classifier, "dt", test_bow, test_labels, bow_indexes)
Author: wangk1, Project: research, Lines: 7, Source: classifiers_func.py
Example 6: train_adaboost
def train_adaboost(features, labels, learning_rate, n_lab, n_runs, n_estim, n_samples):
    uniqLabels = np.unique(labels)
    print('Taking', n_lab, 'labels')
    uniqLabels = uniqLabels[:n_lab]
    used_labels = uniqLabels
    pbar = start_progressbar(len(uniqLabels), 'training adaboost for %i labels' % len(uniqLabels))
    allLearners = []
    for yy, targetLab in enumerate(uniqLabels):
        runs = []
        for rrr in range(n_runs):
            feats, labs = get_binary_sets(features, labels, targetLab, n_samples)
            # fit a shallow tree as the base estimator for real AdaBoost (SAMME.R)
            baseClf = DecisionTreeClassifier(max_depth=4, min_samples_leaf=10, min_samples_split=10)
            baseClf.fit(feats, labs)
            ada_real = AdaBoostClassifier(base_estimator=baseClf,
                                          learning_rate=learning_rate,
                                          n_estimators=n_estim,
                                          algorithm="SAMME.R")
            runs.append(ada_real.fit(feats, labs))
        allLearners.append(runs)
        update_progressbar(pbar, yy)
    end_progressbar(pbar)
    return allLearners, used_labels
Author: aarslan, Project: action_rec, Lines: 26, Source: classifier_wrappers.py
Example 7: test_importances
def test_importances():
    """Check variable importances."""
    X, y = datasets.make_classification(n_samples=2000,
                                        n_features=10,
                                        n_informative=3,
                                        n_redundant=0,
                                        n_repeated=0,
                                        shuffle=False,
                                        random_state=0)
    for name, Tree in CLF_TREES.items():
        clf = Tree(random_state=0)
        clf.fit(X, y)
        importances = clf.feature_importances_
        n_important = np.sum(importances > 0.1)
        assert_equal(importances.shape[0], 10, "Failed with {0}".format(name))
        assert_equal(n_important, 3, "Failed with {0}".format(name))
        X_new = clf.transform(X, threshold="mean")
        assert_less(0, X_new.shape[1], "Failed with {0}".format(name))
        assert_less(X_new.shape[1], X.shape[1], "Failed with {0}".format(name))

    # Check on iris that importances are the same for all builders
    clf = DecisionTreeClassifier(random_state=0)
    clf.fit(iris.data, iris.target)
    clf2 = DecisionTreeClassifier(random_state=0,
                                  max_leaf_nodes=len(iris.data))
    clf2.fit(iris.data, iris.target)
    assert_array_equal(clf.feature_importances_,
                       clf2.feature_importances_)
Author: Carol-Hu, Project: scikit-learn, Lines: 33, Source: test_tree.py
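This test comes from an older scikit-learn test suite: the estimator-level clf.transform(X, threshold=...) shortcut used above was deprecated and later removed. In current releases the equivalent importance-based feature selection goes through sklearn.feature_selection.SelectFromModel; a minimal sketch, assuming clf is an already-fitted tree:

from sklearn.feature_selection import SelectFromModel

# prefit=True wraps the already-fitted estimator instead of re-fitting it
selector = SelectFromModel(clf, threshold="mean", prefit=True)
X_new = selector.transform(X)  # keeps features whose importance exceeds the mean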
Example 8: MultEstimator
class MultEstimator(BaseEstimator):
    def __init__(self, categories):
        self.categories = categories

    def fit(self, X, y, **params):
        self.models = {_: None for _ in self.categories}
        self.tot_model = DecisionTreeClassifier(max_depth=8, min_samples_leaf=100)
        categ = X[:, -1]
        data = X[:, :-1]
        self.tot_model.fit(data, y)
        for c in self.models.keys():
            mask = categ == c
            m = DecisionTreeClassifier(max_depth=8, min_samples_leaf=100)
            m.fit(data[mask], y[mask])
            self.models[c] = m

    def predict(self, X):
        categ = X[:, -1]
        data = X[:, :-1]
        p = self.tot_model.predict(data)
        for c in self.models.keys():
            mask = categ == c
            if mask.any():
                p[mask] = self.models[c].predict(data[mask])
        return p

    def predict_proba(self, X):
        categ = X[:, -1]
        data = X[:, :-1]
        p = self.tot_model.predict_proba(data)
        for c in self.models.keys():
            mask = categ == c
            if mask.any():
                p[mask] = self.models[c].predict_proba(data[mask])
        return p
Author: alfiya400, Project: kaggle-avitoDuplicatesDetection, Lines: 35, Source: model.py
Example 9: main
def main(percentage):
    """Given a percentage for splitting the dataset, fit the training set and apply the rest as a test set."""
    df = pd.read_csv('cellStrength.log')
    df.drop('SSID', 1, inplace=True)
    processed = preprocess(df)
    location_col = processed[0].shape[1] - 4
    hash_to_location = {y: x for x, y in processed[1].items()}
    df2, targets = encode_target(processed[0], location_col)
    msk = np.random.rand(len(df)) < percentage
    test = df2[~msk].copy()
    train = df2[msk].copy()
    open('golden.csv', 'w').write(','.join([hash_to_location[p] for p in test['Target'].tolist()]) + '\n')
    test.drop(186, 1, inplace=True)
    test.drop('Target', 1, inplace=True)
    features = list(df2.columns[:location_col]) + list(df2.columns[location_col + 1:-1])
    y = train['Target']
    X = train[features]
    dt = DecisionTreeClassifier(min_samples_split=3, random_state=99)
    try:
        dt.fit(X, y)
    except ValueError:
        return
    predictions = dt.predict(test).tolist()
    open('golden.csv', 'a').write(','.join([hash_to_location[p] for p in predictions]))
    # get_code(dt, features, targets)
    return get_accuracy('golden.csv')
Author: elahi-arman, Project: Python, Lines: 34, Source: router_association.py
Example 10: programmer_2
def programmer_2():
    datafile = 'data/model.xls'
    data = pd.read_excel(datafile)
    data = data.as_matrix()  # .values in newer pandas
    shuffle(data)  # shuffle the rows randomly
    # 80/20 train/test split
    p = 0.8
    train = data[:int(len(data) * p), :]
    test = data[int(len(data) * p):, :]
    # build the CART decision tree model
    treefile = 'tmp/tree.pkl'
    tree = DecisionTreeClassifier()
    tree.fit(train[:, :3], train[:, 3])
    joblib.dump(tree, treefile)
    cm_plot(train[:, 3], tree.predict(train[:, :3])).show()  # plot the confusion matrix
    # note that scikit-learn's predict method returns class labels directly
    fpr, tpr, thresholds = roc_curve(
        test[:, 3], tree.predict_proba(test[:, :3])[:, 1], pos_label=1)
    plt.plot(fpr, tpr, linewidth=2, label='ROC of CART', color='green')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    # set the axis limits
    plt.ylim(0, 1.05)
    plt.xlim(0, 1.05)
    plt.legend(loc=4)
    plt.show()
    print(thresholds)
Author: Ctipsy, Project: python_data_analysis_and_mining_action, Lines: 32, Source: code.py
Example 11: test_graphviz_errors
def test_graphviz_errors():
    # Check for errors of export_graphviz
    clf = DecisionTreeClassifier(max_depth=3, min_samples_split=2)

    # Check not-fitted decision tree error
    out = StringIO()
    assert_raises(NotFittedError, export_graphviz, clf, out)

    clf.fit(X, y)

    # Check if it errors when length of feature_names
    # mismatches with number of features
    message = ("Length of feature_names, "
               "1 does not match number of features, 2")
    assert_raise_message(ValueError, message, export_graphviz, clf, None,
                         feature_names=["a"])

    message = ("Length of feature_names, "
               "3 does not match number of features, 2")
    assert_raise_message(ValueError, message, export_graphviz, clf, None,
                         feature_names=["a", "b", "c"])

    # Check class_names error
    out = StringIO()
    assert_raises(IndexError, export_graphviz, clf, out, class_names=[])

    # Check precision error
    out = StringIO()
    assert_raises_regex(ValueError, "should be greater or equal",
                        export_graphviz, clf, out, precision=-1)
    assert_raises_regex(ValueError, "should be an integer",
                        export_graphviz, clf, out, precision="1")
Author: Lavanya-Basavaraju, Project: scikit-learn, Lines: 32, Source: test_export.py
Example 12: decision_trees
def decision_trees(features, labels):
    classifier = DecisionTreeClassifier(random_state=0, criterion="entropy")
    classifier.fit(features, labels)
    # score_func and the cross_validation module are pre-0.18 scikit-learn APIs
    scores = cross_validation.cross_val_score(
        classifier, features, labels, cv=10,
        score_func=metrics.precision_recall_fscore_support
    )
    print_table("Decision Trees", numpy.around(numpy.mean(scores, axis=0), 2))
Author: pelluch, Project: data-mining, Lines: 7, Source: main.py
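As the comment notes, this snippet targets pre-0.18 scikit-learn, where cross_val_score lived in sklearn.cross_validation and accepted a score_func callable. In current releases the same call goes through sklearn.model_selection with a scoring string; a sketch of the modern form (the f1_macro choice is an assumed stand-in, since precision_recall_fscore_support is not itself a valid scorer):

from sklearn.model_selection import cross_val_score

scores = cross_val_score(classifier, features, labels, cv=10, scoring="f1_macro")
print("Decision Trees:", scores.mean().round(2))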
Example 13: text_learning_experiment
def text_learning_experiment(words_to_remove=[]):
    from_sara = open("../text_learning/from_sara.txt", "r")
    from_chris = open("../text_learning/from_chris.txt", "r")
    word_data, authors = vectorize_emails(from_sara, from_chris, max_emails=300,
                                          words_to_remove=words_to_remove)
    features_train, features_test, labels_train, labels_test = \
        cross_validation.train_test_split(word_data, authors, test_size=0.1, random_state=42)
    vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5,
                                 stop_words='english')
    features_train = vectorizer.fit_transform(features_train)
    features_test = vectorizer.transform(features_test).toarray()
    # use only the first 150 samples for training
    features_train = features_train[:150].toarray()
    labels_train = labels_train[:150]
    clf = DecisionTreeClassifier()
    clf.fit(features_train, labels_train)
    predict_train = clf.predict(features_train)
    predict_test = clf.predict(features_test)
    print("train acc:", accuracy_score(labels_train, predict_train))
    print("test acc: ", accuracy_score(labels_test, predict_test))
    feature_index = np.argmax(clf.feature_importances_)
    feature_importance = clf.feature_importances_[feature_index]
    feature_name = vectorizer.get_feature_names()[feature_index]
    print("Most important feature, and relative importance:", feature_name, ":", feature_importance)
    return feature_name, feature_importance
Author: andrei-iusan, Project: ud120-projects, Lines: 25, Source: poi_id.py
Example 14: train_dtc
def train_dtc(X, y):
    """
    Create and train the Decision Tree Classifier.
    """
    dtc = DecisionTreeClassifier()
    dtc.fit(X, y)
    return dtc
Author: texaspandaa, Project: Text-Mining, Lines: 7, Source: 1.py
Example 15: decision_tree_prediction
def decision_tree_prediction(features_train, labels_train, features_test, ids):
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        features_train, labels_train, random_state=1301, stratify=labels_train, test_size=0.3)
    clf = DecisionTreeClassifier(criterion='gini',
                                 min_samples_split=10,
                                 max_depth=10,
                                 max_leaf_nodes=16,
                                 max_features=2)
    # clf_acc = clf.fit(X_train, y_train)
    # print(clf.best_estimator_)
    # feature_importance = clf.feature_importances_
    # print(feature_importance)
    # pred = clf_acc.predict_proba(X_test)[:, 1]
    # print(y_test, pred)
    # acc = accuracy_score(y_test, pred)
    # print("Acc {}".format(acc))
    clf = clf.fit(features_train, labels_train)
    pred = clf.predict_proba(features_test)[:, 1]
    with open("data/canivel_decision_tree.csv", "w", newline="") as predictions_file:
        predictions_file_object = csv.writer(predictions_file)
        predictions_file_object.writerow(["ID", "TARGET"])
        predictions_file_object.writerows(zip(ids, pred))
Author: canivel, Project: Kaggle-Santander, Lines: 30, Source: regular_classifiers.py
Example 16: main
def main():
    data = run_game()
    clf = DecisionTreeClassifier(criterion='entropy')
    game_data = [[i[0], i[1]] for i in data]
    profits = [i[2] for i in data]
    clf.fit(game_data, profits)
    with open('tree.dot', 'w') as dotfile:
        export_graphviz(
            clf,
            dotfile,
            feature_names=['coin', 'bet']
        )
    # predict expects a 2-D array: one row per sample
    predictions_lose1 = [clf.predict([[0, 0]]) for x in range(100)]
    predictions_lose2 = [clf.predict([[0, 1]]) for x in range(100)]
    predictions_win = [clf.predict([[1, 1]]) for x in range(100)]
    print('All these profit predictions should be zero:')
    print(predictions_lose1)
    print('Accuracy was', calculate_accuracy(predictions_lose1, np.array([0])))
    print('All these profit predictions should be zero:')
    print(predictions_lose2)
    print('Accuracy was', calculate_accuracy(predictions_lose2, np.array([0])))
    print('All these profit predictions should be two:')
    print(predictions_win)
    print('Accuracy was', calculate_accuracy(predictions_win, np.array([2])))
Author: kimmobrunfeldt, Project: machine-learning, Lines: 32, Source: main.py
Example 17: buildTree
def buildTree(options, treefile, dataFile=None):
    dt = loadTree(treefile)
    if dt is not None:
        return dt
    if dataFile is None:
        raise ValueError("No data file specified")
    dt = DecisionTreeClassifier(min_samples_split=20, random_state=99)
    files = []
    featureFrames = []
    targetFrames = []
    if os.path.isdir(dataFile):
        files = getFiles(dataFile, ".csv")
    else:
        files.append(dataFile)
    for _file in files:
        print("Loading data %s" % _file)
        (featureValues, targetValues, features, df) = loadData(_file, options)
        featureFrames.append(featureValues)
        targetFrames.append(targetValues)
    dt.fit(pd.concat(featureFrames), pd.concat(targetFrames))
    saveTree(treefile, dt)
    print("Building graph")
    visualize_tree(treefile, dt, features)
    return dt
Author: gbrian, Project: naive-machine-learning, Lines: 25, Source: searchml.py
Example 18: main
def main(args):
    import importlib
    # dynamically import the signal-config module named by args.signame
    conf = importlib.import_module("main.pandas_talib.sig_%s" % args.signame)
    build.work2(20, 'sp500Top50', args.signame)
    df = base.get_merged(conf.__name__, yeod.get_sp500Top50())
    df.to_csv("ta.csv")
    tree = DecisionTreeClassifier()
    feat_names = base.get_feat_names(df)
    dfTrain = df[(df.date >= '1970-01-01') & (df.date <= '2009-12-31')]
    npTrainFeat = dfTrain.loc[:, feat_names].values.copy()
    npTrainLabel = dfTrain.loc[:, "label5"].values.copy()
    # binarize the label: 1 if above 1.0, else 0
    npTrainLabel[npTrainLabel > 1.0] = 1
    npTrainLabel[npTrainLabel < 1.0] = 0
    tree.fit(npTrainFeat, npTrainLabel)
    joblib.dump(tree, "tree.pkl", compress=3)
    dfTest = df[(df.date >= '2010-01-01') & (df.date <= '2099-12-31')]
    npTestFeat = dfTest.loc[:, feat_names].values.copy()
    npPred = tree.predict_proba(npTestFeat)
    dfTest.loc[:, "pred"] = npPred[:, 1]
    print(dfTest['pred'].head())
    dfPos = dfTest[dfTest['pred'] > 0.55]
    print(1.0 * len(dfPos[dfPos['label5'] > 1]) / len(dfPos))
    print(1.0 * len(dfTest[dfTest['label5'] > 1]) / len(dfTest))
Author: hongbin0908, Project: pytrade, Lines: 31, Source: check_sig2.py
Example 19: test_graphviz_errors
def test_graphviz_errors():
    """Check for errors of export_graphviz"""
    clf = DecisionTreeClassifier(max_depth=3, min_samples_split=1)
    clf.fit(X, y)

    out = StringIO()
    assert_raises(IndexError, export_graphviz, clf, out, feature_names=[])
Author: Hydroinformatics-UNESCO-IHE, Project: scikit-learn, Lines: 7, Source: test_export.py
Example 20: train
def train(self, X, Y):
    N, D = X.shape
    for t in range(self.boostrap_sample):
        sampleX, sampleY = self.get_sample(X, Y)
        # each weak learner is a decision stump (depth-1 tree)
        clf = DecisionTreeClassifier(criterion="entropy", max_depth=1)
        clf.fit(sampleX, sampleY)
        self.weak_clfs.append(clf)
Author: MUforever, Project: Machine-Learning, Lines: 7, Source: Bagging.py
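The enclosing class is not shown in this snippet; for completeness, a hypothetical predict method that combines the stored stumps by majority vote could look like the sketch below. Only self.weak_clfs is taken from the code above; the vote logic and the assumption of non-negative integer labels are added here.

def predict(self, X):
    import numpy as np
    # collect one row of predictions per weak classifier: shape (n_clfs, n_samples)
    votes = np.array([clf.predict(X) for clf in self.weak_clfs])
    # majority vote per sample; assumes non-negative integer class labels
    return np.array([np.bincount(col.astype(int)).argmax() for col in votes.T])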
Note: The sklearn.tree.DecisionTreeClassifier class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright remains with the original authors. Consult the corresponding project's license before distributing or reusing the code, and do not reproduce this article without permission.