• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python tester.test_classifier函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中tester.test_classifier函数的典型用法代码示例。如果您正苦于以下问题:Python test_classifier函数的具体用法?Python test_classifier怎么用?Python test_classifier使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了test_classifier函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: tune_classifier

def tune_classifier(classifier, clf_params, max_features):
    """Grid-search ``classifier`` on a hold-out split and score the winner.

    The project feature list is truncated to its leading label entry plus
    the next ``max_features`` names, a 70/30 split is drawn with
    ``random_state=42``, ``GridSearchCV`` is fitted on the training part
    with an F1 scorer, and the best estimator is printed and handed to
    ``test_classifier`` with ``1000`` as its fourth positional argument.

    Args:
        classifier: estimator passed to ``GridSearchCV``.
        clf_params: parameter grid passed as ``param_grid``.
        max_features: how many entries to keep after the first element of
            the feature list.

    Returns:
        None. Results are printed.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20; prefer its
    # replacement and fall back only on installations that predate it.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split

    ### features_list is a list of strings, each of which is a feature name.
    ### The first feature must be "poi".
    features_list = get_feature_list()

    ### Store to my_dataset for easy export below.
    my_dataset = get_data()

    ### Extract features and labels from dataset for local testing.
    ### The +1 keeps the leading label entry in addition to max_features names.
    features_list = features_list[0:max_features + 1]
    _, labels, features = get_features_and_labels(my_dataset, features_list)

    # Only the training part of the split is used for the grid search.
    features_train, _, labels_train, _ = train_test_split(
        features, labels, test_size=0.3, random_state=42)

    clf = GridSearchCV(classifier, param_grid=clf_params,
                       scoring=make_scorer(f1_score))
    clf.fit(features_train, labels_train)
    clf_final = clf.best_estimator_

    # Same output as the Python 2 statement `print "The best estimator = ", x`
    # (the print statement inserts one extra space between its two items).
    print("The best estimator =  %s" % (clf_final,))
    test_classifier(clf_final, my_dataset, features_list, 1000)
开发者ID:tommysiu,项目名称:udacity-data-analyst,代码行数:30,代码来源:tuning.py


示例2: RandomForest

def RandomForest(feature_list, dataset):
    """Evaluate a default random forest and report its feature importances.

    The classifier is passed to ``test_classifier`` and its
    ``feature_importances_`` are then forwarded to ``print_importance``
    together with ``feature_list``.

    Returns:
        The ``RandomForestClassifier`` instance that was evaluated.
    """
    from sklearn import ensemble

    forest = ensemble.RandomForestClassifier()
    test_classifier(forest, dataset, feature_list)
    # NOTE(review): reading feature_importances_ here presumes that
    # test_classifier fits the estimator in place -- confirm in tester.py.
    print_importance(feature_list, forest.feature_importances_)
    return forest
开发者ID:MengoDB,项目名称:Identify-Fraud-from-Enron-Emails,代码行数:7,代码来源:poi_id.py


示例3: GaussianNB

def GaussianNB(feature_list, dataset):
    """Evaluate a default Gaussian naive Bayes classifier.

    Returns:
        The ``sklearn.naive_bayes.GaussianNB`` instance that was passed to
        ``test_classifier``.
    """
    # Import the module rather than the class: this function shares the
    # class's name, so the qualified form keeps the two apart.
    from sklearn import naive_bayes

    model = naive_bayes.GaussianNB()
    test_classifier(model, dataset, feature_list)
    return model
开发者ID:MengoDB,项目名称:ud120-projects,代码行数:7,代码来源:poi_bkp.py


示例4: decisionTree

def decisionTree(feature_list, dataset):
    """Evaluate a default decision tree and print its feature importances.

    Returns:
        The ``DecisionTreeClassifier`` instance that was passed to
        ``test_classifier``.
    """
    from sklearn.tree import DecisionTreeClassifier

    model = DecisionTreeClassifier()
    test_classifier(model, dataset, feature_list)
    print(model.feature_importances_)
    return model
开发者ID:MengoDB,项目名称:ud120-projects,代码行数:7,代码来源:poi_bkp.py


示例5: iterPipe

def iterPipe(num1, num2):
    """Run ``test_classifier`` on one ``DTC`` per ``min_samples_split`` value.

    Every integer from ``num1`` through ``num2`` (inclusive) is tried, using
    the module-level ``my_dataset`` and ``features_list``.

    Earlier variants that wrapped the tree in a scaling / PCA ``Pipeline``
    were disabled in the original; only the bare tree is evaluated.
    """
    for min_split in range(num1, num2 + 1):
        test_classifier(DTC(min_samples_split=min_split), my_dataset, features_list)
开发者ID:Faylfire,项目名称:identifying_enron_fraud_project_5_fang_lu,代码行数:8,代码来源:poi_id.py


示例6: KNN

def KNN(feature_list, dataset):
    """Evaluate a standard-scaled k-nearest-neighbours pipeline.

    Args:
        feature_list: feature names forwarded to ``test_classifier``.
        dataset: dataset forwarded to ``test_classifier``.

    Returns:
        None.
    """
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    # Feature scaling runs before the classifier inside one pipeline.
    clf = Pipeline([('scale', StandardScaler()), ('knn', KNeighborsClassifier())])

    # Evaluate on the arguments. The previous body ignored both parameters
    # and read the module globals `my_dataset` / `features_list` instead.
    test_classifier(clf, dataset, feature_list)
开发者ID:yielder,项目名称:identifying-fraud-from-enron-email,代码行数:9,代码来源:poi_id.py


示例7: setup_and_test

def setup_and_test(my_dataset, features_list, classifier):
    """Dump the classifier and data, load them back, and test what was loaded.

    Args:
        my_dataset: dataset passed to ``dump_classifier_and_data``.
        features_list: feature names passed to ``dump_classifier_and_data``.
        classifier: estimator passed to ``dump_classifier_and_data``.

    Returns:
        None.
    """
    # Persist first so the evaluation below runs on the reloaded objects.
    dump_classifier_and_data(classifier, my_dataset, features_list)

    loaded_clf, loaded_dataset, loaded_features = load_classifier_and_data()
    test_classifier(loaded_clf, loaded_dataset, loaded_features)
开发者ID:joashxu,项目名称:enron,代码行数:10,代码来源:utils.py


示例8: tuneKmeans

def tuneKmeans(feature_list, dataset):
    """Grid-search ``KMeans`` over ``n_clusters`` and return the best model.

    The search object itself is handed to ``test_classifier``; afterwards
    its ``best_params_`` are printed.

    Returns:
        ``best_estimator_`` of the grid search.
    """
    from sklearn.cluster import KMeans
    from sklearn.grid_search import GridSearchCV

    # The grid holds exactly two candidates, 2 and 10 (a tuple, not a range).
    search = GridSearchCV(
        KMeans(n_clusters=2, tol=0.001),
        {'n_clusters': (2, 10)},
        scoring='recall',
    )
    test_classifier(search, dataset, feature_list)

    print('###best_params')
    print(search.best_params_)
    return search.best_estimator_
开发者ID:MengoDB,项目名称:ud120-projects,代码行数:11,代码来源:poi_bkp.py


示例9: tuneDT

def tuneDT(feature_list, dataset):
    """Grid-search a decision tree over ``criterion`` and ``splitter``.

    The search object is handed to ``test_classifier`` and its
    ``best_params_`` are printed afterwards.

    Args:
        feature_list: feature names forwarded to ``test_classifier``.
        dataset: dataset forwarded to ``test_classifier``.

    Returns:
        None.
    """
    from sklearn.grid_search import GridSearchCV
    from sklearn import tree

    parameters = {
        'criterion': ('gini', 'entropy'),
        'splitter': ('best', 'random'),
    }
    clf = GridSearchCV(tree.DecisionTreeClassifier(), parameters, scoring='recall')

    # Evaluate on the arguments. The previous body ignored both parameters
    # and read the module globals `my_dataset` / `features_list` instead.
    test_classifier(clf, dataset, feature_list)

    print('###best_params')
    print(clf.best_params_)
开发者ID:yielder,项目名称:identifying-fraud-from-enron-email,代码行数:11,代码来源:poi_id.py


示例10: detect_poi

def detect_poi():
    """Pick the best feature list for GaussianNB, compare trees, dump results.

    Steps, in order:
      1. Load ``final_project_dataset.pkl`` and drop the ``'TOTAL'`` entry.
      2. Add the four ratio features to the dataset in place.
      3. Score every candidate feature list from ``fList_set()`` with
         ``ptest`` and keep the one with the highest score.
      4. Print ``test_classifier`` metrics for GaussianNB on that list, then
         for decision trees over a small ``min_samples_split`` /
         ``min_samples_leaf`` grid.
      5. Dump a fresh ``GaussianNB()`` with the dataset and chosen list.

    Returns:
        None.
    """
    ### Load the dictionary containing the dataset. The context manager
    ### closes the handle, which the previous bare open() call never did.
    with open("final_project_dataset.pkl", "r") as dataset_file:
        data_dict = pickle.load(dataset_file)

    ### Task 1: Remove outliers (the default of 0 avoids a KeyError if absent)
    data_dict.pop('TOTAL', 0)

    ### Task 2: Select what features
    ### 'stk_pay_ratio','to_poi_ratio', 'from_poi_ratio','bonus_salary_ratio'
    ### features_list is a list of strings, each of which is a feature name.
    ### The first feature must be "poi".
    my_dataset = data_dict
    stk_pay_ratio(my_dataset)
    from_poi_ratio(my_dataset)
    to_poi_ratio(my_dataset)
    bonus_salary_ratio(my_dataset)

    ### Task 3: Feature Selection
    ### Per the original notes, fList_set() yields the 15 feature lists that
    ### cover all combinations of the 4 features above.
    all_features_list = fList_set()

    ### Because of the small size of the dataset, the script uses stratified
    ### shuffle split cross validation in tester.py
    clf = GaussianNB()

    ### Per the original notes, ptest calculates the precision. Score every
    ### generated list instead of indexing a hard-coded range(0, 15), so the
    ### loop cannot drift out of step with fList_set().
    metrics = [ptest(clf, my_dataset, candidate) for candidate in all_features_list]

    ### Go for the feature list that produces the best precision.
    best = np.array(metrics).argmax()

    ### Run test_classifier to print evaluation metrics to console
    test_classifier(clf, my_dataset, all_features_list[best])

    ### Now use the same feature list to run the decision tree classifier
    features_list = all_features_list[best]

    ### Task 4: Try a variety of classifiers
    samples_split_values = [2, 4]
    samples_leaf_values = [1, 2]

    for split in samples_split_values:
        for leaf in samples_leaf_values:
            clf = tree.DecisionTreeClassifier(min_samples_split=split,
                                              min_samples_leaf=leaf)
            test_classifier(clf, my_dataset, features_list)
            print_feature_importances(features_list, clf)

    ### Choose best classifier and feature set: a new GaussianNB instance is
    ### what gets dumped, not any of the trees evaluated above.
    clf = GaussianNB()

    ### Dump classifier, dataset, and features_list
    dump_classifier_and_data(clf, my_dataset, features_list)
开发者ID:RaphaelTam,项目名称:Enron_Bad_Guys,代码行数:54,代码来源:poi.id.py


示例11: explore_scores

def explore_scores():
    """Run ``test_classifier`` for every feature set / KNN setting combination.

    Iterates the module-level ``features`` and, for each entry, the
    Cartesian product of ``n_neighbor``, ``weights``, ``algorithm``,
    ``leaf_size``, ``p`` and ``metric``. Each combination is evaluated as a
    ``scaler`` + ``KNeighborsClassifier`` pipeline on ``enron_data``.

    Returns:
        None.
    """
    import itertools

    for n in features:
        # The selected features depend only on `n`, so compute them once per
        # outer iteration instead of once per classifier setting.
        feature = features_select(n)

        # product() varies the right-most iterable fastest, matching the
        # order of the nested loops it replaces.
        knn_settings = itertools.product(
            n_neighbor, weights, algorithm, leaf_size, p, metric)
        for c, d, e, f, g, h in knn_settings:
            pipeline = Pipeline([
                ('normalization', scaler),
                ('classifier', KNeighborsClassifier(
                    n_neighbors=c, weights=d, algorithm=e,
                    leaf_size=f, p=g, metric=h)),
            ])
            test_classifier(pipeline, enron_data, feature)
开发者ID:BlaneG,项目名称:Udacity_Intro_machine_learning,代码行数:14,代码来源:poi_id.py


示例12: tuneKNN

def tuneKNN(feature_list, dataset):
    """Grid-search a scaled KNN pipeline over neighbours and algorithm.

    The search object is handed to ``test_classifier`` and its
    ``best_params_`` are printed afterwards.

    Args:
        feature_list: feature names forwarded to ``test_classifier``.
        dataset: dataset forwarded to ``test_classifier``.

    Returns:
        None.
    """
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.grid_search import GridSearchCV

    # Feature scaling runs before the classifier inside one pipeline.
    pipeline = Pipeline([('scale', StandardScaler()), ('knn', KNeighborsClassifier())])

    # n_neighbors lists exactly two candidates, 1 and 8 (not a range).
    parameters = {
        'knn__n_neighbors': [1, 8],
        'knn__algorithm': ('ball_tree', 'kd_tree', 'brute', 'auto'),
    }
    clf = GridSearchCV(pipeline, parameters, scoring='recall')

    # Evaluate on the arguments. The previous body ignored both parameters
    # and read the module globals `my_dataset` / `features_list` instead.
    test_classifier(clf, dataset, feature_list)

    print('###best_params')
    print(clf.best_params_)
开发者ID:yielder,项目名称:identifying-fraud-from-enron-email,代码行数:15,代码来源:poi_id.py


示例13: getRF

def getRF():
    """Grid-search ``rf_pipe`` once per entry of ``scores`` and print reports.

    For each scoring name the search is fitted on the module-level
    ``features`` / ``labels``; the best pipeline and its parameters are
    printed; a classification report is printed for predictions on those
    same ``features`` (the data the search was fitted on); and the best
    pipeline is passed to ``test_classifier`` with ``my_dataset`` and
    ``features_list``.

    Returns:
        None.
    """
    print("===============")
    print("RandomForests")
    print("===============")

    for scoring_name in scores:
        print(scoring_name)
        print("")

        # Keys are prefixed with pipeline step names ('rf', 'selector').
        param_grid = {
            'rf__n_estimators': range(10, 150, 10),
            'rf__criterion': ['gini', 'entropy'],
            'rf__min_samples_split': range(2, 8, 2),
            'selector__k': range(3, 22),
        }

        search = grid_search.GridSearchCV(rf_pipe, param_grid, scoring=scoring_name, cv=cv)
        search.fit(features, labels)

        # This is the model you pass to tester.py
        clf = search.best_estimator_

        print(" ")
        print("Optimal Model - by Grid Search")
        print(clf)
        print(" ")

        tuned_params = clf.get_params()

        print(" ")
        print("Best Parameters- by Grid Search")
        print(tuned_params)
        print(" ")

        # Predictions are made on the data the search was fitted on.
        predictions = search.predict(features)

        # Print Results  (will print the Grid Search score)
        print("Grid Search Classification report:")
        print(" ")
        print(classification_report(labels, predictions))
        print(' ')

        # Print Results  (will print the tester.py score)
        print("tester.py Classification report:")
        print(" ")
        test_classifier(clf, my_dataset, features_list)
        print(" ")
        print("")
开发者ID:abdelrahmansaud,项目名称:Identifying-Fraud-from-Enron-Email,代码行数:48,代码来源:poi_id.py


示例14: getKNN

def getKNN():
    """Grid-search ``knn_pipe`` once per entry of ``scores`` and print reports.

    For each scoring name the search is fitted on the module-level
    ``features`` / ``labels``; the best pipeline and its parameters are
    printed; a classification report is printed for predictions on those
    same ``features`` (the data the search was fitted on); and the best
    pipeline is passed to ``test_classifier`` with ``my_dataset`` and
    ``features_list``.

    Returns:
        None.
    """
    print("===============")
    print("KNeighborsClassifier")
    print("===============")

    for scoring_name in scores:
        print(scoring_name)
        print("")

        # Keys are prefixed with pipeline step names ('knn', 'selector').
        param_grid = {
            'knn__n_neighbors': range(2, 10, 2),
            'knn__weights': ['distance', 'uniform'],
            'knn__metric': ['minkowski', 'euclidean'],
            'selector__k': range(3, 20),
        }

        search = grid_search.GridSearchCV(knn_pipe, param_grid, scoring=scoring_name, cv=cv)
        search.fit(features, labels)

        # This is the model you pass to tester.py
        clf = search.best_estimator_

        print(" ")
        print("Optimal Model - by Grid Search")
        print(clf)
        print(" ")

        tuned_params = clf.get_params()

        print(" ")
        print("Best Parameters- by Grid Search")
        print(tuned_params)
        print(" ")

        # Predictions are made on the data the search was fitted on.
        predictions = search.predict(features)

        # Print Results  (will print the Grid Search score)
        print("Grid Search Classification report:")
        print(" ")
        print(classification_report(labels, predictions))
        print(' ')

        # Print Results  (will print the tester.py score)
        print("tester.py Classification report:")
        print(" ")
        test_classifier(clf, my_dataset, features_list)
        print(" ")
        print("")
开发者ID:abdelrahmansaud,项目名称:Identifying-Fraud-from-Enron-Email,代码行数:48,代码来源:poi_id.py


示例15: getAda

def getAda():
    """Grid-search ``ada_pipe`` once per entry of ``scores`` and print reports.

    For each scoring name the search is fitted on the module-level
    ``features`` / ``labels``; the best pipeline and its parameters are
    printed; a classification report is printed for predictions on those
    same ``features`` (the data the search was fitted on); and the best
    pipeline is passed to ``test_classifier`` with ``my_dataset`` and
    ``features_list``.

    Returns:
        None.
    """
    print("===============")
    print("AdaBoost")
    print("===============")

    for scoring_name in scores:
        print(scoring_name)
        print("")

        # Keys are prefixed with pipeline step names ('ada', 'selector').
        param_grid = {
            'ada__n_estimators': range(1, 100, 20),
            'ada__learning_rate': [x * 0.01 for x in range(100, 160, 10)],
            'selector__k': range(3, 22),
        }

        search = grid_search.GridSearchCV(ada_pipe, param_grid, scoring=scoring_name, cv=cv)
        search.fit(features, labels)

        # This is the model you pass to tester.py
        clf = search.best_estimator_

        print(" ")
        print("Optimal Model - by Grid Search")
        print(clf)
        print(" ")

        tuned_params = clf.get_params()

        print(" ")
        print("Best Parameters- by Grid Search")
        print(tuned_params)
        print(" ")

        # Predictions are made on the data the search was fitted on.
        predictions = search.predict(features)

        # Print Results  (will print the Grid Search score)
        print("Grid Search Classification report:")
        print(" ")
        print(classification_report(labels, predictions))
        print(' ')

        # Print Results  (will print the tester.py score)
        print("tester.py Classification report:")
        print(" ")
        test_classifier(clf, my_dataset, features_list)
        print(" ")
        print("")
开发者ID:abdelrahmansaud,项目名称:Identifying-Fraud-from-Enron-Email,代码行数:48,代码来源:poi_id.py


示例16: getSVC

def getSVC():
    """Grid-search ``sv_pipe`` once per entry of ``scores`` and print reports.

    For each scoring name the search is fitted on the module-level
    ``features`` / ``labels``; the best pipeline and its parameters are
    printed; a classification report is printed for predictions on those
    same ``features`` (the data the search was fitted on); and the best
    pipeline is passed to ``test_classifier`` with ``my_dataset`` and
    ``features_list``.

    Returns:
        None.
    """
    print("===============")
    print("SVC")
    print("===============")

    for scoring_name in scores:
        print(scoring_name)
        print("")

        # Keys are prefixed with pipeline step names ('sv', 'selector').
        # 'sv__gamma' is not part of the grid; an earlier gamma list was
        # disabled in the original.
        param_grid = {
            'sv__C': [0.01, 0.1, 1, 500, 1000, 5000, 10000, 50000, 100000],
            'sv__kernel': ['linear'],
            'selector__k': range(3, 22),
        }

        search = grid_search.GridSearchCV(sv_pipe, param_grid, scoring=scoring_name, cv=cv)
        search.fit(features, labels)

        # This is the model you pass to tester.py
        clf = search.best_estimator_

        print(" ")
        print("Optimal Model - by Grid Search")
        print(clf)
        print(" ")

        tuned_params = clf.get_params()

        print(" ")
        print("Best Parameters- by Grid Search")
        print(tuned_params)
        print(" ")

        # Predictions are made on the data the search was fitted on.
        predictions = search.predict(features)

        # Print Results  (will print the Grid Search score)
        print("Grid Search Classification report:")
        print(" ")
        print(classification_report(labels, predictions))
        print(' ')

        # Print Results  (will print the tester.py score)
        print("tester.py Classification report:")
        print(" ")
        test_classifier(clf, my_dataset, features_list)
        print(" ")
        print("")
开发者ID:abdelrahmansaud,项目名称:Identifying-Fraud-from-Enron-Email,代码行数:47,代码来源:poi_id.py


示例17: getNB

def getNB():
    """Grid-search ``nb_pipe`` once per entry of ``scores`` and print reports.

    Only the selector's ``k`` is searched. For each scoring name the search
    is fitted on the module-level ``features`` / ``labels``; the best
    pipeline and its parameters are printed; a classification report is
    printed for predictions on those same ``features`` (the data the search
    was fitted on); and the best pipeline is passed to ``test_classifier``
    with ``my_dataset`` and ``features_list``.

    Returns:
        None.
    """
    print("===============")
    print("GaussianNB")
    print("===============")

    for scoring_name in scores:
        print(scoring_name)
        print("")

        param_grid = {'selector__k': range(3, 22)}

        search = grid_search.GridSearchCV(nb_pipe, param_grid, scoring=scoring_name, cv=cv)
        search.fit(features, labels)

        # This is the model you pass to tester.py
        clf = search.best_estimator_

        print(" ")
        print("Optimal Model - by Grid Search")
        print(clf)
        print(" ")

        tuned_params = clf.get_params()

        print(" ")
        print("Best Parameters- by Grid Search")
        print(tuned_params)
        print(" ")

        # Predictions are made on the data the search was fitted on.
        predictions = search.predict(features)

        # Print Results  (will print the Grid Search score)
        print("Grid Search Classification report:")
        print(" ")
        print(classification_report(labels, predictions))
        print(' ')

        # Print Results  (will print the tester.py score)
        print("tester.py Classification report:")
        print(" ")
        test_classifier(clf, my_dataset, features_list)
        print(" ")
        print("")
开发者ID:abdelrahmansaud,项目名称:Identifying-Fraud-from-Enron-Email,代码行数:46,代码来源:poi_id.py


示例18: train_test

 def train_test():
     """Fit a decision tree, run the tester, and print ranked importances.

     Uses the enclosing scope's ``my_dataset`` and ``features_list``. The
     tree is fitted on a 70/30 training split (``random_state=42``), the
     return value of ``test_classifier`` is printed, and each feature name
     (``features_list`` without its first entry) is then paired with the
     importance at the same position and pretty-printed in descending order
     of importance.
     """
     data = featureFormat(my_dataset, features_list, sort_keys = True)
     labels, features = targetFeatureSplit(data)

     split = train_test_split(features, labels, test_size=0.3, random_state=42)
     features_train, features_test, labels_train, labels_test = split

     clf = DecisionTreeClassifier(random_state=42)
     clf.fit(features_train, labels_train)
     print(test_classifier(clf, my_dataset, features_list))

     ### Print feature importance in order
     # features_list[0] is skipped, so importance i belongs to
     # features_list[i + 1].
     features_imp = {}
     for position, feature_name in enumerate(features_list[1:]):
         features_imp[feature_name] = clf.feature_importances_[position]

     ranked = sorted(features_imp.items(), key=operator.itemgetter(1), reverse=True)
     pprint(ranked)
开发者ID:nickreinerink,项目名称:Udacity-P5-Identity-Fraud-from-Enron-Email,代码行数:18,代码来源:new_features.py


示例19: main

def main():
    """Build the feature list, split the data, then dump and test ``clf``.

    Loads ``final_project_dataset.pkl``, augments it with ``AddFeatures``,
    derives the feature list from the keys of one record minus the
    exclusions below, moves ``"poi"`` to the front, builds a stratified
    90/10 split, and finally dumps and tests ``clf``.

    Returns:
        None.
    """
    # The context manager closes the handle, which the previous bare open()
    # call never did.
    with open("final_project_dataset.pkl", "r") as dataset_file:
        data_dict = pickle.load(dataset_file)
    my_dataset = AddFeatures(data_dict)

    # Exclude using Discretion.
    Exc1 = ["email_address"]
    # Replaced by creating better versions of the features
    Exc2 = ["to_messages", "from_messages", "from_this_person_to_poi", "from_poi_to_this_person"]
    # Exclude because Highly Correlated with stronger features
    Exc3 = [
        "deferral_payments",
        "expenses",
        "deferred_income",
        "restricted_stock_deferred",
        "director_fees",
        "long_term_incentive",
        "bonus",
        "total_payments",
        "salary",
        "total_stock_value",
        "restricted_stock",
        "exercised_stock_options",
        "other",
    ]
    exclude = Exc1 + Exc2 + Exc3
    # QueryDataSet(my_dataset)
    # ShowCorrel(my_dataset)

    # Start from the keys of an arbitrary record (Python 2 dict API).
    features_list = next(my_dataset.itervalues()).keys()
    for excluded_name in exclude:
        features_list.remove(excluded_name)

    # Move the "poi" entry to the front of the list.
    features_list.insert(0, features_list.pop(features_list.index("poi")))
    data = featureFormat(my_dataset, features_list, sort_keys=True)

    ### Extract features and labels from dataset for local testing
    labels, features = targetFeatureSplit(data)
    features_train, features_test, labels_train, labels_test = train_test_split(
        features, labels, test_size=0.1, random_state=42, stratify=labels
    )

    # NOTE(review): every candidate assignment of `clf` below is disabled,
    # so unless `clf` exists at module level the dump call raises NameError.
    # Re-enable exactly one of these before running.
    # clf=TuneSVM(features, labels,features_list)
    # clf=TuneKNN(features, labels,features_list)
    # clf=NoTuneDT(features, labels,features_list)
    # clf=TuneDT(features,labels,features_list)

    # NOTE(review): "poi" is already first in features_list, so this insert
    # yields a second "poi" entry unless the tuning helper above removed it
    # from the list in place -- confirm against those helpers.
    features_list.insert(0, "poi")
    dump_classifier_and_data(clf, my_dataset, features_list)
    test_classifier(clf, my_dataset, features_list)
开发者ID:datalord123,项目名称:MachineLearning,代码行数:44,代码来源:poi_id.py


示例20: train_and_predict

    def train_and_predict(first,second):
        """Evaluate a two-feature model and return the requested metric.

        The feature list is ``["poi", first, second]``. The classifier is a
        decision tree when the enclosing scope's ``dt`` is truthy, otherwise
        Gaussian naive Bayes. ``test_classifier`` is called with
        ``return_F1=True`` when ``f1`` is truthy, otherwise with
        ``return_precision=True``, and its result is returned unchanged.
        """
        from sklearn.naive_bayes import GaussianNB
        from sklearn import tree

        features_list = ["poi", first, second]

        # These calls are kept from the original; their results are not
        # used further in this function.
        data = featureFormat(my_dataset, features_list, sort_keys = True)
        labels, features = targetFeatureSplit(data)

        clf = tree.DecisionTreeClassifier() if dt else GaussianNB()

        # Exactly one of the two metric switches is passed through.
        metric_switch = {"return_F1": True} if f1 else {"return_precision": True}
        return test_classifier(clf, my_dataset, features_list, **metric_switch)
开发者ID:reinson,项目名称:UdacityProject4_Identifying-Fraud-from-Enron-Email,代码行数:19,代码来源:poi_id.py



注:本文中的tester.test_classifier函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python tester.Tester类代码示例发布时间:2022-05-27
下一篇:
Python tester.dump_classifier_and_data函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap