• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python classifiers.Evaluation类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中weka.classifiers.Evaluation的典型用法代码示例。如果您正苦于以下问题:Python Evaluation类的具体用法?Python Evaluation怎么用?Python Evaluation使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了Evaluation类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: use_classifier

def use_classifier(data, cli, args):
    cli = cli.format(cli, **args)
    cls = from_commandline(cli, classname="weka.classifiers.Classifier")
    cls.build_classifier(data)
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(cls, data, 10, Random(1))
    return cls, evaluation
开发者ID:orestisf1993,项目名称:pattern-recognition-assignments,代码行数:7,代码来源:weka-auto.py


示例2: myGridSearch

def myGridSearch(data,RBound,MBound):
    bestlogistic = None
    best_acc     = -float('inf')
    class bestValues(object):
        m = float('nan')
        r = float('nan')
    for r in range(RBound[0],RBound[1]+RBound[2],RBound[2]):
        for m in range(MBound[0],MBound[1]+MBound[2],MBound[2]):
            logistic = Logistic()
            logistic.setMaxIts(int(m))
            logistic.setRidge(pow(10,r))
            evaluation = Evaluation(data)
            output = util.get_buffer_for_predictions()[0]
            attRange = Range()  # no additional attributes output
            outputDistribution = Boolean(False)  # we don't want distribution
            random = Random(1)
            numFolds = min(10,data.numInstances())
            evaluation.crossValidateModel(logistic,data,numFolds,random,[output, attRange, outputDistribution])
            acc = evaluation.pctCorrect()
            if (acc>best_acc):
                bestlogistic = logistic
                best_acc = acc
                bestValues.m = int(m)
                bestValues.r = pow(10,r)
    print "Best accuracy: ", best_acc
    print "Best values:   M = ", bestValues.m, ", Ridge = ", bestValues.r
    print "-----------------------------------------"
    return bestlogistic, bestValues.r, bestValues.m, best_acc
开发者ID:kayhan-batmanghelich,项目名称:gondola,代码行数:28,代码来源:logistic.py


示例3: main

def main(args):
    """
    Loads a dataset, shuffles it, splits it into train/test set. Trains J48 with training set and
    evaluates the built model on the test set.
    :param args: the commandline arguments (optional, can be dataset filename)
    :type args: list
    """

    # load a dataset
    if len(args) <= 1:
        data_file = helper.get_data_dir() + os.sep + "vote.arff"
    else:
        data_file = args[1]
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()

    # generate train/test split of randomized data
    train, test = data.train_test_split(66.0, Random(1))

    # build classifier
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.build_classifier(train)
    print(cls)

    # evaluate
    evl = Evaluation(train)
    evl.test_model(cls, test)
    print(evl.summary())
开发者ID:fracpete,项目名称:python-weka-wrapper3-examples,代码行数:30,代码来源:train_test_split.py


示例4: main

def main():
    """
    Shows how to use the CostSensitiveClassifier.
    """

    # load a dataset
    data_file = helper.get_data_dir() + os.sep + "diabetes.arff"
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()

    # classifier
    classifier = SingleClassifierEnhancer(
        classname="weka.classifiers.meta.CostSensitiveClassifier",
        options=["-cost-matrix", "[0 1; 2 0]", "-S", "2"])
    base = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
    classifier.classifier = base

    folds = 10
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(classifier, data, folds, Random(1))


    print("")
    print("=== Setup ===")
    print("Classifier: " + classifier.to_commandline())
    print("Dataset: " + data.relationname)
    print("")
    print(evaluation.summary("=== " + str(folds) + " -fold Cross-Validation ==="))
开发者ID:fracpete,项目名称:python-weka-wrapper3-examples,代码行数:30,代码来源:cost_sensitive.py


示例5: myGridSearch

def myGridSearch(data,NTreeBounds,NFeaturesBounds):
    best_acc = -float('inf')
    bestrandomforest = None
    class bestValues(object):
        t = float('nan')
        f = float('nan')
    for t in range(NTreeBounds[0],NTreeBounds[1]+NTreeBounds[2],NTreeBounds[2]):
        for f in range(NFeaturesBounds[0],NFeaturesBounds[1]+NFeaturesBounds[2],NFeaturesBounds[2]):
            randomforest = RandomForest()
            randomforest.setNumTrees(int(t))
            randomforest.setNumFeatures(int(f))
            evaluation = Evaluation(data)
            output = output = util.get_buffer_for_predictions()[0]
            attRange = Range()  # no additional attributes output
            outputDistribution = Boolean(False)  # we don't want distribution
            random = Random(1)
            numFolds = min(10,data.numInstances())
            evaluation.crossValidateModel(randomforest,data,numFolds,random,[output, attRange, outputDistribution])
            acc = evaluation.pctCorrect()
            if (acc>best_acc):
                bestrandomforest = randomforest
                best_acc = acc
                bestValues.t = t
                bestValues.f = f
    print "Best accuracy:", best_acc
    print "Best values:  NTreeBounds = ", bestValues.t, ", NFeaturesBounds = ", bestValues.f
    print "-----------------------------------------"
    return bestrandomforest, bestValues.t, bestValues.f, best_acc
开发者ID:kayhan-batmanghelich,项目名称:gondola,代码行数:28,代码来源:randomforest.py


示例6: use_classifier

def use_classifier(data_filename, cli):
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_filename)
    data.class_is_last()
    cls = from_commandline(cli, classname="weka.classifiers.Classifier")
    cls.build_classifier(data)
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(cls, data, 10, Random(1))
    return cls, evaluation
开发者ID:orestisf1993,项目名称:pattern-recognition-assignments,代码行数:9,代码来源:latex-generator.py


示例7: RandomForest_ParamFinder

def RandomForest_ParamFinder(data): 
    # possible set for Number of trees
    NTreeBounds = [1,20,1]
    # possible set for number of features
    NFeaturesBounds = [0,20,1]
    if (data.numInstances()>10):     # grid search does 10-fold cross validation; hence number of samples must be more than 10
        gridsearch = GridSearch()
        acctag = gridsearch.getEvaluation()
        acctag = SelectedTag('ACC',acctag.getTags())
        gridsearch.setEvaluation(acctag)
        allfilters = AllFilters()
        gridsearch.setFilter(allfilters)
        gridsearch.setGridIsExtendable(Boolean(True))
        randomforest = RandomForest()
        gridsearch.setClassifier(randomforest)
        gridsearch.setXProperty(String('classifier.numTrees'))
        gridsearch.setYProperty(String('classifier.numFeatures'))
        gridsearch.setXExpression(String('I'))
        gridsearch.setYExpression(String('I'))
        gridsearch.setXMin(NTreeBounds[0])
        gridsearch.setXMax(NTreeBounds[1])
        gridsearch.setXStep(NTreeBounds[2])
        gridsearch.setYMin(NFeaturesBounds[0])
        gridsearch.setYMax(NFeaturesBounds[1])
        gridsearch.setYStep(NFeaturesBounds[2])
        gridsearch.setYBase(10)
        print "searching for random-forest NumTrees = [", NTreeBounds[0], ",", NTreeBounds[1], "], NumFeatures = [ ", NFeaturesBounds[0], ",", NFeaturesBounds[1], "] ...."
        gridsearch.buildClassifier(data)
        bestValues = gridsearch.getValues()
        # -----------------------  Evaluation
        bestrandomforest = RandomForest()
        bestrandomforest.setNumTrees(int(bestValues.x))
        bestrandomforest.setNumFeatures(int(bestValues.y))
        evaluation = Evaluation(data)
        output = output = util.get_buffer_for_predictions()[0]
        attRange = Range()  # no additional attributes output
        outputDistribution = Boolean(False)  # we don't want distribution
        random = Random(1)
        numFolds = min(10,data.numInstances())
        evaluation.crossValidateModel(bestrandomforest,data,numFolds,random,[output, attRange, outputDistribution])
        acc = evaluation.pctCorrect()
        print "best accuracy: ", acc
        print "best random-forest classifier with NumTrees=",bestValues.x , ", NumFeatures = ", bestValues.y
        OptRndFrst = bestrandomforest
        OptRndFrstp1 = bestValues.x
        OptRndFrstp2 = bestValues.y
        OptRndFrstAcc = acc
    else:
        OptRndFrst, OptRndFrstp1, OptRndFrstp2, OptRndFrstAcc = myGridSearch(data,NTreeBounds,NFeaturesBounds) 
    Description = 'Random-Forest classifier: OptNumTrees = ' + str(OptRndFrstp1) + \
            ', OptNumFeatures = ' + str(OptRndFrstp2) + ', OptAcc = ' + str(OptRndFrstAcc)
    print "-----------------------------------------"
    return OptRndFrst, OptRndFrstp1, OptRndFrstp2, OptRndFrstAcc, Description
开发者ID:kayhan-batmanghelich,项目名称:gondola,代码行数:53,代码来源:randomforest.py


示例8: Logistic_ParamFinder

def Logistic_ParamFinder(data): 
    # Possible set for Ridge-value
    RBounds = [-10,2,1]
    # possible set for maximum Iteration
    MBounds = [-1,10,1]
    if (data.numInstances()>10):     # grid search does 10-fold cross validation; hence number of samples must be more than 10
        gridsearch = GridSearch()
        acctag = gridsearch.getEvaluation()
        acctag = SelectedTag('ACC',acctag.getTags())
        gridsearch.setEvaluation(acctag)
        allfilters = AllFilters()
        gridsearch.setFilter(allfilters)
        gridsearch.setGridIsExtendable(Boolean(True))
        logistic = Logistic()
        gridsearch.setClassifier(logistic)
        gridsearch.setXProperty(String('classifier.maxIts'))
        gridsearch.setYProperty(String('classifier.ridge'))
        gridsearch.setXExpression(String('I'))
        gridsearch.setYExpression(String('pow(BASE,I)'))
        gridsearch.setXMin(MBounds[0])
        gridsearch.setXMax(MBounds[1])
        gridsearch.setXStep(MBounds[2])
        gridsearch.setYMin(RBounds[0])
        gridsearch.setYMax(RBounds[1])
        gridsearch.setYStep(RBounds[2])
        gridsearch.setYBase(10)
        print "searching for logistic lcassifier Max Iteration = [", MBounds[0], ",", MBounds[1], "], Ridge = [ 10E", RBounds[0], ",10E", RBounds[1], "] ...."
        gridsearch.buildClassifier(data)
        bestValues = gridsearch.getValues()
        # -----------------------  Evaluation
        bestlogistic = Logistic()
        bestlogistic.setMaxIts(int(bestValues.x))
        bestlogistic.setRidge(pow(10,bestValues.y))
        evaluation = Evaluation(data)
        output = util.get_buffer_for_predictions()[0]
        attRange = Range()  # no additional attributes output
        outputDistribution = Boolean(False)  # we don't want distribution
        random = Random(1)
        numFolds = min(10,data.numInstances())
        evaluation.crossValidateModel(bestlogistic,data,numFolds,random,[output, attRange, outputDistribution])
        acc = evaluation.pctCorrect()
        print "best accuracy: ", acc
        print "best logistic classifier with Ridge = ", bestlogistic.getRidge(), " Max Iteration = ", bestlogistic.getMaxIts()
        OptLog = bestlogistic
        OptLogp1 = bestlogistic.getRidge()
        OptLogp2 = bestlogistic.getMaxIts()
        OptLogAcc = acc
    else:
        OptLog, OptLogp1, OptLogp2, OptLogAcc = myGridSearch(data,RBounds,MBounds)
    Description = 'Logistic classifier OptRidge = ' + str(OptLogp1) + \
            ', OptMaxIts = ' + str(OptLogp2) + ', OptAcc = ' + str(OptLogAcc)
    print "-----------------------------------------"
    return OptLog, OptLogp1, OptLogp2, OptLogAcc, Description
开发者ID:kayhan-batmanghelich,项目名称:gondola,代码行数:53,代码来源:logistic.py


示例9: test_model

    def test_model(self, test_data, empty_solution, evaluate = False):
        model_weka = None
        if os.path.isfile(self.prediction_file):
            print 'Model ' + self.name + ' already tested.'
        elif not os.path.isfile(self.model_file):
            print 'Impossible testing this model. It should be trained first.'
            return
        else: 
            print 'Starting to test_model model ' + self.name + '.'
            model_weka = Classifier(jobject = serialization.read(self.model_file)) 
            evaluation = Evaluation(data = test_data)
            evaluation.test_model(classifier = model_weka, data = test_data)
            
            predictions = evaluation.predictions()
            rows        = read_sheet(file_name = empty_solution)
            solutions   = []

            for row in rows:
                solution = [row['userid'], row['tweetid'], predictions.pop(0).predicted()]
                solutions.append(solution)
            write_the_solution_file(solutions, self.prediction_file)
            print 'Model ' + self.name + ' tested.'
        
        if evaluate == True:
            if os.path.isfile(self.evaluation_file):
                print 'Model ' + self.name + ' already evaluated.'
                return
            elif model_weka == None:
                model_weka = Classifier(jobject = serialization.read(self.model_file)) 
                evaluation = Evaluation(data = test_data)
                evaluation.test_model(classifier = model_weka, data = test_data)
            save_file(file_name = self.evaluation_file, content = evaluation.to_summary())
            print 'Model ' + self.name + ' evaluated.'
开发者ID:jonmagal,项目名称:recsys_challenge,代码行数:33,代码来源:model.py


示例10: smo

def smo(trainData,testData,params,exparams):
    kerType = str2bool(params[0]) 
    cValue = float(params[1])
    kerParam = float(params[2])
    if kerType:     # RBF kernel
        kernel = RBFKernel()
        kernel.setGamma(kerParam)
    else:       # Polynomial kernel
        kernel = PolyKernel()
        kernel.setExponent(kerParam)
    smo = SMO()
    smo.setKernel(kernel)
    smo.setC(cValue)
    smo.buildClassifier(trainData)  # only a trained classifier can be evaluated
    # evaluate it on the training
    evaluation = Evaluation(trainData)
    (trainOutput, trainBuffer) = util.get_buffer_for_predictions(trainData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(smo, trainData, [trainOutput, attRange, outputDistribution])
    print "--> Evaluation:\n"
    print evaluation.toSummaryString()
    trainSummary = makeTrainEvalSummary(evaluation)
    # evaluate it on testing
    evaluation = Evaluation(testData)
    (testOutput, testBuffer) = util.get_buffer_for_predictions(testData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(smo, testData, [testOutput, attRange, outputDistribution])
    return trainBuffer, testBuffer, trainSummary
开发者ID:kayhan-batmanghelich,项目名称:gondola,代码行数:30,代码来源:wekaClassifier.py


示例11: simple_logistic

def simple_logistic(trainData,testData,params,exparams):
    heuristicStop = int(float(params[0]))
    numBoostingIterations = int(float(params[1]))
    simplelogistic = SimpleLogistic()
    simplelogistic.setHeuristicStop(heuristicStop)
    simplelogistic.setNumBoostingIterations(numBoostingIterations)
    if (trainData.numInstances()<5):   # special case for small sample size
        simplelogistic.setUseCrossValidation(False) 
    simplelogistic.buildClassifier(trainData)  # only a trained classifier can be evaluated
    # evaluate it on the training
    evaluation = Evaluation(trainData)
    (trainOutput, trainBuffer) = util.get_buffer_for_predictions(trainData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(simplelogistic, trainData, [trainOutput, attRange, outputDistribution])
    print "--> Evaluation:\n"
    print evaluation.toSummaryString()
    trainSummary = makeTrainEvalSummary(evaluation)
    # evaluate it on testing
    evaluation = Evaluation(testData)
    (testOutput, testBuffer) = util.get_buffer_for_predictions(testData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(simplelogistic, testData, [testOutput, attRange, outputDistribution])
    return trainBuffer, testBuffer, trainSummary
开发者ID:kayhan-batmanghelich,项目名称:gondola,代码行数:25,代码来源:wekaClassifier.py


示例12: bayesian

def bayesian(trainData,testData,params,exparams):
    IsOptMultinomialBayes   = str2bool(params[0]) 
    IsOptNaiveKernelDensity = str2bool(params[1]) 
    if IsOptMultinomialBayes:    # optimal bayesian classifier is multinomial
        bayes = NaiveBayesMultinomial()
    else:
        bayes = NaiveBayes()
        if IsOptNaiveKernelDensity:   # use kernel density estimation
            bayes.setUseKernelEstimator(Boolean(True))   
    bayes.buildClassifier(trainData)  # only a trained classifier can be evaluated
    # evaluate it on the training
    evaluation = Evaluation(trainData)
    (trainOutput, trainBuffer) = util.get_buffer_for_predictions(trainData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(bayes, trainData, [trainOutput, attRange, outputDistribution])
    print "--> Evaluation:\n"
    print evaluation.toSummaryString()
    trainSummary = makeTrainEvalSummary(evaluation)
    # evaluate it on testing
    evaluation = Evaluation(testData)
    (testOutput, testBuffer) = util.get_buffer_for_predictions(testData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(bayes, testData, [testOutput, attRange, outputDistribution])   
    return trainBuffer, testBuffer, trainSummary
开发者ID:kayhan-batmanghelich,项目名称:gondola,代码行数:26,代码来源:wekaClassifier.py


示例13: bagging_logistic

def bagging_logistic(trainData,testData,params,exparams):
    IsOptBagOnOptLog = str2bool(params[0])
    logistic = Logistic()
    bagging = Bagging()
    if IsOptBagOnOptLog:    # optimal bagging is based on optimal logistic
        ridge = float(exparams[0])
        maxIt = int(float(exparams[1]))
        logistic.setMaxIts(maxIt)
        bagSizePercent = int(float(params[1]))
        bagging.setBagSizePercent(bagSizePercent)
    else:   # ridge parameter is also optimized in the process
        ridge = float(params[1])
    numIterations = int(float(params[2]))
    bagging.setNumIterations(numIterations)
    logistic.setRidge(ridge)
    bagging.setClassifier(logistic)
    bagging.buildClassifier(trainData)  # only a trained classifier can be evaluated
    # evaluate it on the training
    evaluation = Evaluation(trainData)
    (trainOutput, trainBuffer) = util.get_buffer_for_predictions(trainData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(bagging, trainData, [trainOutput, attRange, outputDistribution])
    print "--> Evaluation:\n"
    print evaluation.toSummaryString()
    trainSummary = makeTrainEvalSummary(evaluation)
    # evaluate it on testing
    evaluation = Evaluation(testData)
    (testOutput, testBuffer) = util.get_buffer_for_predictions(testData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(bagging, testData, [testOutput, attRange, outputDistribution])
    return trainBuffer, testBuffer, trainSummary
开发者ID:kayhan-batmanghelich,项目名称:gondola,代码行数:33,代码来源:wekaClassifier.py


示例14: build_and_classify

def build_and_classify(classifier, classifier_name, approach_name, infile, percentage='10'):
    """
    Creates model and classifies against input data. Returns accuracy statistics
    """
    # set seed so results are consistent
    random.seed('iot')

    # load data
    loader = Loader(classname='weka.core.converters.CSVLoader')
    data = loader.load_file(infile)
    data.class_is_last()

    # convert all numeric attributes to nominal
    to_nominal = Filter(classname='weka.filters.unsupervised.attribute.NumericToNominal',
                        options=['-R', 'first-last'])
    to_nominal.inputformat(data)
    data = to_nominal.filter(data)

    # randomize data with constant seed
    randomize = Filter(classname='weka.filters.unsupervised.instance.Randomize',
                       options=['-S', '42'])
    randomize.inputformat(data)

    data = randomize.filter(data)

    # create training set and testing set
    train_percent_filter = Filter(classname='weka.filters.unsupervised.instance.RemovePercentage',
                                  options=['-P', percentage, '-V'])
    train_percent_filter.inputformat(data)

    train = train_percent_filter.filter(data)
    test = data

    # build and test classifier
    classifier.build_classifier(train)
    evaluation = Evaluation(train)
    evaluation.test_model(classifier, test)

    # return results as array
    results = [
        approach_name,
        classifier_name,
        percentage,
        evaluation.percent_correct,
        evaluation.weighted_f_measure
    ]
    return results
开发者ID:kapil1garg,项目名称:nursing-home-analytics,代码行数:47,代码来源:weka_learning-curve_generator.py


示例15: crossValidate

    def crossValidate(self, arrfFile = None, classname="weka.classifiers.trees.J48", options=["-C", "0.3"]):
        
        if arrfFile is not None:
            self.initData( arrfFile )
            
        if self.data is None:
            return 

        print 'Classificador ' + str(classname) + ' ' + ' '.join(options)
        cls = Classifier(classname=classname, options=options)
        
        evl = Evaluation(self.data)
        evl.crossvalidate_model(cls, self.data, 10, Random(1))

        print(evl.percent_correct)
        print(evl.summary())
        print(evl.class_details())
开发者ID:fernandovieiraf02,项目名称:superpixel,代码行数:17,代码来源:wekaWrapper.py


示例16: use_classifier

def use_classifier(data):
    """
    Uses the meta-classifier AttributeSelectedClassifier for attribute selection.
    :param data: the dataset to use
    :type data: Instances
    """
    print("\n1. Meta-classifier")
    classifier = Classifier(classname="weka.classifiers.meta.AttributeSelectedClassifier")
    aseval = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    assearch = ASSearch(classname="weka.attributeSelection.GreedyStepwise", options=["-B"])
    base = Classifier(classname="weka.classifiers.trees.J48")
    # setting nested options is always a bit tricky, getting all the escaped double quotes right
    # simply using the bean property for setting Java objects is often easier and less error prone
    classifier.set_property("classifier", base.jobject)
    classifier.set_property("evaluator", aseval.jobject)
    classifier.set_property("search", assearch.jobject)
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(classifier, data, 10, Random(1))
    print(evaluation.summary())
开发者ID:keypointt,项目名称:python-weka-wrapper-examples,代码行数:19,代码来源:attribute_selection_test.py


示例17: evaluation

 def evaluation(self, classifier, trainingData, testingData = None):
     trainingData.set_class_index(trainingData.num_attributes() - 1)
     if testingData == None:
         evaluation = Evaluation(trainingData) 
                             # initialize with priors
         evaluation.crossvalidate_model(classifier, trainingData, 10, Random(42))  # 10-fold CV
         return evaluation
     else:
         print "testing data exists"
         if testingData.num_attributes() == trainingData.num_attributes():
             testingData.set_class_index(testingData.num_attributes() - 1)
             evaluation = Evaluation(trainingData)   
             
             classifier.build_classifier(trainingData)
             evaluation.test_model(classifier, testingData)
             
             #for attribute in trainingData.attributes():
             #    print "train:" + str(attribute)
             #for attribute in testingData.attributes():
             #    print "test:" + str(attribute)
                 
                 
             return evaluation
         else:
             print "testing Data doesn't have same attribute with training data"
             for attribute in trainingData.attributes():
                 print "train:" + str(attribute)
             for attribute in testingData.attributes():
                 print "test:" + str(attribute)
开发者ID:zhaohengyang,项目名称:Android-malware-detection,代码行数:29,代码来源:weka_interface.py


示例18: do_temporal_cv

def do_temporal_cv(t_selector, instances, num_folds):
    num_instances = instances.numInstances()
    results = []
    # Split folds
    for f in xrange(2, num_folds+1):
        print "fold:%d"%f
        for pair in split_temporal_train_test(f, num_instances):
    #        train_start = pair.train_start
    #        train_end = pair.train_end

            train_set = Instances(instances, int(pair.train_start), int(pair.train_end - pair.train_start+1))
            test_set = Instances(instances, int(pair.test_start), int(pair.test_end - pair.test_start +1))

            t_selector.buildClassifier(train_set)
            e = Evaluation(train_set)
            e.evaluateModel(t_selector, test_set)

            if e.recall(0) > 0 and e.precision(0) > 0:
                results.append(Result(instances.numAttributes(), e))


            #            print "precision: %.2f"%evalTest.precision(0)
#            print "recall: %.2f"%evalTest.recall(0)
#            print evalTest.toSummaryString()
    #        System.out.println(strSummary);
    sum_precision = 0
    sum_recall = 0
    for r in results:
#        print "precision:"
#        print r.precision
#        print "recall:"
#        print r.recall
        sum_precision += r.precision
        sum_recall +=r.recall


    precision = sum_precision*1.0/len(results)
    recall = sum_recall*1.0/len(results)
    avg_fmeasure = harmonic_mean([precision, recall])
    print "f_measure:%.2f"%avg_fmeasure
开发者ID:SoftwareIntrospectionLab,项目名称:BugPrediction,代码行数:40,代码来源:featsel.py


示例19: baggin_smo

def baggin_smo(trainData,testData,params,exparams):
    IsOptBagOnOptSMO =  str2bool(params[0]) 
    if IsOptBagOnOptSMO:    # optimal bagging is based on optimal SMO thus I should use extra params
        kerType =  str2bool(params[0]) 
        cValue = float(exparams[1])
        kerParam = float(exparams[2])
        if kerType:     # RBF kernel
            kernel = RBFKernel()
            kernel.setGamma(kerParam)
        else:       # Polynomial kernel
            kernel = PolyKernel()
            kernel.setExponent(kerParam)
        bagSizePercent = int(float(params[1]))
        numIterations = int(float(params[2]))
        smo = SMO()
        bagging = Bagging()
        smo.setKernel(kernel)
        smo.setC(cValue)
        bagging.setBagSizePercent(bagSizePercent)
        bagging.setNumIterations(numIterations)
        bagging.setClassifier(smo)
    else:   # optimal bagging is based on linear SMO
        cValue = float(params[1])
        numIterations = int(float(params[2]))
        smo = SMO()
        bagging = Bagging()
        kernel = PolyKernel()
        smo.setKernel(kernel)
        smo.setC(cValue)
        bagging.setNumIterations(numIterations)
        bagging.setClassifier(smo)
    bagging.buildClassifier(trainData)  # only a trained classifier can be evaluated
    # evaluate it on the training
    evaluation = Evaluation(trainData)
    (trainOutput, trainBuffer) = util.get_buffer_for_predictions(trainData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(bagging, trainData, [trainOutput, attRange, outputDistribution])
    print "--> Evaluation:\n"
    print evaluation.toSummaryString()
    trainSummary = makeTrainEvalSummary(evaluation)
    # evaluate it on testing
    evaluation = Evaluation(testData)
    (testOutput, testBuffer) = util.get_buffer_for_predictions(testData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(bagging, testData, [testOutput, attRange, outputDistribution])
    return trainBuffer, testBuffer, trainSummary
开发者ID:kayhan-batmanghelich,项目名称:gondola,代码行数:48,代码来源:wekaClassifier.py


示例20: random_forest

def random_forest(trainData,testData,params,exparams):
    numTrees = int(float(params[0]))
    numFeatures = int(float(params[1]))
    randomforest = RandomForest()
    randomforest.setNumTrees(numTrees)
    randomforest.setNumFeatures(numFeatures)
    randomforest.buildClassifier(trainData)  # only a trained classifier can be evaluated
    # evaluate it on the training
    evaluation = Evaluation(trainData)
    (trainOutput, trainBuffer) = util.get_buffer_for_predictions(trainData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(randomforest, trainData, [trainOutput, attRange, outputDistribution])
    print "--> Evaluation:\n"
    print evaluation.toSummaryString()
    trainSummary = makeTrainEvalSummary(evaluation)
    # evaluate it on testing
    evaluation = Evaluation(testData)
    (testOutput, testBuffer) = util.get_buffer_for_predictions(testData)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    evaluation.evaluateModel(randomforest, testData, [testOutput, attRange, outputDistribution])
    return trainBuffer, testBuffer, trainSummary
开发者ID:kayhan-batmanghelich,项目名称:gondola,代码行数:23,代码来源:wekaClassifier.py



注:本文中的weka.classifiers.Evaluation类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python converters.Loader类代码示例发布时间:2022-05-26
下一篇:
Python classifiers.Classifier类代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap