• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python filters.Filter类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中weka.filters.Filter的典型用法代码示例。如果您正苦于以下问题:Python Filter类的具体用法?Python Filter怎么用?Python Filter使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了Filter类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: filterUnusedFeatureFromList

    def filterUnusedFeatureFromList(self, data, unusedFuncitonList):
        filteredData = data

        for attribute in unusedFuncitonList:                
            remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName", options=["-E", "^" + attribute + ".*$"])
            remove.set_inputformat(filteredData)
            filteredData = remove.filter(filteredData)

        return filteredData      
开发者ID:zhaohengyang,项目名称:Android-malware-detection,代码行数:9,代码来源:weka_interface.py


示例2: attributeSelector

    def attributeSelector(self, data, selectNum):
        attributeSelector = Filter(classname="weka.filters.supervised.attribute.AttributeSelection",\
                         options=["-S", "weka.attributeSelection.Ranker -T -1.7976931348623157E308 -N " + str(selectNum),\
                                   "-E", "weka.attributeSelection.InfoGainAttributeEval"])

        attributeSelector.set_inputformat(data)
        data = attributeSelector.filter(data)

            
        return data
开发者ID:zhaohengyang,项目名称:Android-malware-detection,代码行数:10,代码来源:weka_interface.py


示例3: getSetDataBySetIndex

 def getSetDataBySetIndex(self, data, index):
     # cut feature set out
     featureTable = FeatureTable()
     startIndexList = featureTable.getEachSetStartIndex()
     
     start = startIndexList[index]
     end = startIndexList[index+1] - 1
     remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-V", "-R", str(start) + "-" + str(end) + ",last"])
     remove.set_inputformat(data)
     filteredData = remove.filter(data)
     return filteredData
开发者ID:zhaohengyang,项目名称:Android-malware-detection,代码行数:11,代码来源:weka_interface.py


示例4: remove_correct_classified

	def remove_correct_classified(self, invert = False):
		options=[
			'-W', self.classifier.to_commandline(), 
			'-C', str(self.class_index), #classindex
	#		'-F','0', # folds
	#		'-T','0.1', #threshold by numeric classes
			'-I','0', # max iterations
			'-V' if not invert else '' 
		] # invert
		classname = "weka.filters.unsupervised.instance.RemoveMisclassified"
		remove = Filter(classname=classname, options=options)
		remove.inputformat(self.data)
		self.data = remove.filter(self.data)
开发者ID:sbiastoch,项目名称:thesis,代码行数:13,代码来源:evaluate.py


示例5: emlimitateUnusedFeature

    def emlimitateUnusedFeature(self, trainData, testData = None):
        trainData.set_class_index(trainData.num_attributes() - 1)   # set class attribute
        featureIndex = -1       
        filteredTrainData = trainData
        filteredTestData = testData
        

        attribute_index = 0

        while attribute_index < filteredTrainData.num_attributes() - 1:
            sampleCoverage = 0
            #print attribute_index
            # check value for current feature in each instance
            for instance_index in range(0, filteredTrainData.num_instances()):
                instance = filteredTrainData.get_instance(instance_index)
                value = instance.get_value(attribute_index)
                
                if value > 0:
                    sampleCoverage += 1
            if sampleCoverage == 0:
                #print "found"
                remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", str(attribute_index+1)]) #The index in this function start from 1
                remove.set_inputformat(filteredTrainData)
                filteredTrainData = remove.filter(filteredTrainData)  
                if filteredTestData:
                    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", str(attribute_index+1)]) #The index in this function start from 1
                    remove.set_inputformat(filteredTestData)
                    filteredTestData = remove.filter(filteredTestData)  
            else:
                attribute_index += 1

        return [filteredTrainData, filteredTestData]
开发者ID:zhaohengyang,项目名称:Android-malware-detection,代码行数:32,代码来源:weka_interface.py


示例6: main

def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)

    # remove class attribute
    data.delete_last_attribute()

    # build a clusterer and output model
    helper.print_title("Training SimpleKMeans clusterer")
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    clusterer.build_clusterer(data)
    print(clusterer)
    helper.print_info("Evaluating on data")
    evaluation = ClusterEvaluation()
    evaluation.set_model(clusterer)
    evaluation.test_model(data)
    print("# clusters: " + str(evaluation.num_clusters))
    print("log likelihood: " + str(evaluation.log_likelihood))
    print("cluster assignments:\n" + str(evaluation.cluster_assignments))
    plc.plot_cluster_assignments(evaluation, data, inst_no=True)

    # using a filtered clusterer
    helper.print_title("Filtered clusterer")
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    fclusterer = FilteredClusterer()
    fclusterer.clusterer = clusterer
    fclusterer.filter = remove
    fclusterer.build_clusterer(data)
    print(fclusterer)

    # load a dataset incrementally and build clusterer incrementally
    helper.print_title("Incremental clusterer")
    loader = Loader("weka.core.converters.ArffLoader")
    iris_inc = loader.load_file(iris_file, incremental=True)
    clusterer = Clusterer("weka.clusterers.Cobweb")
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    remove.inputformat(iris_inc)
    iris_filtered = remove.outputformat()
    clusterer.build_clusterer(iris_filtered)
    for inst in loader:
        remove.input(inst)
        inst_filtered = remove.output()
        clusterer.update_clusterer(inst_filtered)
    clusterer.update_finished()
    print(clusterer.to_commandline())
    print(clusterer)
    print(clusterer.graph)
    plg.plot_dot_graph(clusterer.graph)
开发者ID:keypointt,项目名称:python-weka-wrapper-examples,代码行数:58,代码来源:clusterers.py


示例7: filterOutUnnecessaryAPIAndEvaluateOurApproach

 def filterOutUnnecessaryAPIAndEvaluateOurApproach(self, ourApproahFile, apiFile, indexInTable, methodName, databaseTable, csvFilePath):
     outputStr = methodName+","
     resultList = []
     # Get whole feature set of our approach
     filteredData = self.load_Arff(ourApproahFile)
     # Use this function to get selected API feature and save the unselected api in a list
     filterOutList = self.attribueSelectionBasedOnRankingInDatabase(apiFile, indexInTable, databaseTable, "")[1]
     
     # Remove unselected API
     for functionName in filterOutList:
         functionName = functionName.split("(")[0] + "\(\)"
         functionName = functionName.replace('$','\$')
         remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName", options=["-E", "^" + functionName + ".*$"])
         remove.set_inputformat(filteredData)
         filteredData = remove.filter(filteredData)
     featureNum = filteredData.num_attributes() - 1
     print "featureNum: " + str(featureNum)
     if csvFilePath != "":
         self.writeTenScaledTitleManual(featureNum, csvFilePath)
         #print "i:" + str(i)
         #print "functionName:" + functionName
         #print "featureNum: " + str(filteredData.num_attributes() - 1)
     for attributeStr in filteredData.attributes():
         print(attributeStr)
     # Run ten scaled generation and evaluation 
     step = 10 
     while step < featureNum:
         roundData = self.attributeSelector(filteredData, step)
         classifier = self.algorithmPicker(roundData, indexInTable)
         evaluation = self.evaluation(classifier, roundData)
         #print(self.algorithmTable[indexInTable] + ": " + "{:.2f}".format(evaluation.percent_correct()) + ", Feature select number:" + str(roundData.num_attributes() - 1) + "/" + str(featureNum))
         resultList.append("{:.2f}".format(evaluation.percent_correct()))
         #csvFile.write("{:.2f}".format(evaluation.percent_correct()) +",")
         step += 10
     
     classifier = self.algorithmPicker(filteredData, indexInTable)
     evaluation = self.evaluation(classifier, filteredData)
     #print(self.algorithmTable[indexInTable] + ": " + "{:.2f}".format(evaluation.percent_correct()) + ", Feature select number:" + str(filteredData.num_attributes() - 1) + "/" + str(featureNum))
     resultList.append("{:.2f}".format(evaluation.percent_correct()))
     
     # Write out to CSV file
     for item in resultList:
         outputStr += item +","
     outputStr = outputStr[0:-1] + "\n"
     self.writeToPath(csvFilePath, outputStr)
开发者ID:zhaohengyang,项目名称:Android-malware-detection,代码行数:45,代码来源:weka_interface.py


示例8: use_filter

def use_filter(data):
    """
    Uses the AttributeSelection filter for attribute selection.
    :param data: the dataset to use
    :type data: Instances
    """
    print("\n2. Filter")
    flter = Filter(classname="weka.filters.supervised.attribute.AttributeSelection")
    aseval = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    assearch = ASSearch(classname="weka.attributeSelection.GreedyStepwise", options=["-B"])
    flter.set_property("evaluator", aseval.jobject)
    flter.set_property("search", assearch.jobject)
    flter.inputformat(data)
    filtered = flter.filter(data)
    print(str(filtered))
开发者ID:keypointt,项目名称:python-weka-wrapper-examples,代码行数:15,代码来源:attribute_selection_test.py


示例9: _pre_process_to_classification

 def _pre_process_to_classification(self, dataset):   
     filter_data = Filter(classname = 'weka.filters.unsupervised.attribute.MathExpression', 
                          options = ['-unset-class-temporarily', '-E', "ifelse ( A>0, 1, 0 )", 
                                     '-V', '-R', 'last'])
     
     filter_data.set_inputformat(dataset)
     filtered = filter_data.filter(dataset)
     
     discretize_data = Filter(classname = 'weka.filters.unsupervised.attribute.NumericToNominal', 
                          options = ['-R', 'last'])
     
     discretize_data.set_inputformat(filtered)
     discretized = discretize_data.filter(filtered)
     
     return discretized
开发者ID:jonmagal,项目名称:recsys_challenge,代码行数:15,代码来源:dataset.py


示例10: load

    def load(path, db):
        nominals = [
            49,  # dev_double_fp_config
            50,  # dev_endian_little
            51,  # dev_execution_capabilities
            52,  # dev_extensions
            54,  # dev_global_mem_cache_type
            57,  # dev_host_unified_memory
            63,  # dev_image_support
            65,  # dev_local_mem_type
            96,  # dev_queue_properties
            97,  # dev_single_fp_config
            98,  # dev_type
            100, # dev_vendor_id
        ]
        nominal_indices = ",".join([str(index) for index in nominals])
        force_nominal = ["-N", nominal_indices]

        # Load data from CSV.
        dataset = Dataset.load_csv(path, options=force_nominal)
        dataset.__class__ = Dataset

        # Set class index and database connection.
        dataset.class_index = -1
        dataset.db = db

        # Create string->nominal type attribute filter, ignoring the first
        # attribute (scenario ID), since we're not classifying with it.
        string_to_nominal = WekaFilter(classname=("weka.filters.unsupervised."
                                                  "attribute.StringToNominal"),
                                       options=["-R", "2-last"])
        string_to_nominal.inputformat(dataset.instances)

        # Create filtered dataset, and swap data around.
        filtered = string_to_nominal.filter(dataset.instances)
        dataset.instances = filtered

        return dataset
开发者ID:vianziro,项目名称:msc-thesis,代码行数:38,代码来源:dataset.py


示例11: Loader

from weka.core.converters import Loader
from weka.core.classes import Random
from weka.classifiers import Classifier, Evaluation, PredictionOutput
from weka.filters import Filter

jvm.start()

# load diabetes
loader = Loader(classname="weka.core.converters.ArffLoader")
fname = data_dir + os.sep + "diabetes.arff"
print("\nLoading dataset: " + fname + "\n")
data = loader.load_file(fname)
# we'll set the class attribute after filtering

# apply NominalToBinary filter and set class attribute
fltr = Filter("weka.filters.unsupervised.attribute.NominalToBinary")
fltr.inputformat(data)
filtered = fltr.filter(data)
filtered.class_is_last()

# cross-validate LinearRegression on filtered data, display model
cls = Classifier(classname="weka.classifiers.functions.LinearRegression")
pout = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.PlainText")
evl = Evaluation(filtered)
evl.crossvalidate_model(cls, filtered, 10, Random(1), pout)
print("10-fold cross-validation:\n" + evl.summary())
print("Predictions:\n\n" + str(pout))
cls.build_classifier(filtered)
print("Model:\n\n" + str(cls))

# use AddClassification filter with LinearRegression on filtered data
开发者ID:fracpete,项目名称:wekamooc,代码行数:31,代码来源:class-4.3.py


示例12: remove_attributes

 def remove_attributes(self, *attributes):
     indices = [self.attribute_index(x) for x in attributes]
     remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                     options=["-R", ','.join(str(x + 1) for x in indices)])
     remove.inputformat(self.instances)
     self.instances = remove.filter(self.instances)
开发者ID:ChrisCummins,项目名称:phd,代码行数:6,代码来源:ml.py


示例13: Loader

        if (len_email > 0) and (len_content > 0):
            writer.writerow(row)

# close csvfile
csvfile.close()

# start JVM
jvm.start()

# load CSV file
loader = Loader(classname="weka.core.converters.CSVLoader", options=["-E", '"', "-F", ","])
data = loader.load_file(csvfilename)
#print(data)

# convert class to nominal
wfilter = Filter(classname="weka.filters.unsupervised.attribute.StringToNominal", options=["-R", "last"])
wfilter.set_inputformat(data)
data = wfilter.filter(data)

# convert content to string
wfilter = Filter(classname="weka.filters.unsupervised.attribute.NominalToString", options=["-C", "first"])
wfilter.set_inputformat(data)
data = wfilter.filter(data)

# set class attribute
data.set_class_index(data.num_attributes() - 1)

# generate baseline
zeror = Classifier(classname="weka.classifiers.rules.ZeroR")
evaluation = Evaluation(data)
evaluation.crossvalidate_model(zeror, data, 10, Random(1))
开发者ID:Br3nda,项目名称:meetings,代码行数:31,代码来源:list.py


示例14: main

def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()

    # classifier help
    helper.print_title("Creating help string")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    print(classifier.to_help())

    # partial classname
    helper.print_title("Creating classifier from partial classname")
    clsname = ".J48"
    classifier = Classifier(classname=clsname)
    print(clsname + " --> " + classifier.classname)

    # classifier from commandline
    helper.print_title("Creating SMO from command-line string")
    cmdline = 'weka.classifiers.functions.SMO -K "weka.classifiers.functions.supportVector.NormalizedPolyKernel -E 3.0"'
    classifier = from_commandline(cmdline, classname="weka.classifiers.Classifier")
    classifier.build_classifier(iris_data)
    print("input: " + cmdline)
    print("output: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # kernel classifier
    helper.print_title("Creating SMO as KernelClassifier")
    kernel = Kernel(classname="weka.classifiers.functions.supportVector.RBFKernel", options=["-G", "0.001"])
    classifier = KernelClassifier(classname="weka.classifiers.functions.SMO", options=["-M"])
    classifier.kernel = kernel
    classifier.build_classifier(iris_data)
    print("classifier: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # build a classifier and output model
    helper.print_title("Training J48 classifier on iris")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    # Instead of using 'options=["-C", "0.3"]' in the constructor, we can also set the "confidenceFactor"
    # property of the J48 classifier itself. However, being of type float rather than double, we need
    # to convert it to the correct type first using the double_to_float function:
    classifier.set_property("confidenceFactor", typeconv.double_to_float(0.3))
    classifier.build_classifier(iris_data)
    print(classifier)
    print(classifier.graph)
    print(classifier.to_source("MyJ48"))
    plot_graph.plot_dot_graph(classifier.graph)

    # evaluate model on test set
    helper.print_title("Evaluating J48 classifier on iris")
    evaluation = Evaluation(iris_data)
    evl = evaluation.test_model(classifier, iris_data)
    print(evl)
    print(evaluation.summary())

    # evaluate model on train/test split
    helper.print_title("Evaluating J48 classifier on iris (random split 66%)")
    classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
    evaluation = Evaluation(iris_data)
    evaluation.evaluate_train_test_split(classifier, iris_data, 66.0, Random(1))
    print(evaluation.summary())

    # load a dataset incrementally and build classifier incrementally
    helper.print_title("Build classifier incrementally on iris")
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_inc = loader.load_file(iris_file, incremental=True)
    iris_inc.class_is_last()
    classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    classifier.build_classifier(iris_inc)
    for inst in loader:
        classifier.update_classifier(inst)
    print(classifier)

    # construct meta-classifiers
    helper.print_title("Meta classifiers")
    # generic FilteredClassifier instantiation
    print("generic FilteredClassifier instantiation")
    meta = SingleClassifierEnhancer(classname="weka.classifiers.meta.FilteredClassifier")
    meta.classifier = Classifier(classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    meta.set_property("filter", flter.jobject)
    print(meta.to_commandline())
    # direct FilteredClassifier instantiation
    print("direct FilteredClassifier instantiation")
    meta = FilteredClassifier()
    meta.classifier = Classifier(classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    meta.filter = flter
    print(meta.to_commandline())
    # generic Vote
    print("generic Vote instantiation")
#.........这里部分代码省略.........
开发者ID:fracpete,项目名称:python-weka-wrapper3-examples,代码行数:101,代码来源:classifiers.py


示例15: run_classifier

def run_classifier(path, prot, sel, cols, prot_vals, beta):
        
    DIs = dict()
    jvm.start()

    for i in range(len(cols)-1):
        loader = Loader(classname="weka.core.converters.CSVLoader")
        data = loader.load_file(path)
    
        # remove selected attribute from the data
        # NOTE: options are ONE indexed, not ZERO indexed
        remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", \
                        options=["-R", str(sel[2]+1)])
        remove.inputformat(data)
        data = remove.filter(data)

        # if running for only one attribue, remove all others (except protected)
        if i > 0:
            for j in range(1, prot[2]+1):
                if i != j:
                    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", \
                                    options=["-R", ("1" if i>j else "2")])
                    remove.inputformat(data)
                    data = remove.filter(data)

        # set prot attribute as Class attribute
        data.class_is_last()
        
        # run classifier
        cls = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
        cls.build_classifier(data)
    
        # count the number of each combination
        pos_and_pred = float(0.0)
        pos_and_not_pred = float(0.0)
        neg_and_pred = float(0.0)
        neg_and_not_pred = float(0.0)
        for ind, inst in enumerate(data):
            if cls.classify_instance(inst):
                if prot_vals[ind] == prot[1]:
                    pos_and_pred += 1
                else:
                    neg_and_pred += 1
            else:
                if prot_vals[ind] == prot[1]:
                    pos_and_not_pred += 1
                else:
                    neg_and_not_pred += 1

        # calculate DI
        BER = ((pos_and_not_pred / (pos_and_pred + pos_and_not_pred)) + \
               (neg_and_pred / (neg_and_pred + neg_and_not_pred))) * 0.5
        if BER > 0.5:
            BER = 1 - BER
        DI = 1 - ((1 - 2 * BER) / (beta + 1 - 2 * BER))

        if i == 0: # consider changing this to a 'code word' instead of 'all'
            DIs["all"] = DI
        else:
            DIs[cols[i-1]] = DI

    jvm.stop()

    return DIs
开发者ID:sorelle,项目名称:fairdata,代码行数:64,代码来源:main.py


示例16: usage

    		usage()
    		return 1


        options = {'idFlag':True, 'weightFlag': False, 'rmClassFlag': False, 'rmClass': 0}
        # read the first dataset
        fn = inputList[0]
        fid = FileReader(fn)
	Data = Instances(fid)
        Data, IDs = PreprocessData(Data,options)
        # remove class label
        attributeremove = AttributeRemove()
        attributeremove.setInvertSelection(Boolean(False))  # remove class labels from dataset
        attributeremove.setAttributeIndices(String(str(Data.numAttributes())))
        attributeremove.setInputFormat(Data)
        newData = Filter.useFilter(Data, attributeremove)
        # loop over input arff file
        cnt = Data.numAttributes() 
        for fnCnt in range(1,len(inputList)):
             fn = inputList[fnCnt]
             fid = FileReader(fn)
	     Data = Instances(fid)
             Data, IDs = PreprocessData(Data,options)
             # remove class label
             attributeremove = AttributeRemove()
	     attributeremove.setInvertSelection(Boolean(True))  # remove every attribute but the last one which is class label
	     attributeremove.setAttributeIndices(String(str(Data.numAttributes())))
	     attributeremove.setInputFormat(Data)
	     labels = Filter.useFilter(Data, attributeremove)
             attributeremove = AttributeRemove()
             attributeremove.setInvertSelection(Boolean(False))  # remove class labels from dataset
开发者ID:kayhan-batmanghelich,项目名称:gondola,代码行数:31,代码来源:mergeArffFiles.py


示例17: main

def main():
    """
    Just runs some example code.
    """

    # load a dataset
    data_file = helper.get_data_dir() + os.sep + "vote.arff"
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()

    # classifier
    classifier = Classifier(classname="weka.classifiers.trees.J48")

    # randomize data
    folds = 10
    seed = 1
    rnd = Random(seed)
    rand_data = Instances.copy_instances(data)
    rand_data.randomize(rnd)
    if rand_data.class_attribute.is_nominal:
        rand_data.stratify(folds)

    # perform cross-validation and add predictions
    predicted_data = None
    evaluation = Evaluation(rand_data)
    for i in xrange(folds):
        train = rand_data.train_cv(folds, i)
        # the above code is used by the StratifiedRemoveFolds filter,
        # the following code is used by the Explorer/Experimenter
        # train = rand_data.train_cv(folds, i, rnd)
        test = rand_data.test_cv(folds, i)

        # build and evaluate classifier
        cls = Classifier.make_copy(classifier)
        cls.build_classifier(train)
        evaluation.test_model(cls, test)

        # add predictions
        addcls = Filter(
            classname="weka.filters.supervised.attribute.AddClassification",
            options=["-classification", "-distribution", "-error"])
        # setting the java object directory avoids issues with correct quoting in option array
        addcls.set_property("classifier", Classifier.make_copy(classifier))
        addcls.inputformat(train)
        addcls.filter(train)  # trains the classifier
        pred = addcls.filter(test)
        if predicted_data is None:
            predicted_data = Instances.template_instances(pred, 0)
        for n in xrange(pred.num_instances):
            predicted_data.add_instance(pred.get_instance(n))

    print("")
    print("=== Setup ===")
    print("Classifier: " + classifier.to_commandline())
    print("Dataset: " + data.relationname)
    print("Folds: " + str(folds))
    print("Seed: " + str(seed))
    print("")
    print(evaluation.summary("=== " + str(folds) + " -fold Cross-Validation ==="))
    print("")
    print(predicted_data)
开发者ID:fracpete,项目名称:python-weka-wrapper-examples,代码行数:63,代码来源:crossvalidation_addprediction.py


示例18: Loader

jvm.start()

# load diabetes
loader = Loader(classname="weka.core.converters.ArffLoader")
fname = data_dir + os.sep + "diabetes.arff"
print("\nLoading dataset: " + fname + "\n")
data = loader.load_file(fname)
data.class_is_last()

# simulate the 10 train/test pairs of cross-validation
evl = Evaluation(data)
for i in xrange(1, 11):
    # create train set
    remove = Filter(
        classname="weka.filters.supervised.instance.StratifiedRemoveFolds",
        options=["-N", "10", "-F", str(i), "-S", "1", "-V"])
    remove.inputformat(data)
    train = remove.filter(data)

    # create test set
    remove = Filter(
        classname="weka.filters.supervised.instance.StratifiedRemoveFolds",
        options=["-N", "10", "-F", str(i), "-S", "1"])
    remove.inputformat(data)
    test = remove.filter(data)

    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.build_classifier(train)
    evl.test_model(cls, test)
开发者ID:fracpete,项目名称:wekamooc,代码行数:29,代码来源:class-2.5.py


示例19: attribueSelectionBasedOnRankingInDatabase

    def attribueSelectionBasedOnRankingInDatabase(self, trainingData, indexInTable, databaseTable, csvFilePath, testingData = None):     
        featureNum = trainingData.num_attributes() - 1
        outputStr = ""
        outputStr += databaseTable+","

        # select from database vector difference
        featureList3 = []
        wholefeatureList = []
        dbmgr = permissionMappingManager(databasePath)

        for row in dbmgr.query("select * from " + databaseTable):
            featureList3.append(row[0])
            wholefeatureList.append(row[0])
        #featureList3.reverse()
        
        bestRemainFilterList = []
        resultList = []
        digit = len(featureList3) % 10

        bestAccuracy = 0
        bestTrainingData = None
        bestTestingData = None
        bestEvaluation = None
        
        classifier = self.algorithmPicker(trainingData, indexInTable)
        evaluation = self.evaluation(classifier, trainingData, testingData)
        if evaluation.percent_correct() >= bestAccuracy:
            bestAccuracy = evaluation.percent_correct()
            bestTrainingData = trainingData
            bestTestingData = testingData
            bestRemainFilterList = list(featureList3)
            bestEvaluation = evaluation
            
        print(self.algorithmTable[indexInTable] + ": " + "{:.2f}".format(evaluation.percent_correct()) + ", Feature select number:" + str(trainingData.num_attributes() - 1) + "/" + str(featureNum))
        resultList.append("{:.2f}".format(evaluation.percent_correct()))
        
        if digit > 0:
            for i in range(0, digit):
                functionName = featureList3.pop().split("(")[0] + "\(\)"
                functionName = functionName.replace('$','\$')
                #print "functionName:" + functionName
                remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName", options=["-E", "^" + functionName + ".*$"])
                remove.set_inputformat(trainingData)
                trainingData = remove.filter(trainingData)
                if testingData:
                    remove.set_inputformat(testingData)
                    testingData = remove.filter(testingData)
                
                #print "i:" + str(i)
                #print "functionName:" + functionName
                #print "featureNum: " + str(filteredData.num_attributes() - 1)
            #for attributeStr in trainingData.attributes():
            #    print(attributeStr)
            #self.printFunctionInfo(trainingData, trainingData.num_instances())
            
            classifier = self.algorithmPicker(trainingData, indexInTable)
            evaluation = self.evaluation(classifier, trainingData, testingData)
            if evaluation.percent_correct() >= bestAccuracy:
                bestAccuracy = evaluation.percent_correct()
                bestTrainingData = trainingData
                bestTestingData = testingData
                bestRemainFilterList = list(featureList3)
                bestEvaluation = evaluation
                
            print(self.algorithmTable[indexInTable] + ": " + "{:.2f}".format(evaluation.percent_correct()) + ", Feature select number:" + str(trainingData.num_attributes() - 1) + "/" + str(featureNum))
            resultList.append("{:.2f}".format(evaluation.percent_correct()))
            
        while trainingData.num_attributes() - 1 > 10:
            for i in range(0,10):
                functionName = featureList3.pop().split("(")[0] + "\(\)"
                functionName = functionName.replace('$','\$')
                #print "functionName:" + functionName
                remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName", options=["-E", "^" + functionName + ".*$"])
                remove.set_inputformat(trainingData)
                trainingData = remove.filter(trainingData)
                if testingData:
                    remove.set_inputformat(testingData)
                    testingData = remove.filter(testingData)
                #print functionName
                #print "featureNum: " + str(filteredData.num_attributes() - 1)
                
            #for attributeStr in trainingData.attributes():
            #    print(attributeStr)
            
            classifier = self.algorithmPicker(trainingData, indexInTable)
            evaluation = self.evaluation(classifier, trainingData, testingData)
            if evaluation.percent_correct() >= bestAccuracy:
                
                bestAccuracy = evaluation.percent_correct()
                bestTrainingData = trainingData
                bestTestingData = testingData
                bestRemainFilterList = list(featureList3)
                bestEvaluation = evaluation
                #print "update feature number:" + str(len(bestRemainFilterList))
                
            print(self.algorithmTable[indexInTable] + ": " + "{:.2f}".format(evaluation.percent_correct()) + ", Feature select number:" + str(trainingData.num_attributes() - 1) + "/" + str(featureNum))
            resultList.append("{:.2f}".format(evaluation.percent_correct()))

        resultList.reverse()
        
#.........这里部分代码省略.........
开发者ID:zhaohengyang,项目名称:Android-malware-detection,代码行数:101,代码来源:weka_interface.py


示例20: print

from weka.core.converters import Loader, Saver
from weka.core.dataset import Instances
from weka.filters import Filter

jvm.start()

# load weather.nominal
fname = data_dir + os.sep + "weather.nominal.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)

# output header
print(Instances.template_instances(data))

# remove attribute no 3
print("\nRemove attribute no 3")
fltr = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "3"])
fltr.set_inputformat(data)
filtered = fltr.filter(data)

# output header
print(Instances.template_instances(filtered))

# save modified dataset
saver = Saver(classname="weka.core.converters.ArffSaver")
saver.save_file(filtered, data_dir + os.sep + "weather.nominal-filtered.arff")

jvm.stop()

开发者ID:echavarria,项目名称:wekamooc,代码行数:29,代码来源:class-1.5.py



注:本文中的weka.filters.Filter类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python control.Flow类代码示例发布时间:2022-05-26
下一篇:
Python jvm.stop函数代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap