本文整理汇总了Python中weka.filters.Filter类的典型用法代码示例。如果您正苦于以下问题:Python Filter类的具体用法?Python Filter怎么用?Python Filter使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Filter类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: filterUnusedFeatureFromList
def filterUnusedFeatureFromList(self, data, unusedFuncitonList):
    """Drop every attribute whose name begins with one of the given names.

    One RemoveByName pass is applied per entry; the progressively filtered
    dataset is returned (the input *data* object itself is not modified).
    """
    result = data
    for feature_name in unusedFuncitonList:
        # RemoveByName drops attributes whose name matches the -E regex.
        name_filter = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName",
                             options=["-E", "^" + feature_name + ".*$"])
        name_filter.set_inputformat(result)
        result = name_filter.filter(result)
    return result
开发者ID:zhaohengyang,项目名称:Android-malware-detection,代码行数:9,代码来源:weka_interface.py
示例2: attributeSelector
def attributeSelector(self, data, selectNum):
    """Rank attributes by information gain and keep the top *selectNum*.

    Uses Weka's supervised AttributeSelection filter with a Ranker search
    (threshold disabled via -T -MAX_DOUBLE) and InfoGain as the evaluator.
    """
    # NOTE: local renamed from the original so it no longer shadows the method.
    selector = Filter(
        classname="weka.filters.supervised.attribute.AttributeSelection",
        options=[
            "-S", "weka.attributeSelection.Ranker -T -1.7976931348623157E308 -N " + str(selectNum),
            "-E", "weka.attributeSelection.InfoGainAttributeEval",
        ])
    selector.set_inputformat(data)
    return selector.filter(data)
开发者ID:zhaohengyang,项目名称:Android-malware-detection,代码行数:10,代码来源:weka_interface.py
示例3: getSetDataBySetIndex
def getSetDataBySetIndex(self, data, index):
    """Cut one feature set (plus the class attribute) out of *data*.

    Set boundaries come from FeatureTable; the Remove filter is inverted
    (-V) so that only the chosen attribute range and the last (class)
    attribute survive.
    """
    boundaries = FeatureTable().getEachSetStartIndex()
    first = boundaries[index]
    last = boundaries[index + 1] - 1
    keep = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                  options=["-V", "-R", str(first) + "-" + str(last) + ",last"])
    keep.set_inputformat(data)
    return keep.filter(data)
开发者ID:zhaohengyang,项目名称:Android-malware-detection,代码行数:11,代码来源:weka_interface.py
示例4: remove_correct_classified
def remove_correct_classified(self, invert=False):
    """Filter self.data through Weka's RemoveMisclassified.

    With the default invert=False the -V flag is passed, inverting the
    filter's selection (per the original author's intent — TODO confirm
    the desired polarity against RemoveMisclassified's docs); with
    invert=True no flag is passed.

    Bug fix: the original appended an empty string '' to the option list
    when invert was True, which Weka's option parser may reject; the flag
    is now added conditionally instead.
    """
    options = [
        '-W', self.classifier.to_commandline(),  # classifier used to judge instances
        '-C', str(self.class_index),             # class index
        '-I', '0',                               # max iterations
    ]
    if not invert:
        options.append('-V')  # invert the filter's instance selection
    classname = "weka.filters.unsupervised.instance.RemoveMisclassified"
    remove = Filter(classname=classname, options=options)
    remove.inputformat(self.data)
    self.data = remove.filter(self.data)
开发者ID:sbiastoch,项目名称:thesis,代码行数:13,代码来源:evaluate.py
示例5: emlimitateUnusedFeature
def emlimitateUnusedFeature(self, trainData, testData=None):
    """Remove features that are never positive in any training instance.

    Scans attributes left to right; whenever an attribute has no value > 0
    in any training instance it is removed from the training set (and from
    the test set, if one was supplied). The scan index is NOT advanced
    after a removal because deleting an attribute shifts the remaining
    ones down by one position.

    :param trainData: training Instances; its class index is set to the
        last attribute as a side effect
    :param testData: optional test Instances kept in sync with trainData
    :return: [filteredTrainData, filteredTestData] (second entry is None
        when no test set was given)
    """
    trainData.set_class_index(trainData.num_attributes() - 1)  # class = last attribute
    filteredTrainData = trainData
    filteredTestData = testData
    attribute_index = 0
    # Stop one short of the last attribute: that one is the class.
    while attribute_index < filteredTrainData.num_attributes() - 1:
        covered = False
        for instance_index in range(0, filteredTrainData.num_instances()):
            value = filteredTrainData.get_instance(instance_index).get_value(attribute_index)
            if value > 0:
                covered = True
                break  # one positive value is enough; no need to keep scanning
        if not covered:
            # Weka's Remove filter uses 1-based attribute indices.
            remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                            options=["-R", str(attribute_index + 1)])
            remove.set_inputformat(filteredTrainData)
            filteredTrainData = remove.filter(filteredTrainData)
            if filteredTestData:
                remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                                options=["-R", str(attribute_index + 1)])
                remove.set_inputformat(filteredTestData)
                filteredTestData = remove.filter(filteredTestData)
        else:
            attribute_index += 1
    return [filteredTrainData, filteredTestData]
开发者ID:zhaohengyang,项目名称:Android-malware-detection,代码行数:32,代码来源:weka_interface.py
示例6: main
def main():
    """Demonstrates batch, filtered and incremental clustering on iris."""
    # ---- batch clustering with SimpleKMeans ----
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)
    data.delete_last_attribute()  # clusterers must not see the class attribute
    helper.print_title("Training SimpleKMeans clusterer")
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    clusterer.build_clusterer(data)
    print(clusterer)
    helper.print_info("Evaluating on data")
    evaluation = ClusterEvaluation()
    evaluation.set_model(clusterer)
    evaluation.test_model(data)
    print("# clusters: " + str(evaluation.num_clusters))
    print("log likelihood: " + str(evaluation.log_likelihood))
    print("cluster assignments:\n" + str(evaluation.cluster_assignments))
    plc.plot_cluster_assignments(evaluation, data, inst_no=True)
    # ---- FilteredClusterer: Remove drops the class attribute on the fly ----
    helper.print_title("Filtered clusterer")
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    fclusterer = FilteredClusterer()
    fclusterer.clusterer = clusterer
    fclusterer.filter = remove
    fclusterer.build_clusterer(data)
    print(fclusterer)
    # ---- incremental clustering with Cobweb, one instance at a time ----
    helper.print_title("Incremental clusterer")
    loader = Loader("weka.core.converters.ArffLoader")
    iris_inc = loader.load_file(iris_file, incremental=True)
    clusterer = Clusterer("weka.clusterers.Cobweb")
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    remove.inputformat(iris_inc)
    iris_filtered = remove.outputformat()
    clusterer.build_clusterer(iris_filtered)
    for inst in loader:
        # push each raw instance through the filter before updating the model
        remove.input(inst)
        inst_filtered = remove.output()
        clusterer.update_clusterer(inst_filtered)
    clusterer.update_finished()
    print(clusterer.to_commandline())
    print(clusterer)
    print(clusterer.graph)
    plg.plot_dot_graph(clusterer.graph)
示例7: filterOutUnnecessaryAPIAndEvaluateOurApproach
def filterOutUnnecessaryAPIAndEvaluateOurApproach(self, ourApproahFile, apiFile, indexInTable, methodName, databaseTable, csvFilePath):
outputStr = methodName+","
resultList = []
# Get whole feature set of our approach
filteredData = self.load_Arff(ourApproahFile)
# Use this function to get selected API feature and save the unselected api in a list
filterOutList = self.attribueSelectionBasedOnRankingInDatabase(apiFile, indexInTable, databaseTable, "")[1]
# Remove unselected API
for functionName in filterOutList:
functionName = functionName.split("(")[0] + "\(\)"
functionName = functionName.replace('$','\$')
remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName", options=["-E", "^" + functionName + ".*$"])
remove.set_inputformat(filteredData)
filteredData = remove.filter(filteredData)
featureNum = filteredData.num_attributes() - 1
print "featureNum: " + str(featureNum)
if csvFilePath != "":
self.writeTenScaledTitleManual(featureNum, csvFilePath)
#print "i:" + str(i)
#print "functionName:" + functionName
#print "featureNum: " + str(filteredData.num_attributes() - 1)
for attributeStr in filteredData.attributes():
print(attributeStr)
# Run ten scaled generation and evaluation
step = 10
while step < featureNum:
roundData = self.attributeSelector(filteredData, step)
classifier = self.algorithmPicker(roundData, indexInTable)
evaluation = self.evaluation(classifier, roundData)
#print(self.algorithmTable[indexInTable] + ": " + "{:.2f}".format(evaluation.percent_correct()) + ", Feature select number:" + str(roundData.num_attributes() - 1) + "/" + str(featureNum))
resultList.append("{:.2f}".format(evaluation.percent_correct()))
#csvFile.write("{:.2f}".format(evaluation.percent_correct()) +",")
step += 10
classifier = self.algorithmPicker(filteredData, indexInTable)
evaluation = self.evaluation(classifier, filteredData)
#print(self.algorithmTable[indexInTable] + ": " + "{:.2f}".format(evaluation.percent_correct()) + ", Feature select number:" + str(filteredData.num_attributes() - 1) + "/" + str(featureNum))
resultList.append("{:.2f}".format(evaluation.percent_correct()))
# Write out to CSV file
for item in resultList:
outputStr += item +","
outputStr = outputStr[0:-1] + "\n"
self.writeToPath(csvFilePath, outputStr)
开发者ID:zhaohengyang,项目名称:Android-malware-detection,代码行数:45,代码来源:weka_interface.py
示例8: use_filter
def use_filter(data):
    """
    Uses the AttributeSelection filter for attribute selection.

    :param data: the dataset to use
    :type data: Instances
    """
    print("\n2. Filter")
    attsel = Filter(classname="weka.filters.supervised.attribute.AttributeSelection")
    evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    search = ASSearch(classname="weka.attributeSelection.GreedyStepwise", options=["-B"])
    # Wire evaluator and search into the filter via their underlying Java objects.
    attsel.set_property("evaluator", evaluator.jobject)
    attsel.set_property("search", search.jobject)
    attsel.inputformat(data)
    filtered = attsel.filter(data)
    print(str(filtered))
开发者ID:keypointt,项目名称:python-weka-wrapper-examples,代码行数:15,代码来源:attribute_selection_test.py
示例9: _pre_process_to_classification
def _pre_process_to_classification(self, dataset):
    """Turn the numeric class into a nominal 0/1 label.

    First MathExpression maps the last attribute to 1 where its value is
    positive and 0 otherwise (class temporarily unset so it may be
    rewritten), then NumericToNominal converts that column to nominal.
    """
    binarize = Filter(classname='weka.filters.unsupervised.attribute.MathExpression',
                      options=['-unset-class-temporarily', '-E', "ifelse ( A>0, 1, 0 )",
                               '-V', '-R', 'last'])
    binarize.set_inputformat(dataset)
    binary = binarize.filter(dataset)
    to_nominal = Filter(classname='weka.filters.unsupervised.attribute.NumericToNominal',
                        options=['-R', 'last'])
    to_nominal.set_inputformat(binary)
    return to_nominal.filter(binary)
开发者ID:jonmagal,项目名称:recsys_challenge,代码行数:15,代码来源:dataset.py
示例10: load
def load(path, db):
    """Load a scenario CSV as a Dataset, fix attribute types, attach *db*."""
    # Attribute positions (1-based, as Weka's -N option expects) that must
    # be loaded as nominal rather than string/numeric.
    nominals = [
        49,   # dev_double_fp_config
        50,   # dev_endian_little
        51,   # dev_execution_capabilities
        52,   # dev_extensions
        54,   # dev_global_mem_cache_type
        57,   # dev_host_unified_memory
        63,   # dev_image_support
        65,   # dev_local_mem_type
        96,   # dev_queue_properties
        97,   # dev_single_fp_config
        98,   # dev_type
        100,  # dev_vendor_id
    ]
    force_nominal = ["-N", ",".join(str(index) for index in nominals)]
    # Load data from CSV and re-brand the result as our Dataset subclass.
    dataset = Dataset.load_csv(path, options=force_nominal)
    dataset.__class__ = Dataset
    # Class is the last attribute; keep a handle to the database connection.
    dataset.class_index = -1
    dataset.db = db
    # Convert string attributes to nominal, skipping the first attribute
    # (scenario ID) since we're not classifying with it.
    string_to_nominal = WekaFilter(classname=("weka.filters.unsupervised."
                                              "attribute.StringToNominal"),
                                   options=["-R", "2-last"])
    string_to_nominal.inputformat(dataset.instances)
    dataset.instances = string_to_nominal.filter(dataset.instances)
    return dataset
开发者ID:vianziro,项目名称:msc-thesis,代码行数:38,代码来源:dataset.py
示例11: Loader
from weka.core.converters import Loader
from weka.core.classes import Random
from weka.classifiers import Classifier, Evaluation, PredictionOutput
from weka.filters import Filter
# start the JVM backing python-weka-wrapper
jvm.start()
# load diabetes
loader = Loader(classname="weka.core.converters.ArffLoader")
fname = data_dir + os.sep + "diabetes.arff"
print("\nLoading dataset: " + fname + "\n")
data = loader.load_file(fname)
# we'll set the class attribute after filtering
# apply NominalToBinary filter and set class attribute
fltr = Filter("weka.filters.unsupervised.attribute.NominalToBinary")
fltr.inputformat(data)
filtered = fltr.filter(data)
# class must be set on the FILTERED copy - the binary attributes shift positions
filtered.class_is_last()
# cross-validate LinearRegression on filtered data, display model
cls = Classifier(classname="weka.classifiers.functions.LinearRegression")
pout = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.PlainText")
evl = Evaluation(filtered)
evl.crossvalidate_model(cls, filtered, 10, Random(1), pout)
print("10-fold cross-validation:\n" + evl.summary())
print("Predictions:\n\n" + str(pout))
# rebuild on the full dataset for the final model printout
cls.build_classifier(filtered)
print("Model:\n\n" + str(cls))
# use AddClassification filter with LinearRegression on filtered data
开发者ID:fracpete,项目名称:wekamooc,代码行数:31,代码来源:class-4.3.py
示例12: remove_attributes
def remove_attributes(self, *attributes):
    """Remove the named attributes from self.instances in place."""
    positions = [self.attribute_index(name) for name in attributes]
    # Weka's Remove filter expects a comma-separated list of 1-based indices.
    spec = ','.join(str(pos + 1) for pos in positions)
    removal = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                     options=["-R", spec])
    removal.inputformat(self.instances)
    self.instances = removal.filter(self.instances)
开发者ID:ChrisCummins,项目名称:phd,代码行数:6,代码来源:ml.py
示例13: Loader
if (len_email > 0) and (len_content > 0):
writer.writerow(row)
# close csvfile
csvfile.close()
# start JVM
jvm.start()
# load CSV file (-E sets the enclosure character, -F the field separator)
loader = Loader(classname="weka.core.converters.CSVLoader", options=["-E", '"', "-F", ","])
data = loader.load_file(csvfilename)
#print(data)
# convert class to nominal (last attribute)
wfilter = Filter(classname="weka.filters.unsupervised.attribute.StringToNominal", options=["-R", "last"])
wfilter.set_inputformat(data)
data = wfilter.filter(data)
# convert content to string (first attribute)
wfilter = Filter(classname="weka.filters.unsupervised.attribute.NominalToString", options=["-C", "first"])
wfilter.set_inputformat(data)
data = wfilter.filter(data)
# set class attribute
data.set_class_index(data.num_attributes() - 1)
# generate baseline: ZeroR always predicts the majority class, so its
# cross-validated accuracy is the floor any real classifier must beat
zeror = Classifier(classname="weka.classifiers.rules.ZeroR")
evaluation = Evaluation(data)
evaluation.crossvalidate_model(zeror, data, 10, Random(1))
开发者ID:Br3nda,项目名称:meetings,代码行数:31,代码来源:list.py
示例14: main
def main():
"""
Just runs some example code.
"""
# load a dataset
iris_file = helper.get_data_dir() + os.sep + "iris.arff"
helper.print_info("Loading dataset: " + iris_file)
loader = Loader("weka.core.converters.ArffLoader")
iris_data = loader.load_file(iris_file)
iris_data.class_is_last()
# classifier help
helper.print_title("Creating help string")
classifier = Classifier(classname="weka.classifiers.trees.J48")
print(classifier.to_help())
# partial classname
helper.print_title("Creating classifier from partial classname")
clsname = ".J48"
classifier = Classifier(classname=clsname)
print(clsname + " --> " + classifier.classname)
# classifier from commandline
helper.print_title("Creating SMO from command-line string")
cmdline = 'weka.classifiers.functions.SMO -K "weka.classifiers.functions.supportVector.NormalizedPolyKernel -E 3.0"'
classifier = from_commandline(cmdline, classname="weka.classifiers.Classifier")
classifier.build_classifier(iris_data)
print("input: " + cmdline)
print("output: " + classifier.to_commandline())
print("model:\n" + str(classifier))
# kernel classifier
helper.print_title("Creating SMO as KernelClassifier")
kernel = Kernel(classname="weka.classifiers.functions.supportVector.RBFKernel", options=["-G", "0.001"])
classifier = KernelClassifier(classname="weka.classifiers.functions.SMO", options=["-M"])
classifier.kernel = kernel
classifier.build_classifier(iris_data)
print("classifier: " + classifier.to_commandline())
print("model:\n" + str(classifier))
# build a classifier and output model
helper.print_title("Training J48 classifier on iris")
classifier = Classifier(classname="weka.classifiers.trees.J48")
# Instead of using 'options=["-C", "0.3"]' in the constructor, we can also set the "confidenceFactor"
# property of the J48 classifier itself. However, being of type float rather than double, we need
# to convert it to the correct type first using the double_to_float function:
classifier.set_property("confidenceFactor", typeconv.double_to_float(0.3))
classifier.build_classifier(iris_data)
print(classifier)
print(classifier.graph)
print(classifier.to_source("MyJ48"))
plot_graph.plot_dot_graph(classifier.graph)
# evaluate model on test set
helper.print_title("Evaluating J48 classifier on iris")
evaluation = Evaluation(iris_data)
evl = evaluation.test_model(classifier, iris_data)
print(evl)
print(evaluation.summary())
# evaluate model on train/test split
helper.print_title("Evaluating J48 classifier on iris (random split 66%)")
classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
evaluation = Evaluation(iris_data)
evaluation.evaluate_train_test_split(classifier, iris_data, 66.0, Random(1))
print(evaluation.summary())
# load a dataset incrementally and build classifier incrementally
helper.print_title("Build classifier incrementally on iris")
helper.print_info("Loading dataset: " + iris_file)
loader = Loader("weka.core.converters.ArffLoader")
iris_inc = loader.load_file(iris_file, incremental=True)
iris_inc.class_is_last()
classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")
classifier.build_classifier(iris_inc)
for inst in loader:
classifier.update_classifier(inst)
print(classifier)
# construct meta-classifiers
helper.print_title("Meta classifiers")
# generic FilteredClassifier instantiation
print("generic FilteredClassifier instantiation")
meta = SingleClassifierEnhancer(classname="weka.classifiers.meta.FilteredClassifier")
meta.classifier = Classifier(classname="weka.classifiers.functions.LinearRegression")
flter = Filter("weka.filters.unsupervised.attribute.Remove")
flter.options = ["-R", "first"]
meta.set_property("filter", flter.jobject)
print(meta.to_commandline())
# direct FilteredClassifier instantiation
print("direct FilteredClassifier instantiation")
meta = FilteredClassifier()
meta.classifier = Classifier(classname="weka.classifiers.functions.LinearRegression")
flter = Filter("weka.filters.unsupervised.attribute.Remove")
flter.options = ["-R", "first"]
meta.filter = flter
print(meta.to_commandline())
# generic Vote
print("generic Vote instantiation")
#.........这里部分代码省略.........
开发者ID:fracpete,项目名称:python-weka-wrapper3-examples,代码行数:101,代码来源:classifiers.py
示例15: run_classifier
def run_classifier(path, prot, sel, cols, prot_vals, beta):
    """Compute disparate-impact (DI) scores with NaiveBayes.

    Runs once over all attributes (key "all"), then once per single
    attribute, predicting the protected attribute and deriving DI from the
    balanced error rate. Returns a dict mapping column name -> DI.
    """
    DIs = dict()
    jvm.start()
    for i in range(len(cols) - 1):
        loader = Loader(classname="weka.core.converters.CSVLoader")
        data = loader.load_file(path)
        # Remove the selected attribute from the data.
        # NOTE: Weka filter options are ONE indexed, not ZERO indexed.
        remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                        options=["-R", str(sel[2] + 1)])
        remove.inputformat(data)
        data = remove.filter(data)
        # If running for only one attribute, remove all others (except protected).
        if i > 0:
            for j in range(1, prot[2] + 1):
                if i != j:
                    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                                    options=["-R", ("1" if i > j else "2")])
                    remove.inputformat(data)
                    data = remove.filter(data)
        # The protected attribute serves as the class to predict.
        data.class_is_last()
        cls = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
        cls.build_classifier(data)
        # Tally predictions against protected-group membership.
        pos_and_pred = 0.0
        pos_and_not_pred = 0.0
        neg_and_pred = 0.0
        neg_and_not_pred = 0.0
        for ind, inst in enumerate(data):
            in_group = prot_vals[ind] == prot[1]
            if cls.classify_instance(inst):
                if in_group:
                    pos_and_pred += 1
                else:
                    neg_and_pred += 1
            else:
                if in_group:
                    pos_and_not_pred += 1
                else:
                    neg_and_not_pred += 1
        # Balanced error rate, folded to <= 0.5, then disparate impact.
        BER = ((pos_and_not_pred / (pos_and_pred + pos_and_not_pred)) +
               (neg_and_pred / (neg_and_pred + neg_and_not_pred))) * 0.5
        if BER > 0.5:
            BER = 1 - BER
        DI = 1 - ((1 - 2 * BER) / (beta + 1 - 2 * BER))
        if i == 0:  # consider changing this to a 'code word' instead of 'all'
            DIs["all"] = DI
        else:
            DIs[cols[i - 1]] = DI
    jvm.stop()
    return DIs
开发者ID:sorelle,项目名称:fairdata,代码行数:64,代码来源:main.py
示例16: usage
usage()
return 1
options = {'idFlag':True, 'weightFlag': False, 'rmClassFlag': False, 'rmClass': 0}
# read the first dataset
fn = inputList[0]
fid = FileReader(fn)
Data = Instances(fid)
Data, IDs = PreprocessData(Data,options)
# remove class label
attributeremove = AttributeRemove()
attributeremove.setInvertSelection(Boolean(False)) # remove class labels from dataset
attributeremove.setAttributeIndices(String(str(Data.numAttributes())))
attributeremove.setInputFormat(Data)
newData = Filter.useFilter(Data, attributeremove)
# loop over input arff file
cnt = Data.numAttributes()
for fnCnt in range(1,len(inputList)):
fn = inputList[fnCnt]
fid = FileReader(fn)
Data = Instances(fid)
Data, IDs = PreprocessData(Data,options)
# remove class label
attributeremove = AttributeRemove()
attributeremove.setInvertSelection(Boolean(True)) # remove every attribute but the last one which is class label
attributeremove.setAttributeIndices(String(str(Data.numAttributes())))
attributeremove.setInputFormat(Data)
labels = Filter.useFilter(Data, attributeremove)
attributeremove = AttributeRemove()
attributeremove.setInvertSelection(Boolean(False)) # remove class labels from dataset
开发者ID:kayhan-batmanghelich,项目名称:gondola,代码行数:31,代码来源:mergeArffFiles.py
示例17: main
def main():
    """Cross-validates J48 on vote.arff and collects per-fold predictions
    via the AddClassification filter, mirroring the Explorer's behaviour.
    """
    # ---- load data ----
    data_file = helper.get_data_dir() + os.sep + "vote.arff"
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    # ---- randomize (and stratify, for nominal classes) a copy ----
    folds = 10
    seed = 1
    rnd = Random(seed)
    rand_data = Instances.copy_instances(data)
    rand_data.randomize(rnd)
    if rand_data.class_attribute.is_nominal:
        rand_data.stratify(folds)
    # ---- cross-validate, appending predictions fold by fold ----
    predicted_data = None
    evaluation = Evaluation(rand_data)
    for i in xrange(folds):
        train = rand_data.train_cv(folds, i)
        # the above call matches the StratifiedRemoveFolds filter;
        # the Explorer/Experimenter would use: rand_data.train_cv(folds, i, rnd)
        test = rand_data.test_cv(folds, i)
        # build and evaluate a fresh copy of the classifier on this fold
        cls = Classifier.make_copy(classifier)
        cls.build_classifier(train)
        evaluation.test_model(cls, test)
        # AddClassification appends prediction/distribution/error columns
        addcls = Filter(
            classname="weka.filters.supervised.attribute.AddClassification",
            options=["-classification", "-distribution", "-error"])
        # setting the java object directly avoids issues with correct quoting in option array
        addcls.set_property("classifier", Classifier.make_copy(classifier))
        addcls.inputformat(train)
        addcls.filter(train)  # trains the classifier
        pred = addcls.filter(test)
        if predicted_data is None:
            predicted_data = Instances.template_instances(pred, 0)
        for n in xrange(pred.num_instances):
            predicted_data.add_instance(pred.get_instance(n))
    # ---- report ----
    print("")
    print("=== Setup ===")
    print("Classifier: " + classifier.to_commandline())
    print("Dataset: " + data.relationname)
    print("Folds: " + str(folds))
    print("Seed: " + str(seed))
    print("")
    print(evaluation.summary("=== " + str(folds) + " -fold Cross-Validation ==="))
    print("")
    print(predicted_data)
示例18: Loader
jvm.start()
# load diabetes
loader = Loader(classname="weka.core.converters.ArffLoader")
fname = data_dir + os.sep + "diabetes.arff"
print("\nLoading dataset: " + fname + "\n")
data = loader.load_file(fname)
data.class_is_last()
# Simulate the 10 train/test pairs of cross-validation by hand:
# StratifiedRemoveFolds with -V (inverted) yields fold i's training split,
# without -V it yields the matching test split.
evl = Evaluation(data)
for i in xrange(1, 11):
    # training set for fold i
    remove = Filter(
        classname="weka.filters.supervised.instance.StratifiedRemoveFolds",
        options=["-N", "10", "-F", str(i), "-S", "1", "-V"])
    remove.inputformat(data)
    train = remove.filter(data)
    # test set for fold i
    remove = Filter(
        classname="weka.filters.supervised.instance.StratifiedRemoveFolds",
        options=["-N", "10", "-F", str(i), "-S", "1"])
    remove.inputformat(data)
    test = remove.filter(data)
    # train on the fold's training split, accumulate results on its test split
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.build_classifier(train)
    evl.test_model(cls, test)
示例19: attribueSelectionBasedOnRankingInDatabase
def attribueSelectionBasedOnRankingInDatabase(self, trainingData, indexInTable, databaseTable, csvFilePath, testingData = None):
featureNum = trainingData.num_attributes() - 1
outputStr = ""
outputStr += databaseTable+","
# select from database vector difference
featureList3 = []
wholefeatureList = []
dbmgr = permissionMappingManager(databasePath)
for row in dbmgr.query("select * from " + databaseTable):
featureList3.append(row[0])
wholefeatureList.append(row[0])
#featureList3.reverse()
bestRemainFilterList = []
resultList = []
digit = len(featureList3) % 10
bestAccuracy = 0
bestTrainingData = None
bestTestingData = None
bestEvaluation = None
classifier = self.algorithmPicker(trainingData, indexInTable)
evaluation = self.evaluation(classifier, trainingData, testingData)
if evaluation.percent_correct() >= bestAccuracy:
bestAccuracy = evaluation.percent_correct()
bestTrainingData = trainingData
bestTestingData = testingData
bestRemainFilterList = list(featureList3)
bestEvaluation = evaluation
print(self.algorithmTable[indexInTable] + ": " + "{:.2f}".format(evaluation.percent_correct()) + ", Feature select number:" + str(trainingData.num_attributes() - 1) + "/" + str(featureNum))
resultList.append("{:.2f}".format(evaluation.percent_correct()))
if digit > 0:
for i in range(0, digit):
functionName = featureList3.pop().split("(")[0] + "\(\)"
functionName = functionName.replace('$','\$')
#print "functionName:" + functionName
remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName", options=["-E", "^" + functionName + ".*$"])
remove.set_inputformat(trainingData)
trainingData = remove.filter(trainingData)
if testingData:
remove.set_inputformat(testingData)
testingData = remove.filter(testingData)
#print "i:" + str(i)
#print "functionName:" + functionName
#print "featureNum: " + str(filteredData.num_attributes() - 1)
#for attributeStr in trainingData.attributes():
# print(attributeStr)
#self.printFunctionInfo(trainingData, trainingData.num_instances())
classifier = self.algorithmPicker(trainingData, indexInTable)
evaluation = self.evaluation(classifier, trainingData, testingData)
if evaluation.percent_correct() >= bestAccuracy:
bestAccuracy = evaluation.percent_correct()
bestTrainingData = trainingData
bestTestingData = testingData
bestRemainFilterList = list(featureList3)
bestEvaluation = evaluation
print(self.algorithmTable[indexInTable] + ": " + "{:.2f}".format(evaluation.percent_correct()) + ", Feature select number:" + str(trainingData.num_attributes() - 1) + "/" + str(featureNum))
resultList.append("{:.2f}".format(evaluation.percent_correct()))
while trainingData.num_attributes() - 1 > 10:
for i in range(0,10):
functionName = featureList3.pop().split("(")[0] + "\(\)"
functionName = functionName.replace('$','\$')
#print "functionName:" + functionName
remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveByName", options=["-E", "^" + functionName + ".*$"])
remove.set_inputformat(trainingData)
trainingData = remove.filter(trainingData)
if testingData:
remove.set_inputformat(testingData)
testingData = remove.filter(testingData)
#print functionName
#print "featureNum: " + str(filteredData.num_attributes() - 1)
#for attributeStr in trainingData.attributes():
# print(attributeStr)
classifier = self.algorithmPicker(trainingData, indexInTable)
evaluation = self.evaluation(classifier, trainingData, testingData)
if evaluation.percent_correct() >= bestAccuracy:
bestAccuracy = evaluation.percent_correct()
bestTrainingData = trainingData
bestTestingData = testingData
bestRemainFilterList = list(featureList3)
bestEvaluation = evaluation
#print "update feature number:" + str(len(bestRemainFilterList))
print(self.algorithmTable[indexInTable] + ": " + "{:.2f}".format(evaluation.percent_correct()) + ", Feature select number:" + str(trainingData.num_attributes() - 1) + "/" + str(featureNum))
resultList.append("{:.2f}".format(evaluation.percent_correct()))
resultList.reverse()
#.........这里部分代码省略.........
开发者ID:zhaohengyang,项目名称:Android-malware-detection,代码行数:101,代码来源:weka_interface.py
示例20: print
from weka.core.converters import Loader, Saver
from weka.core.dataset import Instances
from weka.filters import Filter
# start the JVM backing python-weka-wrapper
jvm.start()
# load weather.nominal
fname = data_dir + os.sep + "weather.nominal.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
# output header (template_instances with no rows prints just the structure)
print(Instances.template_instances(data))
# remove attribute no 3 (Remove's -R option is 1-based)
print("\nRemove attribute no 3")
fltr = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "3"])
fltr.set_inputformat(data)
filtered = fltr.filter(data)
# output header
print(Instances.template_instances(filtered))
# save modified dataset
saver = Saver(classname="weka.core.converters.ArffSaver")
saver.save_file(filtered, data_dir + os.sep + "weather.nominal-filtered.arff")
# shut the JVM down once all Weka work is done
jvm.stop()
注:本文中的weka.filters.Filter类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论