本文整理汇总了Python中spambayes.Options.get_pathname_option函数的典型用法代码示例。如果您正苦于以下问题：Python get_pathname_option函数的具体用法？Python get_pathname_option怎么用？Python get_pathname_option使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了get_pathname_option函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: unlearn_compare
def unlearn_compare(nsets, unsets):
print options.display()
spamdirs = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, nsets+1)]
hamdirs = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, nsets+1)]
spamhamdirs = zip(spamdirs, hamdirs)
unspamdirs = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, unsets+1)]
unhamdirs = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, unsets+1)]
unspamhamdirs = zip(unspamdirs, unhamdirs)
d = TestDriver.Driver()
d.new_classifier()
"""
for spamdir, hamdir in spamhamdirs:
d.train(msgs.HamStream(hamdir, [hamdir]),
msgs.SpamStream(spamdir, [spamdir]))
"""
d.train(msgs.HamStream(hamdirs[0], [hamdirs[0]]),
msgs.SpamStream(spamdirs[0], [spamdirs[0]]))
d.train(msgs.HamStream(hamdirs[1], [hamdirs[1]]),
msgs.SpamStream(spamdirs[1], [spamdirs[1]]))
d.test(msgs.HamStream(hamdirs[2], [hamdirs[2]]),
msgs.SpamStream(spamdirs[2], [spamdirs[2]]))
d.finishtest()
d.alldone()
unlearn_driver(d, spamhamdirs, unspamhamdirs)
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:27,代码来源:alex^2test.py
示例2: createWorkers
def createWorkers(self):
"""Using the options that were initialised in __init__ and then
possibly overridden by the driver code, create the Bayes object,
the Corpuses, the Trainers and so on."""
print "Loading database...",
if self.isTest:
self.useDB = "pickle"
self.DBName = '_pop3proxy_test.pickle' # This is never saved.
if not hasattr(self, "DBName"):
self.DBName, self.useDB = storage.database_type([])
self.bayes = storage.open_storage(self.DBName, self.useDB)
self.buildStatusStrings()
# Don't set up the caches and training objects when running the self-test,
# so as not to clutter the filesystem.
if not self.isTest:
def ensureDir(dirname):
try:
os.mkdir(dirname)
except OSError, e:
if e.errno != errno.EEXIST:
raise
# Create/open the Corpuses. Use small cache sizes to avoid hogging
# lots of memory.
sc = get_pathname_option("Storage", "spam_cache")
hc = get_pathname_option("Storage", "ham_cache")
uc = get_pathname_option("Storage", "unknown_cache")
map(ensureDir, [sc, hc, uc])
if self.gzipCache:
factory = GzipFileMessageFactory()
else:
factory = FileMessageFactory()
age = options["Storage", "cache_expiry_days"]*24*60*60
self.spamCorpus = ExpiryFileCorpus(age, factory, sc,
'[0123456789\-]*',
cacheSize=20)
self.hamCorpus = ExpiryFileCorpus(age, factory, hc,
'[0123456789\-]*',
cacheSize=20)
self.unknownCorpus = ExpiryFileCorpus(age, factory, uc,
'[0123456789\-]*',
cacheSize=20)
# Given that (hopefully) users will get to the stage
# where they do not need to do any more regular training to
# be satisfied with spambayes' performance, we expire old
# messages from not only the trained corpora, but the unknown
# as well.
self.spamCorpus.removeExpiredMessages()
self.hamCorpus.removeExpiredMessages()
self.unknownCorpus.removeExpiredMessages()
# Create the Trainers.
self.spamTrainer = storage.SpamTrainer(self.bayes)
self.hamTrainer = storage.HamTrainer(self.bayes)
self.spamCorpus.addObserver(self.spamTrainer)
self.hamCorpus.addObserver(self.hamTrainer)
开发者ID:Xodarap,项目名称:Eipi,代码行数:59,代码来源:sb_server.py
示例3: drive
def drive():
print options.display()
spam = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, 5)]
ham = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, 5)]
d = dictionarywriter.DictionaryWriter(150, 4)
d.write()
keep_going = True
trial_number = 1
au = ActiveUnlearnDriver.ActiveUnlearner([msgs.HamStream(ham[1], [ham[1]]),
msgs.HamStream(ham[2], [ham[2]])],
[msgs.SpamStream(spam[1], [spam[1]]),
msgs.SpamStream(spam[3], [spam[3]])],
msgs.HamStream(ham[0], [ham[0]]),
msgs.SpamStream(spam[0], [spam[0]]),
)
with open("C:\Users\Alex\Desktop\dict_correlation_stats.txt", 'w') as outfile:
while keep_going:
chosen = set()
current = au.select_initial()
cluster = au.determine_cluster(current)
chosen.add(current)
au.driver.test(au.testing_ham, au.testing_spam)
while not cluster:
current = au.select_initial(chosen)
cluster = au.determine_cluster(current)
chosen.add(current)
au.driver.test(au.testing_ham, au.testing_spam)
cluster_list = list(cluster.cluster_set)
dicts = au.driver.tester.train_examples[2]
data = v_correlation(cluster_list, dicts)
outfile.write("Trial " + str(trial_number) + " Percentage Overlap (Correlation): " + str(data))
answer = raw_input("Keep going (y/n)? You have performed " + str(trial_number) + " trial(s) so far. ")
valid_input = False
while not valid_input:
if answer == "n":
keep_going = False
valid_input = True
elif answer == "y":
au.learn(cluster)
au.init_ground()
trial_number += 1
valid_input = True
else:
print "Please enter either y or n."
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:58,代码来源:dict_correlation_test.py
示例4: __init__
def __init__(self, spam_feature=None, ham_feature=None, inject_type=0):
    """Record the injection directories and the feature to inject.

    spam_feature -- stored as self.feature when inject_type == 0.
    ham_feature  -- stored as self.feature when inject_type == 1.
    inject_type  -- selects which of the two features is used; for any
                    other value self.feature is left unset.
    """
    # Set number 3 of the ham/spam directory templates, with a trailing "/".
    self.h_injected = get_pathname_option("TestDriver", "ham_directories") % 3 + "/"
    self.s_injected = get_pathname_option("TestDriver", "spam_directories") % 3 + "/"

    # Compare by value: "is" on an int literal only works by virtue of
    # the interpreter's small-integer caching.
    if inject_type == 0:
        self.feature = spam_feature
    elif inject_type == 1:
        self.feature = ham_feature
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:9,代码来源:InjectionPollution.py
示例5: main
def main():
ham = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, 5)]
spam = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, 5)]
t = TestDriver.Driver()
t.train(msgs.HamStream(ham[0], [ham[0]]), msgs.SpamStream(spam[0], [spam[0]]))
t.dict_test(msgs.HamStream(ham[2], [ham[2]]), msgs.SpamStream(spam[3], [spam[3]]))
print "Test sizes: ", len(t.tester.truth_examples[0]), ", ", len(t.tester.truth_examples[1]), "\n"
print "Detection rate:", t.tester.correct_classification_rate(), "\n"
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:9,代码来源:dict_mem_test.py
示例6: main
def main():
    """Tabulate classification rates for a series of MislabeledFileMover sizes.

    For every entry in ``sizes`` the mover is run, the driver is trained on
    pair 1 and tested on pair 2 (the "target" rate), then additionally
    trained on pair 3 and tested on pair 2 again (the "detection" rate).
    Both training steps are undone and the mover reset before the next
    size.  The rates are written as a plain table to a fixed file.
    """
    ham_template = get_pathname_option("TestDriver", "ham_directories")
    ham = [ham_template % i for i in range(1, 4)]
    spam_template = get_pathname_option("TestDriver", "spam_directories")
    spam = [spam_template % i for i in range(1, 4)]

    sizes = [0, 60, 120, 240, 480, 840, 1200, 2400, 3600, 4800, 6000]

    def streams(idx):
        # Fresh (ham, spam) streams for directory pair idx; ham is built first.
        return (msgs.HamStream(ham[idx], [ham[idx]]),
                msgs.SpamStream(spam[idx], [spam[idx]]))

    d = TestDriver.Driver()
    d.new_classifier()

    detection_rates = []
    target_rates = []
    # NOTE: the next three lists are filled in but are not part of the
    # table written at the end.
    false_positives = []
    false_negatives = []
    unsures = []

    for size in sizes:
        mislabeler = MislabeledFileMover(size)
        mislabeler.random_move_file()

        d.train(*streams(0))
        d.test(*streams(1))
        target_rates.append(d.tester.correct_classification_rate())

        d.train(*streams(2))
        d.test(*streams(1))
        detection_rates.append(d.tester.correct_classification_rate())

        false_positives.append(d.tester.nham_wrong)
        false_negatives.append(d.tester.nspam_wrong)
        unsures.append(d.tester.nham_unsure + d.tester.nspam_unsure)

        # Undo both training steps so the next size starts clean.
        d.untrain(*streams(0))
        d.untrain(*streams(2))
        mislabeler.reset()

    table = tabulate({"# of Mislabeled Words": sizes,
                      "Detection Rates": detection_rates,
                      "Target Rates": target_rates},
                     headers="keys", tablefmt="plain")
    with open("/Users/AlexYang/Desktop/hamasspam.txt", 'w') as outfile:
        outfile.write(table)
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:57,代码来源:vanillatest.py
示例7: create_workers
def create_workers(self):
    """Create the Bayes object, stats manager and (outside tests) corpora.

    Uses the options initialised in __init__ (possibly overridden by the
    driver code) to open the classifier storage and the stats manager,
    then builds the spam, ham and unknown message corpora and the
    trainers observing them.
    """
    if self.is_test:
        self.use_db = "pickle"
        self.db_name = '_core_server.pickle'   # This is never saved.
    if not hasattr(self, "db_name"):
        self.db_name, self.use_db = storage.database_type([])
    self.bayes = storage.open_storage(self.db_name, self.use_db)

    # Load stats manager.
    info_db = spambayes.message.Message().message_info_db
    self.stats = Stats.Stats(options, info_db)

    self.build_status_strings()

    # Don't set up the caches and training objects when running the
    # self-test, so as not to clutter the filesystem.
    if self.is_test:
        return

    spam_dir = get_pathname_option("Storage", "core_spam_cache")
    ham_dir = get_pathname_option("Storage", "core_ham_cache")
    unknown_dir = get_pathname_option("Storage", "core_unknown_cache")
    for cache_dir in [spam_dir, ham_dir, unknown_dir]:
        storage.ensureDir(cache_dir)

    if self.gzip_cache:
        factory = GzipFileMessageFactory()
    else:
        factory = FileMessageFactory()

    # Expiry age in seconds, from the configured number of days.
    age = options["Storage", "cache_expiry_days"]*24*60*60

    # Create/open the corpora.  Small cache sizes avoid hogging memory.
    self.spamCorpus = ExpiryFileCorpus(age, factory, spam_dir,
                                       '[0123456789\-]*', cacheSize=20)
    self.hamCorpus = ExpiryFileCorpus(age, factory, ham_dir,
                                      '[0123456789\-]*', cacheSize=20)
    self.unknownCorpus = ExpiryFileCorpus(age, factory, unknown_dir,
                                          '[0123456789\-]*', cacheSize=20)

    # Users are expected to eventually stop regular training, so old
    # messages are expired from the unknown corpus as well as from the
    # trained ones.
    self.spamCorpus.removeExpiredMessages()
    self.hamCorpus.removeExpiredMessages()
    self.unknownCorpus.removeExpiredMessages()

    # Create the trainers and attach them to the trained corpora.
    self.spam_trainer = storage.SpamTrainer(self.bayes)
    self.ham_trainer = storage.HamTrainer(self.bayes)
    self.spamCorpus.addObserver(self.spam_trainer)
    self.hamCorpus.addObserver(self.ham_trainer)
开发者ID:dbrandt,项目名称:spambayes-lite,代码行数:56,代码来源:CoreUI.py
示例8: drive
def drive(nsets):
print options.display()
spamdirs = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, nsets+1)]
hamdirs = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, nsets+1)]
d = TestDriver.Driver()
d.new_classifier()
d.train(msgs.HamStream(hamdirs[0], [hamdirs[0]]), msgs.SpamStream(spamdirs[0], [spamdirs[0]]))
d.test(msgs.HamStream(hamdirs[1], [hamdirs[1]]), msgs.SpamStream(spamdirs[1], [spamdirs[1]]))
d.finishtest()
d.alldone()
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:12,代码来源:alex^2test.py
示例9: drive
def drive(nsets, decision):
print options.display()
spamdirs = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, nsets + 1)]
hamdirs = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, nsets + 1)]
spamfns = [(x, y, 1) for x in spamdirs for y in os.listdir(x)]
hamfns = [(x, y, 0) for x in hamdirs for y in os.listdir(x)]
nham = len(hamfns)
nspam = len(spamfns)
cc = CostCounter.nodelay()
allfns = {}
for fn in spamfns + hamfns:
allfns[fn] = None
d = hammie.open("weaktest.db", False)
hamtrain = 0
spamtrain = 0
n = 0
for dir, name, is_spam in allfns.iterkeys():
n += 1
m = msgs.Msg(dir, name).guts
if debug > 1:
print "trained:%dH+%dS" % (hamtrain, spamtrain)
scr = d.score(m)
if debug > 1:
print "score:%.3f" % scr
if not decision.tooearly():
if is_spam:
if debug > 0:
print "Spam with score %.2f" % scr
cc.spam(scr)
else:
if debug > 0:
print "Ham with score %.2f" % scr
cc.ham(scr)
de = decision(scr, is_spam)
if de == TRAIN_AS_SPAM:
d.train_spam(m)
spamtrain += 1
elif de == TRAIN_AS_HAM:
d.train_ham(m)
hamtrain += 1
if n % 100 == 0:
print "%5d trained:%dH+%dS wrds:%d" % (n, hamtrain, spamtrain, len(d.bayes.wordinfo))
print cc
print "=" * 70
print "%5d trained:%dH+%dS wrds:%d" % (n, hamtrain, spamtrain, len(d.bayes.wordinfo))
print cc
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:52,代码来源:weaktest.py
示例10: drive
def drive(nsets):
print options.display()
hamdirs = [get_pathname_option("TestDriver", "ham_directories") % \
i for i in range(1, nsets+1)]
spamdirs = [get_pathname_option("TestDriver", "spam_directories") % \
i for i in range(1, nsets+1)]
d = TestDriver.Driver()
# Train it on all sets except the first.
d.train(msgs.HamStream("%s-%d" % (hamdirs[1], nsets),
hamdirs[1:], train=1),
msgs.SpamStream("%s-%d" % (spamdirs[1], nsets),
spamdirs[1:], train=1))
# Now run nsets times, predicting pair i against all except pair i.
for i in range(nsets):
h = hamdirs[i]
s = spamdirs[i]
hamstream = msgs.HamStream(h, [h], train=0)
spamstream = msgs.SpamStream(s, [s], train=0)
if i > 0:
if options["CV Driver", "build_each_classifier_from_scratch"]:
# Build a new classifier from the other sets.
d.new_classifier()
hname = "%s-%d, except %d" % (hamdirs[0], nsets, i+1)
h2 = hamdirs[:]
del h2[i]
sname = "%s-%d, except %d" % (spamdirs[0], nsets, i+1)
s2 = spamdirs[:]
del s2[i]
d.train(msgs.HamStream(hname, h2, train=1),
msgs.SpamStream(sname, s2, train=1))
else:
# Forget this set.
d.untrain(hamstream, spamstream)
# Predict this set.
d.test(hamstream, spamstream)
d.finishtest()
if i < nsets - 1 and not options["CV Driver",
"build_each_classifier_from_scratch"]:
# Add this set back in.
d.train(hamstream, spamstream)
d.alldone()
开发者ID:ehuelsmann,项目名称:spambayes,代码行数:52,代码来源:timcv.py
示例11: main
def main():
ham = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, 4)]
spam = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, 4)]
injected = get_pathname_option("TestDriver", "spam_directories") % 3
au = ActiveUnlearnDriver.ActiveUnlearner([msgs.HamStream(ham[0], [ham[0]]), msgs.HamStream(ham[2], [ham[2]])],
[msgs.SpamStream(spam[0], [spam[0]]), msgs.SpamStream(spam[2], [spam[2]])],
msgs.HamStream(ham[1], [ham[1]]), msgs.SpamStream(spam[1], [spam[1]]))
msg = choice(au.driver.tester.train_examples[2]) # Randomly chosen from Ham Set3
original_rate = au.driver.tester.correct_classification_rate()
cluster_sizes = []
detection_rates = []
target_cluster_rates = []
sizes = []
for i in range(150, 1050, 50):
sizes.append(i)
for i in range(1000, 15000, 1000):
sizes.append(i)
for size in sizes:
cluster = ActiveUnlearnDriver.Cluster(msg, size, au, "extreme")
print "Clustering with size " + str(cluster.size) + "..."
cluster_sizes.append(size)
detection_rates.append(au.detect_rate(cluster))
target_cluster_rates.append(float(cluster.target_set3()) / float(cluster.size))
file = open("/Users/AlexYang/Desktop/clues.txt", 'w')
features = au.driver.classifier._getclues(msg)
i = 1
for feature in features:
file.write(str(i) + ") ")
file.write(str(feature) + "\n")
i += 1
with open("/Users/AlexYang/Desktop/clusterstats.txt", 'w') as outfile:
outfile.write("Clustered around: " + msg.tag)
outfile.write("\nOriginal Rate: " + str(original_rate) + "\n")
outfile.write(tabulate({"Cluster Sizes": cluster_sizes,
"Detection Rates": detection_rates,
"% of Targets Clustered": target_cluster_rates},
headers="keys", tablefmt="plain"))
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:48,代码来源:clustest.py
示例12: database_type
def database_type(opts):
    """Return the name of the database and the type to use.  The output of
    this function can be used as the db_type parameter for the open_storage
    function, for example:

        [standard getopts code]
        db_name, db_type = database_type(opts)
        storage = open_storage(db_name, db_type)

    The selection is made based on the options passed, or, if the
    appropriate options are not present, the options in the global
    options object.

    Currently supports:
       -p  :  pickle
       -d  :  dbm

    opts is a sequence of (option, argument) pairs.  Raises
    MutuallyExclusiveError if more than one storage option is present.
    """
    nm, typ = None, None
    for opt, arg in opts:
        # "in" replaces the deprecated dict.has_key().
        if opt in _storage_options:
            if nm is None and typ is None:
                nm, typ = arg, _storage_options[opt]
            else:
                raise MutuallyExclusiveError()
    if nm is None and typ is None:
        # Nothing on the command line: fall back to the global options.
        typ = options["Storage", "persistent_use_database"]
        # The option may be a boolean or its string form; map both to the
        # storage type name.  Any other value is passed through unchanged.
        if typ is True or typ == "True":
            typ = "dbm"
        elif typ is False or typ == "False":
            typ = "pickle"
        nm = get_pathname_option("Storage", "persistent_storage_file")
    return nm, typ
开发者ID:Xodarap,项目名称:Eipi,代码行数:32,代码来源:storage.py
示例13: main
def main():
print "Pickle is available."
db = dumbdbm.open("dumbdb", "c")
db["1"] = "1"
db.close()
dbstr = whichdb.whichdb("dumbdb")
if dbstr:
print "Dumbdbm is available."
else:
print "Dumbdbm is not available."
db = dbhash.open("dbhash", "c")
db["1"] = "1"
db.close()
dbstr = whichdb.whichdb("dbhash")
if dbstr == "dbhash":
print "Dbhash is available."
else:
print "Dbhash is not available."
if bsddb is None:
dbstr = ""
else:
db = bsddb.hashopen("bsddb3", "c")
db["1"] = "1"
db.close()
dbstr = whichdb.whichdb("bsddb3")
if dbstr == "dbhash":
print "Bsddb[3] is available."
else:
print "Bsddb[3] is not available."
print
hammie = get_pathname_option("Storage", "persistent_storage_file")
use_dbm = options["Storage", "persistent_use_database"]
if not use_dbm:
print "Your storage %s is a: pickle" % (hammie,)
return
if not os.path.exists(hammie):
print "Your storage file does not exist yet."
return
db_type = whichdb.whichdb(hammie)
if db_type == "dbhash":
# could be dbhash or bsddb3
# only bsddb3 has a __version__ attribute - old bsddb module does not
if hasattr(bsddb, '__version__'):
try:
db = bsddb.hashopen(hammie, "r")
except bsddb.error:
pass
else:
db.close()
print "Your storage", hammie, "is a: bsddb[3]"
return
elif db_type is None:
print "Your storage %s is unreadable." % (hammie,)
print "Your storage %s is a: %s" % (hammie, db_type)
开发者ID:Xodarap,项目名称:Eipi,代码行数:59,代码来源:which_database.py
示例14: drive
def drive(num):
print options.display()
spamdirs = [get_pathname_option("TestDriver", "spam_directories") %
i for i in range(1, 4)]
hamdirs = [get_pathname_option("TestDriver", "ham_directories") %
i for i in range(1, 4)]
r = mislabeledfilemover.MislabeledFileMover(num)
r.random_move_file()
d = TestDriver.Driver()
d.new_classifier()
d.train(msgs.HamStream(hamdirs[0], [hamdirs[0]]),
msgs.SpamStream(spamdirs[0], [spamdirs[0]]))
d.train(msgs.HamStream(hamdirs[2], [hamdirs[2]]),
msgs.SpamStream(spamdirs[2], [spamdirs[2]]))
d.test(msgs.HamStream(hamdirs[1], [hamdirs[1]]),
msgs.SpamStream(spamdirs[1], [spamdirs[1]]))
guess = d.classifier.spamprob
polluted = []
for msg in msgs.HamStream(hamdirs[2], [hamdirs[2]]):
msg.prob = guess(msg)
polluted.append(msg)
for msg in msgs.SpamStream(spamdirs[2], [spamdirs[2]]):
msg.prob = guess(msg)
polluted.append(msg)
mislabeled = []
for fp in d.tester.false_positives():
mislabeled.append(fp)
for fn in d.tester.false_negatives():
mislabeled.append(fn)
for unsure in d.unsure:
mislabeled.append(unsure)
d.finishtest()
d.alldone()
data = v_correlation(polluted, mislabeled)
print "Percentage Overlap (Correlation): " + str(data)
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:46,代码来源:correlationtest.py
示例15: splice_set
def splice_set(n, dir_num=3):
destination = get_pathname_option("TestDriver", "spam_directories") % dir_num + "/"
dict_c = 1
for dictionary in listdir(destination):
print "Slicing dictionary", dict_c, "into", n, "parts"
splice(destination + dictionary, n)
remove(destination + dictionary)
dict_c += 1
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:8,代码来源:dictionarysplicer.py
示例16: main
def main():
ham = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, 5)]
spam = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, 5)]
t = TestDriver.Driver()
t.train(msgs.HamStream(ham[1], [ham[1]]), msgs.SpamStream(spam[1], [spam[1]]))
keep_going = True
trial_number = 0
while keep_going:
start_time = time.time()
if trial_number == 0:
t.test(msgs.HamStream(ham[0], [ham[0]]), msgs.SpamStream(spam[0], [spam[0]]), True)
else:
t.test(t.tester.truth_examples[1], t.tester.truth_examples[0])
end_time = time.time()
seconds = end_time - start_time
trial_number += 1
print "Test sizes: ", len(t.tester.truth_examples[0]), ", ", len(t.tester.truth_examples[1]), "\n"
print "Detection rate:", t.tester.correct_classification_rate(), "\n"
print "\nTime elapsed:", seconds, "seconds.\n"
answer = raw_input("Keep trying (y/n)? You have performed " + str(trial_number) + " trial(s) so far. ")
valid_input = False
while not valid_input:
if answer == "y":
valid_input = True
elif answer == "n":
sys.exit()
else:
answer = raw_input("Please enter either y or n. ")
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:36,代码来源:timer_test.py
示例17: __init__
def __init__(self, number):
    """Record the ham/spam directories and list their current files.

    number -- stored as NUMBER and as ham_num; spam_num starts at 0.

    For ham and spam alike, directory 1 is the source, 2 the test set and
    3 the destination; each path gets a trailing "/".
    """
    self.NUMBER = number

    self.ham_num = self.NUMBER
    ham_template = get_pathname_option("TestDriver", "ham_directories")
    self.ham_source = ham_template % 1 + "/"
    self.ham_test = ham_template % 2 + "/"
    self.ham_destination = ham_template % 3 + "/"
    self.ham_source_files = listdir(self.ham_source)
    self.ham_destination_files = listdir(self.ham_destination)

    self.spam_num = 0
    spam_template = get_pathname_option("TestDriver", "spam_directories")
    self.spam_source = spam_template % 1 + "/"
    self.spam_test = spam_template % 2 + "/"
    self.spam_destination = spam_template % 3 + "/"
    self.spam_source_files = listdir(self.spam_source)
    self.spam_destination_files = listdir(self.spam_destination)
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:16,代码来源:benignfilemover.py
示例18: usage
elif opt == '-g':
good.append(arg)
elif opt == '-s':
spam.append(arg)
elif opt == "-r":
removetrained = True
elif opt == '-o':
options.set_from_cmdline(arg, sys.stderr)
pck, usedb = storage.database_type(opts)
if args:
usage(2, "Positional arguments not allowed")
if usedb == None:
# Use settings in configuration file.
usedb = options["Storage", "persistent_use_database"]
pck = get_pathname_option("Storage",
"persistent_storage_file")
h = hammie.open(pck, usedb, "c")
for g in good:
if loud:
print "Training ham (%s):" % g
train(h, g, False, force, trainnew, removetrained)
sys.stdout.flush()
save = True
for s in spam:
if loud:
print "Training spam (%s):" % s
train(h, s, True, force, trainnew, removetrained)
sys.stdout.flush()
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:32,代码来源:sb_mboxtrain.py
示例19: main
def main():
import os
import sys
from random import choice
sys.path.insert(-1, os.getcwd())
sys.path.insert(-1, os.path.dirname(os.getcwd()))
from spambayes import ActiveUnlearnDriver
from spambayes.Options import get_pathname_option
from spambayes import msgs
"""
from dictionarywriter import DictionaryWriter
"""
ham = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, 5)]
spam = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, 5)]
"""
DictionaryWriter(600).write()
"""
keep_going = True
trial_number = 1
au_v = ActiveUnlearnDriver.ActiveUnlearner([msgs.HamStream(ham[1], [ham[1]]),
msgs.HamStream(ham[2], [ham[2]])],
[msgs.SpamStream(spam[1], [spam[1]]),
msgs.SpamStream(spam[3], [spam[3]])],
msgs.HamStream(ham[0], [ham[0]]),
msgs.SpamStream(spam[0], [spam[0]]),
)
while keep_going:
msg = choice(au_v.driver.tester.train_examples[0])
try:
test_cl, counter = au_v.determine_cluster(msg)
test_size = test_cl.size
au_v.learn(test_cl)
except TypeError:
counter = 1
test_size = "100, but fail"
cluster_detection_rates_v = []
cluster_spam_rates_v = []
cluster_sizes = []
au_v.init_ground()
original_rate_v = au_v.driver.tester.correct_classification_rate()
cluster_size = 100
cluster_sizes.append(100)
print "Clustering with size", cluster_size, "..."
cl_v = ActiveUnlearnDriver.Cluster(msg, cluster_size, au_v, "extreme")
cluster_spam_rates_v.append(float(cl_v.target_spam()) / float(cluster_size))
cluster_detection_rates_v.append(au_v.start_detect_rate(cl_v))
for i in range(1, counter + 2):
cluster_size += 100
cluster_sizes.append(cluster_size)
print "Clustering with size", cluster_size, "..."
cluster_detection_rates_v.append(au_v.continue_detect_rate(cl_v, 100))
cluster_spam_rates_v.append(float(cl_v.target_spam()) / float(cluster_size))
with open("C:\Users\Alex\Desktop\det_cluster_stats_v" + str(trial_number) + ".txt", 'w') as outfile:
outfile.write("VANILLA MACHINE\n")
outfile.write("--------------------------\n")
outfile.write("Clustered around: " + msg.tag + "\n")
outfile.write("--------------------------\n")
outfile.write("Detection Rates:\n")
outfile.write(str(original_rate_v) + "\n")
for item in cluster_detection_rates_v:
outfile.write(str(item) + "\n")
outfile.write("--------------------------\n")
outfile.write("Spam Rate:\n")
for item in cluster_spam_rates_v:
outfile.write(str(item) + "\n")
outfile.write("Test Cluster Size:\n")
outfile.write(str(test_size))
answer = raw_input("Keep going (y/n)? You have performed " + str(trial_number) + " trials so far. ")
if answer == "n":
keep_going = False
else:
au_v.learn(cl_v)
au_v.init_ground()
trial_number += 1
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:100,代码来源:det_cluster_test.py
示例20: main
def main():
import os
import sys
import shutil
sys.path.insert(-1, os.getcwd())
sys.path.insert(-1, os.path.dirname(os.getcwd()))
from spambayes import ActiveUnlearnDriver
from spambayes.Options import get_pathname_option
from spambayes import msgs
import time
ham = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, 5)]
spam = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, 5)]
for i in range(1):
au = ActiveUnlearnDriver.ActiveUnlearnDriver([msgs.HamStream(ham[0], [ham[0]]),
msgs.HamStream(ham[2], [ham[2]]),
msgs.HamStream(ham[3], [ham[3]])],
[msgs.SpamStream(spam[0], [spam[0]]),
msgs.SpamStream(spam[2], [spam[2]]),
msgs.SpamStream(spam[3], [spam[3]])],
msgs.HamStream(ham[2], [ham[2]]),
msgs.SpamStream(spam[2], [spam[2]]),
"ac-extreme")
au.driver.test(msgs.HamStream(ham[0], [ham[0]]), msgs.SpamStream(spam[0], [spam[0]]))
au.driver.untrain(msgs.HamStream(ham[2], [ham[2]]), msgs.SpamStream(spam[2], [spam[2]]))
au.driver.untrain(msgs.HamStream(ham[3], [ham[3]]), msgs.SpamStream(spam[3], [spam[3]]))
au.driver.test(msgs.HamStream(ham[0], [ham[0]]), msgs.SpamStream(spam[0], [spam[0]]))
msg = au.driver.tester.test_examples[5]
shutil.copy(msg.tag, "C:\Users\Alex\Desktop\clustera")
print msg.prob
start_time = time.time()
cluster = (au.cluster(msg, 10))
end_time = time.time()
print cluster
clueslist = []
for clue in msg.clues:
clueslist.append((clue[0], clue[1]))
print clueslist
with open("C:\Users\Alex\Desktop\clustera\cluster7.txt", 'w') as outfile:
spamcounter = 0
for sim in cluster:
with open(sim.tag) as infile:
if sim.tag.endswith(".spam.txt"):
outfile.write("SPAMSPAMSPAMSPAMSPAM" + "\n\n")
if sim.tag.endswith(".ham.txt"):
outfile.write("HAMHAMHAMHAMHAM" + "\n\n")
outfile.write(infile.read())
outfile.write("\n\n" + "----------------------------------------" + "\n\n")
if sim.tag.endswith(".spam.txt"):
spamcounter += 1
print spamcounter
print end_time - start_time
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:65,代码来源:testest.py
注：本文中的spambayes.Options.get_pathname_option函数示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论