• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python Options.get_pathname_option函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中spambayes.Options.get_pathname_option函数的典型用法代码示例。如果您正苦于以下问题:Python get_pathname_option函数的具体用法?Python get_pathname_option怎么用?Python get_pathname_option使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了get_pathname_option函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: unlearn_compare

def unlearn_compare(nsets, unsets):
    print options.display()

    spamdirs = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, nsets+1)]
    hamdirs = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, nsets+1)]
    spamhamdirs = zip(spamdirs, hamdirs)
    unspamdirs = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, unsets+1)]
    unhamdirs = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, unsets+1)]
    unspamhamdirs = zip(unspamdirs, unhamdirs)

    d = TestDriver.Driver()
    d.new_classifier()
    """
    for spamdir, hamdir in spamhamdirs:
        d.train(msgs.HamStream(hamdir, [hamdir]),
                msgs.SpamStream(spamdir, [spamdir]))
    """
    d.train(msgs.HamStream(hamdirs[0], [hamdirs[0]]),
            msgs.SpamStream(spamdirs[0], [spamdirs[0]]))
    d.train(msgs.HamStream(hamdirs[1], [hamdirs[1]]),
            msgs.SpamStream(spamdirs[1], [spamdirs[1]]))
    d.test(msgs.HamStream(hamdirs[2], [hamdirs[2]]),
           msgs.SpamStream(spamdirs[2], [spamdirs[2]]))
    d.finishtest()
    d.alldone()

    unlearn_driver(d, spamhamdirs, unspamhamdirs)
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:27,代码来源:alex^2test.py


示例2: createWorkers

    def createWorkers(self):
        """Using the options that were initialised in __init__ and then
        possibly overridden by the driver code, create the Bayes object,
        the Corpuses, the Trainers and so on."""
        print "Loading database...",
        if self.isTest:
            self.useDB = "pickle"
            self.DBName = '_pop3proxy_test.pickle'   # This is never saved.
        if not hasattr(self, "DBName"):
            self.DBName, self.useDB = storage.database_type([])
        self.bayes = storage.open_storage(self.DBName, self.useDB)
        
        self.buildStatusStrings()

        # Don't set up the caches and training objects when running the self-test,
        # so as not to clutter the filesystem.
        if not self.isTest:
            def ensureDir(dirname):
                try:
                    os.mkdir(dirname)
                except OSError, e:
                    if e.errno != errno.EEXIST:
                        raise

            # Create/open the Corpuses.  Use small cache sizes to avoid hogging
            # lots of memory.
            sc = get_pathname_option("Storage", "spam_cache")
            hc = get_pathname_option("Storage", "ham_cache")
            uc = get_pathname_option("Storage", "unknown_cache")
            map(ensureDir, [sc, hc, uc])
            if self.gzipCache:
                factory = GzipFileMessageFactory()
            else:
                factory = FileMessageFactory()
            age = options["Storage", "cache_expiry_days"]*24*60*60
            self.spamCorpus = ExpiryFileCorpus(age, factory, sc,
                                               '[0123456789\-]*',
                                               cacheSize=20)
            self.hamCorpus = ExpiryFileCorpus(age, factory, hc,
                                              '[0123456789\-]*',
                                              cacheSize=20)
            self.unknownCorpus = ExpiryFileCorpus(age, factory, uc,
                                                  '[0123456789\-]*',
                                                  cacheSize=20)

            # Given that (hopefully) users will get to the stage
            # where they do not need to do any more regular training to
            # be satisfied with spambayes' performance, we expire old
            # messages from not only the trained corpora, but the unknown
            # as well.
            self.spamCorpus.removeExpiredMessages()
            self.hamCorpus.removeExpiredMessages()
            self.unknownCorpus.removeExpiredMessages()

            # Create the Trainers.
            self.spamTrainer = storage.SpamTrainer(self.bayes)
            self.hamTrainer = storage.HamTrainer(self.bayes)
            self.spamCorpus.addObserver(self.spamTrainer)
            self.hamCorpus.addObserver(self.hamTrainer)
开发者ID:Xodarap,项目名称:Eipi,代码行数:59,代码来源:sb_server.py


示例3: drive

def drive():
    print options.display()

    spam = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, 5)]
    ham = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, 5)]

    d = dictionarywriter.DictionaryWriter(150, 4)
    d.write()

    keep_going = True
    trial_number = 1

    au = ActiveUnlearnDriver.ActiveUnlearner([msgs.HamStream(ham[1], [ham[1]]),
                                              msgs.HamStream(ham[2], [ham[2]])],
                                             [msgs.SpamStream(spam[1], [spam[1]]),
                                              msgs.SpamStream(spam[3], [spam[3]])],
                                             msgs.HamStream(ham[0], [ham[0]]),
                                             msgs.SpamStream(spam[0], [spam[0]]),
                                             )
    with open("C:\Users\Alex\Desktop\dict_correlation_stats.txt", 'w') as outfile:

        while keep_going:
            chosen = set()
            current = au.select_initial()
            cluster = au.determine_cluster(current)
            chosen.add(current)
            au.driver.test(au.testing_ham, au.testing_spam)

            while not cluster:
                current = au.select_initial(chosen)
                cluster = au.determine_cluster(current)
                chosen.add(current)
                au.driver.test(au.testing_ham, au.testing_spam)

            cluster_list = list(cluster.cluster_set)

            dicts = au.driver.tester.train_examples[2]

            data = v_correlation(cluster_list, dicts)

            outfile.write("Trial " + str(trial_number) + " Percentage Overlap (Correlation): " + str(data))
            answer = raw_input("Keep going (y/n)? You have performed " + str(trial_number) + " trial(s) so far. ")

            valid_input = False

            while not valid_input:
                if answer == "n":
                    keep_going = False
                    valid_input = True

                elif answer == "y":
                    au.learn(cluster)
                    au.init_ground()
                    trial_number += 1
                    valid_input = True

                else:
                    print "Please enter either y or n."
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:58,代码来源:dict_correlation_test.py


示例4: __init__

    def __init__(self, spam_feature=None, ham_feature=None, inject_type=0):
        """Record the injection directories and the feature to inject.

        ``h_injected`` and ``s_injected`` are the TestDriver ham and spam
        directory templates filled in with set number 3, each with a
        trailing "/".

        ``inject_type`` selects the feature: 0 stores ``spam_feature`` and
        1 stores ``ham_feature`` in ``self.feature``.  For any other value
        ``self.feature`` is left unset, as before.
        """

        self.h_injected = get_pathname_option("TestDriver", "ham_directories") % 3 + "/"
        self.s_injected = get_pathname_option("TestDriver", "spam_directories") % 3 + "/"

        # Compare by value: "is" tests object identity, which only happens
        # to work for small ints because CPython caches them.
        if inject_type == 0:
            self.feature = spam_feature
        elif inject_type == 1:
            self.feature = ham_feature
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:9,代码来源:InjectionPollution.py


示例5: main

def main():
    ham = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, 5)]
    spam = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, 5)]

    t = TestDriver.Driver()
    t.train(msgs.HamStream(ham[0], [ham[0]]), msgs.SpamStream(spam[0], [spam[0]]))
    t.dict_test(msgs.HamStream(ham[2], [ham[2]]), msgs.SpamStream(spam[3], [spam[3]]))
    print "Test sizes: ", len(t.tester.truth_examples[0]), ", ", len(t.tester.truth_examples[1]), "\n"
    print "Detection rate:", t.tester.correct_classification_rate(), "\n"
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:9,代码来源:dict_mem_test.py


示例6: main

def main():
    """Record classification rates for a schedule of mislabel counts.

    For every size in a fixed schedule, ``MislabeledFileMover(size)``
    runs ``random_move_file()``.  One shared driver is then trained on
    set 1 and tested on set 2 (the "target" rate), additionally trained
    on set 3 and tested on set 2 again (the "detection" rate).  Both
    training sets are untrained and the mover is reset before the next
    size.

    The sizes, detection rates and target rates are written as a plain
    table to a hard-coded file on the original author's desktop.  False
    positives, false negatives and unsure counts are collected but not
    written out.
    """

    def set_dirs(option_name):
        # Fill the directory template with set numbers 1..3.
        return [get_pathname_option("TestDriver", option_name) % set_no
                for set_no in range(1, 4)]

    ham = set_dirs("ham_directories")
    spam = set_dirs("spam_directories")

    def streams(index):
        # Build the ham and spam message streams for one set.
        return (msgs.HamStream(ham[index], [ham[index]]),
                msgs.SpamStream(spam[index], [spam[index]]))

    sizes = [0, 60, 120, 240, 480, 840, 1200, 2400, 3600, 4800, 6000]

    d = TestDriver.Driver()
    d.new_classifier()

    detection_rates, target_rates = [], []
    false_positives, false_negatives, unsures = [], [], []

    for size in sizes:

        mislabeler = MislabeledFileMover(size)
        mislabeler.random_move_file()

        # Baseline: trained on set 1 only.
        d.train(*streams(0))
        d.test(*streams(1))
        target_rates.append(d.tester.correct_classification_rate())

        # Same test set after also training on set 3.
        d.train(*streams(2))
        d.test(*streams(1))
        detection_rates.append(d.tester.correct_classification_rate())

        false_positives.append(d.tester.nham_wrong)
        false_negatives.append(d.tester.nspam_wrong)
        unsures.append(d.tester.nham_unsure + d.tester.nspam_unsure)

        # Undo both training steps so the next size starts clean.
        d.untrain(*streams(0))
        d.untrain(*streams(2))

        mislabeler.reset()

    table = {"# of Mislabeled Words": sizes,
             "Detection Rates": detection_rates,
             "Target Rates": target_rates}

    with open("/Users/AlexYang/Desktop/hamasspam.txt", 'w') as outfile:

        outfile.write(tabulate(table, headers="keys", tablefmt="plain"))
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:57,代码来源:vanillatest.py


示例7: create_workers

    def create_workers(self):
        """Create the Bayes object, stats manager, corpora and trainers.

        Works from the options initialised in __init__ (possibly
        overridden by the driver code).  When ``self.is_test`` is set, the
        classifier storage and stats manager are still created, but no
        cache directories, corpora or trainers.
        """
        if self.is_test:
            self.use_db = "pickle"
            self.db_name = '_core_server.pickle'   # This is never saved.
        if not hasattr(self, "db_name"):
            self.db_name, self.use_db = storage.database_type([])
        self.bayes = storage.open_storage(self.db_name, self.use_db)

        # Load stats manager.
        message_info_db = spambayes.message.Message().message_info_db
        self.stats = Stats.Stats(options, message_info_db)

        self.build_status_strings()

        # The self-test must not clutter the filesystem, so it stops here
        # without caches or training objects.
        if self.is_test:
            return

        # Resolve all three cache directories first, then make sure each
        # one exists.
        cache_dirs = [get_pathname_option("Storage", option_name)
                      for option_name in ("core_spam_cache",
                                          "core_ham_cache",
                                          "core_unknown_cache")]
        for cache_dir in cache_dirs:
            storage.ensureDir(cache_dir)
        spam_dir, ham_dir, unknown_dir = cache_dirs

        if not self.gzip_cache:
            factory = FileMessageFactory()
        else:
            factory = GzipFileMessageFactory()
        age = options["Storage", "cache_expiry_days"]*24*60*60

        def open_corpus(directory):
            # Small cache sizes avoid hogging lots of memory.
            return ExpiryFileCorpus(age, factory, directory,
                                    '[0123456789\-]*',
                                    cacheSize=20)

        self.spamCorpus = open_corpus(spam_dir)
        self.hamCorpus = open_corpus(ham_dir)
        self.unknownCorpus = open_corpus(unknown_dir)

        # Old messages are expired from the unknown corpus as well as the
        # trained ones, since users are expected to eventually stop doing
        # regular training.
        for corpus in (self.spamCorpus, self.hamCorpus, self.unknownCorpus):
            corpus.removeExpiredMessages()

        # Create the Trainers and let them observe their corpora.
        self.spam_trainer = storage.SpamTrainer(self.bayes)
        self.ham_trainer = storage.HamTrainer(self.bayes)
        self.spamCorpus.addObserver(self.spam_trainer)
        self.hamCorpus.addObserver(self.ham_trainer)
开发者ID:dbrandt,项目名称:spambayes-lite,代码行数:56,代码来源:CoreUI.py


示例8: drive

def drive(nsets):
    print options.display()

    spamdirs = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, nsets+1)]
    hamdirs = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, nsets+1)]

    d = TestDriver.Driver()
    d.new_classifier()
    d.train(msgs.HamStream(hamdirs[0], [hamdirs[0]]), msgs.SpamStream(spamdirs[0], [spamdirs[0]]))
    d.test(msgs.HamStream(hamdirs[1], [hamdirs[1]]), msgs.SpamStream(spamdirs[1], [spamdirs[1]]))
    d.finishtest()
    d.alldone()
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:12,代码来源:alex^2test.py


示例9: drive

def drive(nsets, decision):
    """Score every message in the test sets, training as ``decision`` says.

    Every file found in spam/ham directories 1..nsets is scored with the
    classifier opened from "weaktest.db".  ``decision(score, is_spam)`` is
    then called for each message; the message is trained as spam or ham
    when it returns TRAIN_AS_SPAM or TRAIN_AS_HAM.  While
    ``decision.tooearly()`` is false, each score is also fed to a cost
    counter.  Progress is printed every 100 messages and once at the end.

    ``debug``, ``TRAIN_AS_SPAM``, ``TRAIN_AS_HAM`` and ``CostCounter``
    are defined outside this block.
    """
    print options.display()

    spamdirs = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, nsets + 1)]
    hamdirs = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, nsets + 1)]

    # (directory, filename, is_spam flag) for every message file.
    spamfns = [(x, y, 1) for x in spamdirs for y in os.listdir(x)]
    hamfns = [(x, y, 0) for x in hamdirs for y in os.listdir(x)]

    # NOTE(review): nham and nspam are not used anywhere below.
    nham = len(hamfns)
    nspam = len(spamfns)
    cc = CostCounter.nodelay()

    # The tuples become dict keys; the loop below processes messages in
    # whatever order the dict yields them, not in listing order.
    allfns = {}
    for fn in spamfns + hamfns:
        allfns[fn] = None

    d = hammie.open("weaktest.db", False)

    hamtrain = 0
    spamtrain = 0
    n = 0
    # NOTE(review): the loop variable "dir" shadows the builtin.
    for dir, name, is_spam in allfns.iterkeys():
        n += 1
        m = msgs.Msg(dir, name).guts
        if debug > 1:
            print "trained:%dH+%dS" % (hamtrain, spamtrain)
        scr = d.score(m)
        if debug > 1:
            print "score:%.3f" % scr
        # Scores only reach the cost counter once the decision object no
        # longer reports "too early".
        if not decision.tooearly():
            if is_spam:
                if debug > 0:
                    print "Spam with score %.2f" % scr
                cc.spam(scr)
            else:
                if debug > 0:
                    print "Ham with score %.2f" % scr
                cc.ham(scr)
        # decision() is called for every message, regardless of tooearly().
        de = decision(scr, is_spam)
        if de == TRAIN_AS_SPAM:
            d.train_spam(m)
            spamtrain += 1
        elif de == TRAIN_AS_HAM:
            d.train_ham(m)
            hamtrain += 1
        # Periodic progress report.
        if n % 100 == 0:
            print "%5d trained:%dH+%dS wrds:%d" % (n, hamtrain, spamtrain, len(d.bayes.wordinfo))
            print cc
    # Final summary.
    print "=" * 70
    print "%5d trained:%dH+%dS wrds:%d" % (n, hamtrain, spamtrain, len(d.bayes.wordinfo))
    print cc
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:52,代码来源:weaktest.py


示例10: drive

def drive(nsets):
    """Run an nsets-fold cross-validation over the ham/spam directories.

    The driver is first trained on every set except the first.  Then, for
    each set i in turn, set i is tested against a classifier trained on
    the other sets.  Depending on the "CV Driver" option
    "build_each_classifier_from_scratch", that classifier is either
    rebuilt from the remaining sets or obtained incrementally by
    untraining set i before the test and retraining it afterwards.
    """
    print options.display()

    # Directory templates filled in with set numbers 1..nsets.
    hamdirs  = [get_pathname_option("TestDriver", "ham_directories") % \
                i for i in range(1, nsets+1)]
    spamdirs = [get_pathname_option("TestDriver", "spam_directories") % \
                i for i in range(1, nsets+1)]

    d = TestDriver.Driver()
    # Train it on all sets except the first.
    d.train(msgs.HamStream("%s-%d" % (hamdirs[1], nsets),
                            hamdirs[1:], train=1),
            msgs.SpamStream("%s-%d" % (spamdirs[1], nsets),
                            spamdirs[1:], train=1))

    # Now run nsets times, predicting pair i against all except pair i.
    for i in range(nsets):
        h = hamdirs[i]
        s = spamdirs[i]
        hamstream = msgs.HamStream(h, [h], train=0)
        spamstream = msgs.SpamStream(s, [s], train=0)

        # For i == 0 the initial training above already excludes this set.
        if i > 0:
            if options["CV Driver", "build_each_classifier_from_scratch"]:
                # Build a new classifier from the other sets.
                d.new_classifier()

                # Copy the list and drop set i, leaving hamdirs itself
                # untouched.
                hname = "%s-%d, except %d" % (hamdirs[0], nsets, i+1)
                h2 = hamdirs[:]
                del h2[i]

                sname = "%s-%d, except %d" % (spamdirs[0], nsets, i+1)
                s2 = spamdirs[:]
                del s2[i]

                d.train(msgs.HamStream(hname, h2, train=1),
                        msgs.SpamStream(sname, s2, train=1))

            else:
                # Forget this set.
                d.untrain(hamstream, spamstream)

        # Predict this set.
        d.test(hamstream, spamstream)
        d.finishtest()

        # In incremental mode, restore the set for the next round; after
        # the last round there is nothing left to test.
        if i < nsets - 1 and not options["CV Driver",
                                         "build_each_classifier_from_scratch"]:
            # Add this set back in.
            d.train(hamstream, spamstream)

    d.alldone()
开发者ID:ehuelsmann,项目名称:spambayes,代码行数:52,代码来源:timcv.py


示例11: main

def main():

    ham = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, 4)]
    spam = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, 4)]
    injected = get_pathname_option("TestDriver", "spam_directories") % 3

    au = ActiveUnlearnDriver.ActiveUnlearner([msgs.HamStream(ham[0], [ham[0]]), msgs.HamStream(ham[2], [ham[2]])],
                                             [msgs.SpamStream(spam[0], [spam[0]]), msgs.SpamStream(spam[2], [spam[2]])],
                                             msgs.HamStream(ham[1], [ham[1]]), msgs.SpamStream(spam[1], [spam[1]]))

    msg = choice(au.driver.tester.train_examples[2])    # Randomly chosen from Ham Set3

    original_rate = au.driver.tester.correct_classification_rate()
    cluster_sizes = []
    detection_rates = []
    target_cluster_rates = []

    sizes = []
    for i in range(150, 1050, 50):
        sizes.append(i)
    for i in range(1000, 15000, 1000):
        sizes.append(i)

    for size in sizes:
        cluster = ActiveUnlearnDriver.Cluster(msg, size, au, "extreme")
        print "Clustering with size " + str(cluster.size) + "..."
        cluster_sizes.append(size)
        detection_rates.append(au.detect_rate(cluster))
        target_cluster_rates.append(float(cluster.target_set3()) / float(cluster.size))

    file = open("/Users/AlexYang/Desktop/clues.txt", 'w')

    features = au.driver.classifier._getclues(msg)
    i = 1
    for feature in features:
        file.write(str(i) + ") ")
        file.write(str(feature) + "\n")
        i += 1

    with open("/Users/AlexYang/Desktop/clusterstats.txt", 'w') as outfile:

        outfile.write("Clustered around: " + msg.tag)
        outfile.write("\nOriginal Rate: " + str(original_rate) + "\n")

        outfile.write(tabulate({"Cluster Sizes": cluster_sizes,
                                "Detection Rates": detection_rates,
                                "% of Targets Clustered": target_cluster_rates},
                               headers="keys", tablefmt="plain"))
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:48,代码来源:clustest.py


示例12: database_type

def database_type(opts):
    """Return the name of the database and the type to use.  The output of
    this function can be used as the db_type parameter for the open_storage
    function, for example:

        [standard getopts code]
        db_name, db_type = database_type(opts)
        storage = open_storage(db_name, db_type)

    The selection is made based on the options passed, or, if the
    appropriate options are not present, the options in the global
    options object.

    Currently supports:
       -p  :  pickle
       -d  :  dbm

    ``opts`` is an iterable of (option, argument) pairs.  The first pair
    whose option is a key of ``_storage_options`` supplies the name and
    type.

    Raises MutuallyExclusiveError if more than one storage option is
    present in ``opts``.
    """
    nm, typ = None, None
    for opt, arg in opts:
        # "in" replaces the deprecated dict.has_key().
        if opt in _storage_options:
            if nm is None and typ is None:
                nm, typ = arg, _storage_options[opt]
            else:
                raise MutuallyExclusiveError()
    if nm is None and typ is None:
        # Nothing on the command line: fall back to the global options.
        typ = options["Storage", "persistent_use_database"]
        # The option may hold a boolean or its string form; any other
        # value is passed through as the type name unchanged.
        if typ is True or typ == "True":
            typ = "dbm"
        elif typ is False or typ == "False":
            typ = "pickle"
        nm = get_pathname_option("Storage", "persistent_storage_file")
    return nm, typ
开发者ID:Xodarap,项目名称:Eipi,代码行数:32,代码来源:storage.py


示例13: main

def main():
    print "Pickle is available."
    db = dumbdbm.open("dumbdb", "c")
    db["1"] = "1"
    db.close()
    dbstr = whichdb.whichdb("dumbdb")
    if dbstr:
        print "Dumbdbm is available."
    else:
        print "Dumbdbm is not available."

    db = dbhash.open("dbhash", "c")
    db["1"] = "1"
    db.close()
    dbstr = whichdb.whichdb("dbhash")
    if dbstr == "dbhash":
        print "Dbhash is available."
    else:
        print "Dbhash is not available."

    if bsddb is None:
        dbstr = ""
    else:
        db = bsddb.hashopen("bsddb3", "c")
        db["1"] = "1"
        db.close()
        dbstr = whichdb.whichdb("bsddb3")
    if dbstr == "dbhash":
        print "Bsddb[3] is available."
    else:
        print "Bsddb[3] is not available."

    print

    hammie = get_pathname_option("Storage", "persistent_storage_file")
    use_dbm = options["Storage", "persistent_use_database"]
    if not use_dbm:
        print "Your storage %s is a: pickle" % (hammie,)
        return

    if not os.path.exists(hammie):
        print "Your storage file does not exist yet."
        return
    db_type = whichdb.whichdb(hammie)
    if db_type == "dbhash":
        # could be dbhash or bsddb3
        # only bsddb3 has a __version__ attribute - old bsddb module does not
        if hasattr(bsddb, '__version__'):
            try:
                db = bsddb.hashopen(hammie, "r")
            except bsddb.error:
                pass
            else:
                db.close()
                print "Your storage", hammie, "is a: bsddb[3]"
                return
    elif db_type is None:
        print "Your storage %s is unreadable." % (hammie,)
    print "Your storage %s is a: %s" % (hammie, db_type)
开发者ID:Xodarap,项目名称:Eipi,代码行数:59,代码来源:which_database.py


示例14: drive

def drive(num):
    print options.display()

    spamdirs = [get_pathname_option("TestDriver", "spam_directories") %
                i for i in range(1, 4)]
    hamdirs = [get_pathname_option("TestDriver", "ham_directories") %
               i for i in range(1, 4)]

    r = mislabeledfilemover.MislabeledFileMover(num)
    r.random_move_file()

    d = TestDriver.Driver()
    d.new_classifier()
    d.train(msgs.HamStream(hamdirs[0], [hamdirs[0]]),
            msgs.SpamStream(spamdirs[0], [spamdirs[0]]))
    d.train(msgs.HamStream(hamdirs[2], [hamdirs[2]]),
            msgs.SpamStream(spamdirs[2], [spamdirs[2]]))
    d.test(msgs.HamStream(hamdirs[1], [hamdirs[1]]),
           msgs.SpamStream(spamdirs[1], [spamdirs[1]]))

    guess = d.classifier.spamprob
    polluted = []
    for msg in msgs.HamStream(hamdirs[2], [hamdirs[2]]):
        msg.prob = guess(msg)
        polluted.append(msg)

    for msg in msgs.SpamStream(spamdirs[2], [spamdirs[2]]):
        msg.prob = guess(msg)
        polluted.append(msg)

    mislabeled = []
    for fp in d.tester.false_positives():
        mislabeled.append(fp)

    for fn in d.tester.false_negatives():
        mislabeled.append(fn)

    for unsure in d.unsure:
        mislabeled.append(unsure)

    d.finishtest()
    d.alldone()

    data = v_correlation(polluted, mislabeled)

    print "Percentage Overlap (Correlation): " + str(data)
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:46,代码来源:correlationtest.py


示例15: splice_set

def splice_set(n, dir_num=3):
    destination = get_pathname_option("TestDriver", "spam_directories") % dir_num + "/"
    dict_c = 1
    for dictionary in listdir(destination):
        print "Slicing dictionary", dict_c, "into", n, "parts"
        splice(destination + dictionary, n)
        remove(destination + dictionary)
        dict_c += 1
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:8,代码来源:dictionarysplicer.py


示例16: main

def main():
    ham = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, 5)]
    spam = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, 5)]

    t = TestDriver.Driver()
    t.train(msgs.HamStream(ham[1], [ham[1]]), msgs.SpamStream(spam[1], [spam[1]]))

    keep_going = True
    trial_number = 0

    while keep_going:
        start_time = time.time()
        if trial_number == 0:
            t.test(msgs.HamStream(ham[0], [ham[0]]), msgs.SpamStream(spam[0], [spam[0]]), True)

        else:
            t.test(t.tester.truth_examples[1], t.tester.truth_examples[0])
        end_time = time.time()
        seconds = end_time - start_time

        trial_number += 1
        print "Test sizes: ", len(t.tester.truth_examples[0]), ", ", len(t.tester.truth_examples[1]), "\n"
        print "Detection rate:", t.tester.correct_classification_rate(), "\n"
        print "\nTime elapsed:", seconds, "seconds.\n"
        answer = raw_input("Keep trying (y/n)? You have performed " + str(trial_number) + " trial(s) so far. ")

        valid_input = False
        while not valid_input:
            if answer == "y":
                valid_input = True

            elif answer == "n":
                sys.exit()

            else:
                answer = raw_input("Please enter either y or n. ")
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:36,代码来源:timer_test.py


示例17: __init__

    def __init__(self, number):
        """Record source, test and destination directories and list them.

        Sets 1, 2 and 3 of the TestDriver ham and spam directory
        templates become the source, test and destination directories
        (each with a trailing "/").  The source and destination
        directories are listed immediately.  ``ham_num`` is set to
        ``number`` and ``spam_num`` to 0.
        """
        def set_dir(option_name, set_no):
            # Template filled in with the set number, plus a trailing slash.
            return get_pathname_option("TestDriver", option_name) % set_no + "/"

        self.NUMBER = number

        self.ham_num = self.NUMBER
        self.ham_source, self.ham_test, self.ham_destination = [
            set_dir("ham_directories", set_no) for set_no in (1, 2, 3)]
        self.ham_source_files = listdir(self.ham_source)
        self.ham_destination_files = listdir(self.ham_destination)

        self.spam_num = 0
        self.spam_source, self.spam_test, self.spam_destination = [
            set_dir("spam_directories", set_no) for set_no in (1, 2, 3)]
        self.spam_source_files = listdir(self.spam_source)
        self.spam_destination_files = listdir(self.spam_destination)
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:16,代码来源:benignfilemover.py


示例18: usage

        elif opt == '-g':
            good.append(arg)
        elif opt == '-s':
            spam.append(arg)
        elif opt == "-r":
            removetrained = True
        elif opt == '-o':
            options.set_from_cmdline(arg, sys.stderr)
    pck, usedb = storage.database_type(opts)
    if args:
        usage(2, "Positional arguments not allowed")

    if usedb == None:
        # Use settings in configuration file.
        usedb = options["Storage", "persistent_use_database"]
        pck = get_pathname_option("Storage",
                                          "persistent_storage_file")

    h = hammie.open(pck, usedb, "c")

    for g in good:
        if loud:
            print "Training ham (%s):" % g
        train(h, g, False, force, trainnew, removetrained)
        sys.stdout.flush()
        save = True

    for s in spam:
        if loud:
            print "Training spam (%s):" % s
        train(h, s, True, force, trainnew, removetrained)
        sys.stdout.flush()
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:32,代码来源:sb_mboxtrain.py


示例19: main

def main():
    """Interactively record detection and spam rates of growing clusters.

    Builds an ``ActiveUnlearner`` from ham sets 2 and 3 and spam sets 2
    and 4 with set 1 as the other two stream arguments.  Each trial
    picks a random message from ``train_examples[0]``, builds a cluster
    of size 100 around it, grows it in steps of 100 while recording the
    detection rate and spam fraction, writes the numbers to a per-trial
    stats file (path hard-coded to the original author's desktop), and
    asks whether to continue.
    """
    import os
    import sys
    from random import choice

    # Make the current directory and its parent importable.
    sys.path.insert(-1, os.getcwd())
    sys.path.insert(-1, os.path.dirname(os.getcwd()))

    from spambayes import ActiveUnlearnDriver
    from spambayes.Options import get_pathname_option
    from spambayes import msgs

    # The bare string literals below are inert; they hold code that was
    # disabled by the original author.
    """
    from dictionarywriter import DictionaryWriter
    """

    ham = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, 5)]
    spam = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, 5)]
    """
    DictionaryWriter(600).write()
    """

    keep_going = True
    trial_number = 1

    au_v = ActiveUnlearnDriver.ActiveUnlearner([msgs.HamStream(ham[1], [ham[1]]),
                                                msgs.HamStream(ham[2], [ham[2]])],
                                               [msgs.SpamStream(spam[1], [spam[1]]),
                                                msgs.SpamStream(spam[3], [spam[3]])],
                                               msgs.HamStream(ham[0], [ham[0]]),
                                               msgs.SpamStream(spam[0], [spam[0]]),
                                               )
    while keep_going:
        msg = choice(au_v.driver.tester.train_examples[0])
        try:
            test_cl, counter = au_v.determine_cluster(msg)
            test_size = test_cl.size
            au_v.learn(test_cl)

        except TypeError:
            # NOTE(review): presumably determine_cluster returned something
            # that cannot be unpacked (e.g. None) -- confirm.  Fall back to
            # a single growth step and a marker string for the report.
            counter = 1
            test_size = "100, but fail"

        cluster_detection_rates_v = []
        cluster_spam_rates_v = []
        cluster_sizes = []

        au_v.init_ground()
        original_rate_v = au_v.driver.tester.correct_classification_rate()
        cluster_size = 100
        cluster_sizes.append(100)

        print "Clustering with size", cluster_size, "..."

        cl_v = ActiveUnlearnDriver.Cluster(msg, cluster_size, au_v, "extreme")
        cluster_spam_rates_v.append(float(cl_v.target_spam()) / float(cluster_size))
        cluster_detection_rates_v.append(au_v.start_detect_rate(cl_v))

        # Grow the cluster counter + 1 times, 100 at a time; i is unused.
        for i in range(1, counter + 2):
            cluster_size += 100
            cluster_sizes.append(cluster_size)

            print "Clustering with size", cluster_size, "..."

            cluster_detection_rates_v.append(au_v.continue_detect_rate(cl_v, 100))
            cluster_spam_rates_v.append(float(cl_v.target_spam()) / float(cluster_size))

        # One report file per trial; the trial number is part of the name.
        with open("C:\Users\Alex\Desktop\det_cluster_stats_v" + str(trial_number) + ".txt", 'w') as outfile:
            outfile.write("VANILLA MACHINE\n")

            outfile.write("--------------------------\n")

            outfile.write("Clustered around: " + msg.tag + "\n")

            outfile.write("--------------------------\n")

            outfile.write("Detection Rates:\n")
            outfile.write(str(original_rate_v) + "\n")

            for item in cluster_detection_rates_v:
                outfile.write(str(item) + "\n")

            outfile.write("--------------------------\n")

            outfile.write("Spam Rate:\n")
            for item in cluster_spam_rates_v:
                outfile.write(str(item) + "\n")

            outfile.write("Test Cluster Size:\n")
            outfile.write(str(test_size))

        answer = raw_input("Keep going (y/n)? You have performed " + str(trial_number) + " trials so far. ")

        # Anything other than "n" counts as "continue".
        if answer == "n":
            keep_going = False

        else:
            au_v.learn(cl_v)
            au_v.init_ground()
            trial_number += 1
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:100,代码来源:det_cluster_test.py


示例20: main

def main():
    """Cluster around one test message and dump the cluster to a file.

    Builds an ``ActiveUnlearnDriver`` from ham/spam sets 1, 3 and 4,
    tests on set 1, untrains sets 3 and 4, tests on set 1 again, then
    takes ``test_examples[5]`` as the seed message.  The seed file is
    copied to a hard-coded desktop folder, a cluster is requested via
    ``au.cluster(msg, 10)`` and timed, and the contents of every cluster
    member are written to one output file with a SPAM/HAM banner chosen
    from the file-name suffix.
    """
    import os
    import sys
    import shutil

    # Make the current directory and its parent importable.
    sys.path.insert(-1, os.getcwd())
    sys.path.insert(-1, os.path.dirname(os.getcwd()))

    from spambayes import ActiveUnlearnDriver
    from spambayes.Options import get_pathname_option
    from spambayes import msgs
    import time

    ham = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, 5)]
    spam = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, 5)]

    # Runs exactly once; the loop only makes it easy to repeat the
    # experiment by changing the range.
    for i in range(1):
        au = ActiveUnlearnDriver.ActiveUnlearnDriver([msgs.HamStream(ham[0], [ham[0]]),
                                                      msgs.HamStream(ham[2], [ham[2]]),
                                                      msgs.HamStream(ham[3], [ham[3]])],
                                                     [msgs.SpamStream(spam[0], [spam[0]]),
                                                      msgs.SpamStream(spam[2], [spam[2]]),
                                                      msgs.SpamStream(spam[3], [spam[3]])],
                                                     msgs.HamStream(ham[2], [ham[2]]),
                                                     msgs.SpamStream(spam[2], [spam[2]]),
                                                     "ac-extreme")


        # Test, untrain sets 3 and 4, then test again on the same set.
        au.driver.test(msgs.HamStream(ham[0], [ham[0]]), msgs.SpamStream(spam[0], [spam[0]]))
        au.driver.untrain(msgs.HamStream(ham[2], [ham[2]]), msgs.SpamStream(spam[2], [spam[2]]))
        au.driver.untrain(msgs.HamStream(ham[3], [ham[3]]), msgs.SpamStream(spam[3], [spam[3]]))
        au.driver.test(msgs.HamStream(ham[0], [ham[0]]), msgs.SpamStream(spam[0], [spam[0]]))
        msg = au.driver.tester.test_examples[5]

        # msg.tag is used as a file path here and below.
        shutil.copy(msg.tag, "C:\Users\Alex\Desktop\clustera")
        print msg.prob

        # Only the clustering call itself is timed.
        start_time = time.time()
        cluster = (au.cluster(msg, 10))
        end_time = time.time()
        print cluster

        # Keep the first two fields of every clue of the seed message.
        clueslist = []
        for clue in msg.clues:
            clueslist.append((clue[0], clue[1]))
        print clueslist

        with open("C:\Users\Alex\Desktop\clustera\cluster7.txt", 'w') as outfile:
            spamcounter = 0
            for sim in cluster:
                with open(sim.tag) as infile:
                    # The banner is chosen from the file-name suffix.
                    if sim.tag.endswith(".spam.txt"):
                        outfile.write("SPAMSPAMSPAMSPAMSPAM" + "\n\n")
                    if sim.tag.endswith(".ham.txt"):
                        outfile.write("HAMHAMHAMHAMHAM" + "\n\n")

                    outfile.write(infile.read())
                    outfile.write("\n\n" + "----------------------------------------" + "\n\n")

                if sim.tag.endswith(".spam.txt"):
                    spamcounter += 1

            # Number of cluster members whose file name marks them as spam.
            print spamcounter

        # Elapsed clustering time in seconds.
        print end_time - start_time
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:65,代码来源:testest.py



注:本文中的spambayes.Options.get_pathname_option函数示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python options.set_from_cmdline函数代码示例发布时间:2022-05-27
下一篇:
Python Options._函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap