本文整理汇总了 Python 中 spambayes.mboxutils.getmbox 函数的典型用法代码示例。如果您正苦于以下问题:Python getmbox 函数的具体用法?Python getmbox 怎么用?Python getmbox 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 getmbox 函数的 11 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的 Python 代码示例。
示例1: score
def score(h, msgs, reverse=0):
    """Score (judge) all messages from a mailbox.

    Args:
        h: classifier wrapper; must provide ``score(msg, True)`` returning a
            ``(prob, clues)`` pair and ``formatclues(clues)``.
        msgs: mailbox specification handed to ``mboxutils.getmbox``.
        reverse: when false, messages scored as spam are reported; when
            true, messages scored as ham are reported instead.  Unsure
            messages are reported in both modes.

    Returns:
        A ``(spams, hams, unsures)`` tuple of message counts.
    """
    # XXX The reporting needs work!
    spams = hams = unsures = 0
    for i, msg in enumerate(mboxutils.getmbox(msgs), 1):
        prob, clues = h.score(msg, True)
        # Use the message's own _mh_msgno attribute when it has one,
        # otherwise its 1-based position in the mailbox.
        msgno = getattr(msg, '_mh_msgno', i)

        # flag stays None when the message is counted but not reported.
        flag = None
        if prob >= SPAM_THRESHOLD:
            spams += 1
            if not reverse:
                flag = "S"
        elif prob <= HAM_THRESHOLD:
            hams += 1
            if reverse:
                # NOTE(review): reported ham has always been tagged "S",
                # exactly like reported spam -- confirm this is intended.
                flag = "S"
        else:
            unsures += 1
            flag = "U"

        if flag is not None:
            # One line per reported message: number, score, flag, clues.
            print("%6s %4.2f %1s %s"
                  % (msgno, prob, flag, h.formatclues(clues)))
    return (spams, hams, unsures)
开发者ID:bloggse,项目名称:spambayes-lite,代码行数:30,代码来源:hammiebulk.py
示例2: cull
def cull(mbox_name, cullext, designation, tdict):
    """Keep the messages recorded in *tdict* and cull the others.

    Two modes, selected by *cullext*:

    * *cullext* truthy: every message whose ``message-id`` is a key of
      *tdict* is written to the file ``mbox_name + cullext``.
    * *cullext* falsy: every message whose ``message-id`` is NOT in *tdict*
      is flagged ``\\Deleted \\Seen`` through ``msg.imap_server`` (messages
      must then carry ``imap_server`` and ``uid`` attributes).

    A running "kept of seen" counter is written to stdout.

    Args:
        mbox_name: mailbox specification handed to ``mboxutils.getmbox``.
        cullext: suffix for the output file, or a falsy value.
        designation: label used only in the progress banner.
        tdict: container of message-ids to keep.
    """
    print("writing new %s mbox..." % designation)
    n = m = 0
    # open() instead of the Python-2-only file() builtin.
    culled_mbox = open(mbox_name + cullext, "w") if cullext else None
    try:
        for msg in mboxutils.getmbox(mbox_name):
            m += 1
            if msg["message-id"] in tdict:
                if culled_mbox is not None:
                    culled_mbox.write(str(msg))
                n += 1
            elif culled_mbox is None:
                response = msg.imap_server.uid(
                    "STORE", msg.uid, "+FLAGS.SILENT", "(\\Deleted \\Seen)")
                command = "set %s to be deleted and seen" % (msg.uid,)
                msg.imap_server.check_response(command, response)
            sys.stdout.write("\r%5d of %5d" % (n, m))
            sys.stdout.flush()
        sys.stdout.write("\n")
    finally:
        # Close the output file even when reading the mailbox or the IMAP
        # call fails, so buffered messages are flushed and the handle is
        # not leaked.
        if culled_mbox is not None:
            culled_mbox.close()
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:25,代码来源:tte.py
示例3: mapmessages
def _note_token(mapdb, token, mboxtype, f, msgid):
    """Record in *mapdb* that *token* occurs in message *msgid* of *f*."""
    ham, spam = mapdb.get(token, ({}, {}))
    bucket = ham if mboxtype == "ham" else spam
    bucket.setdefault(f, set()).add(msgid)
    # Always store the pair back: get() may have returned the fresh default.
    mapdb[token] = (ham, spam)


def mapmessages(f, mboxtype, mapdb):
    """Add every token of every message in mailbox *f* to *mapdb*.

    *mapdb* maps a token to a ``(ham, spam)`` pair of dicts, each mapping a
    mailbox name to the set of message-ids containing the token.  Messages
    are filed under the ham dict when *mboxtype* is ``"ham"`` and under the
    spam dict otherwise.  Messages without a message-id are skipped.  When
    the ``x-use_bigrams`` classifier option is set, the enhanced word
    stream is recorded as well.  Progress is written to stdout.
    """
    for count, msg in enumerate(getmbox(f), 1):
        sys.stdout.write('\r%s: %d' % (f, count))
        sys.stdout.flush()

        msgid = msg.get("message-id")
        if msgid is None:
            continue

        for token in tokenize(msg):
            _note_token(mapdb, token, mboxtype, f, msgid)

        if options["Classifier", "x-use_bigrams"]:
            for token in Classifier()._enhance_wordstream(tokenize(msg)):
                _note_token(mapdb, token, mboxtype, f, msgid)
    sys.stdout.write("\n")
开发者ID:ehuelsmann,项目名称:spambayes,代码行数:33,代码来源:mkreversemap.py
示例4: extractmessages
def extractmessages(features, mapdb, hamfile, spamfile):
"""Extract messages which contain given features.

For each feature, its (ham, spam) pair is looked up in mapdb and the
message-ids stored there are merged per mailbox.  Each mailbox that
contributed ids is then re-read with getmbox() and every message whose
message-id was collected is printed to hamfile or spamfile.  Passing None
as hamfile (or spamfile) skips collecting ham (or spam) ids entirely.
"""
# mailbox -> set of message-ids to extract, one dict per category
hamids = {}
spamids = {}
for feature in features:
# Unknown features yield two empty sequences, so the loops below do nothing.
ham, spam = mapdb.get(feature, ([], []))
if hamfile is not None:
for mbox in ham:
# Union this feature's ids into whatever was collected for the mailbox.
msgids = hamids.get(mbox, set())
msgids.update(ham.get(mbox, set()))
hamids[mbox] = msgids
if spamfile is not None:
for mbox in spam:
msgids = spamids.get(mbox, set())
msgids.update(spam.get(mbox, set()))
spamids[mbox] = msgids
# now run through each mailbox in hamids and spamids and print
# matching messages to relevant ham or spam files
for mailfile in hamids:
# i counts the messages extracted from this mailbox (progress display only)
i = 0
msgids = hamids[mailfile]
for msg in getmbox(mailfile):
if msg.get("message-id") in msgids:
i += 1
sys.stdout.write('\r%s: %5d' % (mailfile, i))
sys.stdout.flush()
print >> hamfile, msg
# NOTE(review): indentation was lost in this copy, so it is unclear whether
# this bare print (ending the '\r' progress line) runs once per mailbox or
# once after the loop -- confirm against the upstream file.
print
for mailfile in spamids:
i = 0
msgids = spamids[mailfile]
for msg in getmbox(mailfile):
if msg.get("message-id") in msgids:
i += 1
sys.stdout.write('\r%s: %5d' % (mailfile, i))
sys.stdout.flush()
print >> spamfile, msg
print
开发者ID:ehuelsmann,项目名称:spambayes,代码行数:41,代码来源:extractmessages.py
示例5: main
def main():
    """Apply the actions chosen on the command line to every message.

    Each action flag queues one ``HammieFilter`` method; with no action
    flag the filter action is used.  ``-n`` creates a new database and
    exits.  Every message of every mailbox argument (``"-"`` when no
    argument is given) is passed through the queued actions in order and
    then written to stdout.
    """
    h = HammieFilter()
    actions = []
    opts, args = getopt.getopt(sys.argv[1:], 'hxd:p:nfgstGSo:',
                               ['help', 'examples', 'option='])

    # Flags whose only effect is to queue a per-message action.
    queued_by_flag = {
        '-f': h.filter,
        '-g': h.train_ham,
        '-s': h.train_spam,
        '-t': h.filter_train,
        '-G': h.untrain_ham,
        '-S': h.untrain_spam,
    }

    create_newdb = False
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-x', '--examples'):
            examples()
        elif opt in ('-o', '--option'):
            Options.options.set_from_cmdline(arg, sys.stderr)
        elif opt in queued_by_flag:
            actions.append(queued_by_flag[opt])
        elif opt == "-n":
            create_newdb = True

    # -d / -p are accepted above but interpreted here, from the raw opts.
    h.dbname, h.usedb = storage.database_type(opts)

    if create_newdb:
        h.newdb()
        sys.exit(0)

    if not actions:
        actions = [h.filter]
    if not args:
        args = ["-"]

    # With the lone "-" argument the envelope line is only emitted when the
    # message actually carried one; for named mailboxes it is always kept.
    only_dash = (args == ["-"])
    for fname in args:
        for msg in mboxutils.getmbox(fname):
            for action in actions:
                action(msg)
            if only_dash:
                unixfrom = msg.get_unixfrom() is not None
            else:
                unixfrom = True
            sys.stdout.write(mboxutils.as_string(msg, unixfrom=unixfrom))
开发者ID:Xodarap,项目名称:Eipi,代码行数:49,代码来源:sb_filter.py
示例6: train
def train(store, ham, spam, maxmsgs, maxrounds, tdict, reverse, verbose):
    """Train *store* on paired ham/spam messages until a round has no miss.

    Each round re-opens both mailboxes with ``mboxutils.getmbox`` and takes
    one ham and one spam message per step.  A ham scoring above
    ``ham_cutoff`` or a spam scoring below ``spam_cutoff`` is a miss: its
    message-id is recorded in *tdict* and the message is learned.  Rounds
    stop after *maxrounds*, or as soon as a round produces no miss.

    After the last round every message that was not reached in either
    mailbox is recorded in *tdict* as well, and the number of such
    messages is printed.

    Args:
        store: classifier providing ``spamprob(tokens)`` and
            ``learn(tokens, is_spam)``.
        ham: ham mailbox specification for ``mboxutils.getmbox``.
        spam: spam mailbox specification for ``mboxutils.getmbox``.
        maxmsgs: per-round cap on processed messages; falsy for no cap.
        maxrounds: maximum number of rounds.
        tdict: dict updated in place, ``message-id -> True``.
        reverse: when true, both mailboxes are walked in reverse order.
        verbose: when true, rounds and misses are reported on stderr.
    """
    smisses = hmisses = round = 0
    # Ham fetched for a pair whose spam partner did not exist (see below).
    stranded_ham = 0
    ham_cutoff = Options.options["Categorization", "ham_cutoff"]
    spam_cutoff = Options.options["Categorization", "spam_cutoff"]

    # NOTE(review): with maxrounds < 1 the loop never runs and the leftover
    # accounting below fails on the unbound mailbox names, as it always did.
    while round < maxrounds and (hmisses or smisses or round == 0):
        hambone = mboxutils.getmbox(ham)
        spamcan = mboxutils.getmbox(spam)
        if reverse:
            hambone = reversed(list(hambone))
            spamcan = reversed(list(spamcan))
        round += 1

        if verbose:
            sys.stderr.write("*** round %s ***\n" % round)

        hmisses = smisses = nmsgs = 0
        stranded_ham = 0

        start = datetime.datetime.now()
        try:
            while not maxmsgs or nmsgs < maxmsgs:
                hammsg = next(hambone)
                try:
                    spammsg = next(spamcan)
                except StopIteration:
                    # The spam box ran dry after a ham was already taken.
                    # Tag that ham for saving like every other untrained
                    # message instead of silently dropping it.
                    tdict[hammsg["message-id"]] = True
                    stranded_ham += 1
                    raise

                nmsgs += 2
                sys.stdout.write("\r%5d" % nmsgs)
                sys.stdout.flush()

                score = store.spamprob(tokenize(hammsg))
                if score > ham_cutoff:
                    if verbose:
                        sys.stderr.write("miss ham: %.6f %s\n"
                                         % (score, hammsg["message-id"]))
                    hmisses += 1
                    tdict[hammsg["message-id"]] = True
                    store.learn(tokenize(hammsg), False)

                score = store.spamprob(tokenize(spammsg))
                if score < spam_cutoff:
                    if verbose:
                        sys.stderr.write("miss spam: %.6f %s\n"
                                         % (score, spammsg["message-id"]))
                    smisses += 1
                    tdict[spammsg["message-id"]] = True
                    store.learn(tokenize(spammsg), True)
        except StopIteration:
            pass

        delta = datetime.datetime.now() - start
        # Float division: integer division always discarded the sub-second
        # part, so the %.1f field below could never show a fraction.
        seconds = delta.seconds + delta.microseconds / 1000000.0

        print("\rround: %2d, msgs: %4d, ham misses: %3d, spam misses: %3d, %.1fs"
              % (round, nmsgs, hmisses, smisses, seconds))

    # We count all untrained messages so the user knows what was skipped.
    # We also tag them for saving so we don't lose messages which might have
    # value in a future run
    nhamleft = stranded_ham
    try:
        while True:
            msg = next(hambone)
            tdict[msg["message-id"]] = True
            nhamleft += 1
    except StopIteration:
        if nhamleft:
            print("%s untrained hams" % nhamleft)

    nspamleft = 0
    try:
        while True:
            msg = next(spamcan)
            tdict[msg["message-id"]] = True
            nspamleft += 1
    except StopIteration:
        if nspamleft:
            print("%s untrained spams" % nspamleft)
开发者ID:Xodarap,项目名称:Eipi,代码行数:72,代码来源:tte.py
示例7: pickle_read
try:
mapd = pickle_read(mapfile)
except IOError:
usage("Mapfile %s does not exist" % mapfile)
return 1
if not features and not args:
usage("Require at least one feature (-f) arg or one message file")
return 1
if not features:
# extract significant tokens from each message and identify
# where they came from
for f in args:
for msg in getmbox(f):
evidence = msg.get("X-Spambayes-Evidence", "")
evidence = re.sub(r"\s+", " ", evidence)
l = [e.rsplit(": ", 1)[0]
for e in evidence.split("; ")[2:]]
for s in l:
try:
s = make_header(decode_header(s)).__unicode__()
except:
s = unicode(s, 'us-ascii', 'replace')
features.add(s)
if not features:
usage("No X-Spambayes-Evidence headers found")
return 1
if spamfile is not None:
开发者ID:ehuelsmann,项目名称:spambayes,代码行数:30,代码来源:extractmessages.py
示例8: test_spambayes
def test_spambayes(iterations, timer, messages, ham_classifier):
# Prime the pump. This still leaves some hot functions uncompiled; these
# will be noticed as hot during the timed loops below.
for msg in messages:
ham_classifier.score(msg)
times = []
for _ in xrange(iterations):
t0 = timer()
for msg in messages:
ham_classifier.score(msg)
t1 = timer()
times.append(t1 - t0)
return times
if __name__ == "__main__":
    # Only the shared benchmark options are accepted; they are attached to
    # the parser by util.add_standard_options_to().
    parser = optparse.OptionParser(usage="%prog [options]",
                                   description="Run the SpamBayes benchmark.")
    util.add_standard_options_to(parser)
    options, args = parser.parse_args()

    # Both fixtures live in the "data" directory next to this script.
    data_dir = os.path.join(os.path.dirname(__file__), "data")
    mailbox = os.path.join(data_dir, "spambayes_mailbox")
    ham_data = os.path.join(data_dir, "spambayes_hammie.pkl")

    # Read the mailbox into a list up front so it can be iterated repeatedly.
    msgs = list(mboxutils.getmbox(mailbox))
    ham_classifier = hammie.open(ham_data, "pickle", "r")

    util.run_benchmark(options, options.num_runs, test_spambayes,
                       msgs, ham_classifier)
开发者ID:bennn,项目名称:retic_performance,代码行数:30,代码来源:bm_spambayes.py
示例9: bench_spambayes
Run a canned mailbox through a SpamBayes ham/spam classifier.
"""
import os.path
import perf
from spambayes import hammie, mboxutils
__author__ = "[email protected] (Skip Montanaro)"
__contact__ = "[email protected] (Collin Winter)"
def bench_spambayes(ham_classifier, messages):
    """Score every message in *messages* with *ham_classifier*.

    The scores are discarded and nothing is returned.
    """
    for message in messages:
        ham_classifier.score(message)
if __name__ == "__main__":
    runner = perf.Runner()
    runner.metadata['description'] = "Run the SpamBayes benchmark."

    # Both fixtures live in the "data" directory next to this script.
    data_dir = os.path.join(os.path.dirname(__file__), "data")
    mailbox = os.path.join(data_dir, "spambayes_mailbox")
    ham_data = os.path.join(data_dir, "spambayes_hammie.pkl")

    # Read the mailbox into a list up front so it can be iterated repeatedly.
    messages = list(mboxutils.getmbox(mailbox))
    ham_classifier = hammie.open(ham_data, "pickle", "r")

    runner.bench_func('spambayes', bench_spambayes,
                      ham_classifier, messages)
开发者ID:Yaspee,项目名称:performance,代码行数:30,代码来源:bm_spambayes.py
示例10: range
outdirs = [outputbasepath + ("%d" % i) for i in range(1, n+1)]
for dir in outdirs:
if not os.path.isdir(dir):
os.makedirs(dir)
counter = 0
cksums = set()
skipped = 0
for inputpath in inputpaths:
if doglob:
inpaths = glob.glob(inputpath)
else:
inpaths = [inputpath]
for inpath in inpaths:
mbox = mboxutils.getmbox(inpath)
for msg in mbox:
astext = str(msg)
cksum = md5(astext).hexdigest()
if delete_dups and cksum in cksums:
skipped += 1
continue
cksums.add(cksum)
i = random.randrange(n)
#assert astext.endswith('\n')
counter += 1
msgfile = open('%s/%d' % (outdirs[i], counter), 'wb')
msgfile.write(astext)
msgfile.close()
if verbose:
if counter % 100 == 0:
开发者ID:ehuelsmann,项目名称:spambayes,代码行数:31,代码来源:splitndirs.py
示例11: train
def train(store, hambox, spambox, maxmsgs, maxrounds, tdict, reverse, verbose,
ratio):
"""Train store on ham and spam, interleaved by ratio, until a round is clean.

Both mailboxes are read once into lists.  Each round walks them from the
start, choosing per step whether the next message is ham or spam, scores it
with store.spamprob() and, when it is misclassified, records its message-id
in tdict and learns it.  Rounds repeat while the previous round had misses
and fewer than maxrounds rounds have run.  Messages not reached in the last
round are then scored; misclassified ones are recorded in tdict too.

NOTE(review): indentation was lost in this copy; the nesting described
here is the most plausible reading -- confirm against the upstream file.
"""
round = 0
ham_cutoff = Options.options["Categorization", "ham_cutoff"]
spam_cutoff = Options.options["Categorization", "spam_cutoff"]
# list-ify ham and spam iterators immediately. We don't really want to
# fetch the messages multiple times, and this is no worse than what happened
# before when -R was passed.
hambone_ = list(mboxutils.getmbox(hambox))
spamcan_ = list(mboxutils.getmbox(spambox))
if reverse:
hambone_ = list(reversed(hambone_))
spamcan_ = list(reversed(spamcan_))
nspam, nham = len(spamcan_), len(hambone_)
# NOTE(review): rspam and rham are only bound when ratio is truthy, yet they
# are read unconditionally when picking the next message below; a falsy
# ratio would presumably raise NameError -- confirm callers always pass one.
if ratio:
rspam, rham = ratio
# If the actual ratio of spam to ham in the database is better than
# what was asked for, use that better ratio.
if (rspam > rham) == (rspam * nham > rham * nspam):
rspam, rham = nspam, nham
# define some indexing constants
ham = 0
spam = 1
name = ('ham','spam')
misses = [0, 0]
# True when a spam scored below spam_cutoff or a ham scored above ham_cutoff.
misclassified = lambda is_spam, score: (
is_spam and score < spam_cutoff or not is_spam and score > ham_cutoff)
while round < maxrounds and (misses[ham] or misses[spam] or round == 0):
round += 1
if verbose:
print >> sys.stderr, "*** round", round, "***"
start = datetime.datetime.now()
# Fresh iterators each round so every round starts from the first message.
hambone = iter(hambone_)
spamcan = iter(spamcan_)
# i[ham] / i[spam]: messages of each kind consumed so far this round
i = [0, 0]
msgs_processed = 0
misses = [0, 0]
training_sets = [hambone, spamcan]
while not maxmsgs or msgs_processed < maxmsgs:
# should the next message come from hambone or spamcan?
train_spam = i[ham] * rspam > i[spam] * rham
try:
# train_spam is a bool used as index: False -> hambone, True -> spamcan
train_msg = training_sets[train_spam].next()
except StopIteration:
break
i[train_spam] += 1
msgs_processed += 1
sys.stdout.write("\r%5d" % msgs_processed)
sys.stdout.flush()
# Tokenize once; the same token list is used for scoring and learning.
tokens = list(tokenize(train_msg))
score = store.spamprob(tokens)
# A message with neither message-id nor subject is never trained on.
selector = train_msg["message-id"] or train_msg["subject"]
if misclassified(train_spam, score) and selector is not None:
if verbose:
print >> sys.stderr, "\tmiss %s: %.6f %s" % (
name[train_spam], score, selector)
misses[train_spam] += 1
tdict[train_msg["message-id"]] = True
store.learn(tokens, train_spam)
delta = datetime.datetime.now()-start
# NOTE(review): under Python 2 integer division microseconds/1000000 is
# always 0, so the %.1f field never shows a fraction -- confirm intent.
seconds = delta.seconds + delta.microseconds/1000000
print "\rround: %2d, msgs: %4d, ham misses: %3d, spam misses: %3d, %.1fs" % \
(round, msgs_processed, misses[0], misses[1], seconds)
# The iterators of the final round still hold the messages it did not reach.
training_sets = [hambone, spamcan]
# We count all untrained messages so the user knows what was skipped.
# We also tag them for saving so we don't lose messages which might have
# value in a future run
for is_spam in ham, spam:
nleft = 0
try:
while True:
msg = training_sets[is_spam].next()
score = store.spamprob(tokenize(msg))
if misclassified(is_spam, score):
tdict[msg["message-id"]] = True
nleft += 1
except StopIteration:
if nleft:
print nleft, "untrained %ss" % name[is_spam]
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:100,代码来源:tte.py
注:本文中的 spambayes.mboxutils.getmbox 函数示例由纯净天空整理自 GitHub/MSDocs 等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的 License;未经允许,请勿转载。
请发表评论