本文整理汇总了Python中nltk.corpus.names.words函数的典型用法代码示例。如果您正苦于以下问题:Python words函数的具体用法?Python words怎么用?Python words使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了words函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: main
def main():
    """Train a Naive Bayes gender classifier on the NLTK names corpus and report on it.

    The labelled names are shuffled, then split into a training slice
    (index 1500 onward) and a dev-test slice (indices 500-1499).  The first
    500 shuffled names are not used by this function.

    Prints the predicted label for a handful of sample names, the accuracy
    on the dev-test slice, the five most informative features and every
    dev-test name the classifier got wrong.
    """
    from nltk.corpus import names

    # Kept under a separate name so the imported corpus reader is not shadowed.
    labeled_names = ([(name, 'male') for name in names.words('male.txt')] +
                     [(name, 'female') for name in names.words('female.txt')])
    random.shuffle(labeled_names)

    train_names = labeled_names[1500:]
    devtest_names = labeled_names[500:1500]

    train_set = [(gender_features(n), g) for (n, g) in train_names]
    devtest_set = [(gender_features(n), g) for (n, g) in devtest_names]
    classifier = nltk.NaiveBayesClassifier.train(train_set)

    def predict(name):
        return classifier.classify(gender_features(name))

    print(predict('Neo'))
    print(predict('Trinity'))
    print('attila: %s' % (predict('Attila'),))
    print(predict('Bori'))
    print(predict('Gabi'))
    print('andy: %s' % (predict('Andy'),))
    print('dom: %s' % (predict('Dom'),))
    print('monica: %s' % (predict('Monica'),))
    print('donnie: %s' % (predict('Donald'),))

    print('accuracy: %s' % (nltk.classify.accuracy(classifier, devtest_set),))
    # show_most_informative_features() prints its own report and returns
    # None, so it must not be wrapped in print (that would emit "None").
    classifier.show_most_informative_features(5)

    errors = []
    for (name, tag) in devtest_names:
        guess = predict(name)
        if guess != tag:
            errors.append((tag, guess, name))

    for (tag, guess, name) in sorted(errors):
        print('correct=%-8s guess=%-8s name=%-30s' % (tag, guess, name))
开发者ID:attibalazs,项目名称:nltk-examples,代码行数:34,代码来源:6_Gender_Classification.py
示例2: initGenderClassifier
def initGenderClassifier():
    """Build and return a Naive Bayes gender classifier.

    Every entry of the NLTK names corpus is labelled 'male' or 'female'
    according to the file it is read from, converted with
    ``gender_features`` and used as training data.
    """
    from nltk.corpus import names

    labeled = []
    for corpus_file, label in (('male.txt', 'male'), ('female.txt', 'female')):
        labeled.extend((entry, label) for entry in names.words(corpus_file))

    training_data = []
    for entry, label in labeled:
        training_data.append((gender_features(entry), label))
    return nltk.NaiveBayesClassifier.train(training_data)
开发者ID:hjfu,项目名称:LinkedIngine,代码行数:7,代码来源:classifier.py
示例3: __calculateAgreement
def __calculateAgreement(self):
    """Derive gender, number and person agreement features for ``self.np``.

    * ``self.gender`` is set when the phrase has exactly one child and its
      first leaf appears in the NLTK male or female name list.
    * ``self.number`` is set to ``{'plural'}`` when the phrase contains a
      plural noun tag (NNS/NNPS), a comma or the word "and"; otherwise to
      ``{'singular'}``.
    * When the first child's label contains ``PRP``, gender, number and
      person are refined from the (lower-cased) pronoun itself.

    NOTE(review): the block nesting below was reconstructed from source
    whose indentation had been lost - confirm against the upstream file.
    """
    if len(self.np) == 1:
        head = self.np[0, 0]
        if head in names.words('male.txt'):
            self.gender = 'male'
        elif head in names.words('female.txt'):
            self.gender = 'female'

    pos_tags = {tag for (_, tag) in self.np.pos()}
    if {'NNS', 'NNPS'}.intersection(pos_tags) or {',', 'and'}.intersection(self.np.leaves()):
        self.number = {'plural'}
    else:
        self.number = {'singular'}

    if 'PRP' in self.np[0].label():
        pronoun = self.np[0, 0].lower()
        if pronoun in {'they', 'them', 'themselves', 'their'}:
            self.number = {'plural'}
        elif pronoun in {'him', 'he', 'himself'}:
            self.gender = 'male'
            self.number = {'singular'}
        elif pronoun in {'her', 'herself', 'she'}:
            self.number = {'singular'}
            self.gender = 'female'
        elif pronoun in {'it', 'itself'}:
            self.number = {'singular'}
        elif pronoun in {'us', 'we', 'our', 'ourselves'}:
            self.number = {'plural'}
            self.person = 'first'
        # The token is lower-cased above, so the first-person pronoun must be
        # listed as 'i'; an upper-case 'I' entry could never match.
        elif pronoun in {'i', 'me', 'my', 'myself'}:
            self.number = {'singular'}
            self.person = 'first'
        elif pronoun in {'yourself'}:
            self.number = {'singular'}
            self.person = 'second'
        elif pronoun in {'you', 'your'}:
            self.number = {'singular', 'plural'}
            self.person = 'second'
        elif pronoun in {'yourselves'}:
            self.number = {'plural'}
            self.person = 'second'
开发者ID:5aurabhpathak,项目名称:all-I-ve-done,代码行数:33,代码来源:classes.py
示例4: gender
def gender(word):
    """Guess the gender of a word by training a classifier on name dictionaries.

    A Naive Bayes classifier is trained on the shuffled NLTK names corpus
    (everything after the first 500 shuffled entries) on every call and is
    then applied to ``word``.

    Args:
        word (str):  Word. (usually a name)

    Returns:
        The label predicted by the classifier; the training labels are
        'male' and 'female'.

    .. note::

        This method is very naive and not very useful, so it will be
        deprecated in the future.
    """
    male_entries = [(entry, 'male') for entry in names.words('male.txt')]
    female_entries = [(entry, 'female') for entry in names.words('female.txt')]
    labeled_names = male_entries + female_entries
    shuffle(labeled_names)

    featuresets = []
    for entry, label in labeled_names:
        featuresets.append((Classifier.gender_features(entry), label))

    classifier = nltk.NaiveBayesClassifier.train(featuresets[500:])
    query_features = Classifier.gender_features(word)
    return classifier.classify(query_features)
开发者ID:ismlkrkmz,项目名称:Dragonfire,代码行数:28,代码来源:nlplib.py
示例5: createName
def createName():
    """Return one randomly chosen first name from the NLTK names corpus.

    The name is drawn uniformly from the male list followed by the female
    list (a name present in both files is therefore twice as likely), and
    any spaces inside the chosen name are removed.
    """
    # Only a single name is needed, so pick it directly rather than
    # labelling every name with a gender and shuffling the whole list.
    candidates = list(names.words('male.txt')) + list(names.words('female.txt'))
    return str(random.choice(candidates)).replace(' ', '')
开发者ID:dattasaurabh82,项目名称:Decoy-Browsing,代码行数:7,代码来源:DecoyFacebookBrowsing.py
示例6: semanticClassify
def semanticClassify(self, s):
    """
    Attach a semantic label to every POS-tagged segment.

    Segments tagged 'NP' are labelled 'male_name', 'female_name' or
    'month' when the token is found in the corresponding list; segments
    tagged 'VERB' or 'NOUN' are labelled with the name of their first
    WordNet synset.  Every other segment gets the blank label ' '.
    POS tagging must therefore have been done beforehand.

    Input:  [('i', 'PRON'), ('love', 'VERB'), ('you', 'PRON')]
    Output: [('i', 'PRON', ' '), ('love', 'VERB', 'love.n.01'), ('you', 'PRON', ' ')]

    The labelled list is passed through ``self.encodeutf8`` before being
    returned.
    """
    # These lookup tables do not depend on the segment, so build them once
    # (as sets, for constant-time membership) instead of once per segment.
    male_name = set(w.lower() for w in names.words('male.txt'))
    female_name = set(w.lower() for w in names.words('female.txt'))
    month = set(['january', 'february', 'march', 'april', 'may', 'june',
                 'july', 'august', 'september', 'october', 'november', 'december'])

    classified_seg = []
    for seg in s:
        token, tag = seg[0], seg[1]
        label = ' '
        if tag == 'NP':
            # Male names take precedence over female names, then months.
            if token in male_name:
                label = 'male_name'
            elif token in female_name:
                label = 'female_name'
            elif token in month:
                label = 'month'
        elif tag == 'VERB' or tag == 'NOUN':
            synsets = wn.synsets(token)
            if len(synsets) > 0:
                label = synsets[0].name()
        classified_seg.append((token, tag, label))
    return self.encodeutf8(classified_seg)
开发者ID:nichijouyc,项目名称:SemanticGuessGenerator,代码行数:34,代码来源:semanticclassify.py
示例7: demo
def demo():
    """Run the nltk.classify.svm demonstration on the names corpus.

    Trains an ``SvmClassifier`` on last-letter / penultimate-letter
    features of the shuffled (fixed seed) names corpus, holding out the
    first 500 entries as a test set, and prints details of the trained
    model, one worked example and the overall test accuracy.
    """
    import random
    from nltk.classify import accuracy
    from nltk.corpus import names

    def gender_features(word):
        return {'last_letter': word[-1], 'penultimate_letter': word[-2]}

    male = [(entry, 'male') for entry in names.words('male.txt')]
    female = [(entry, 'female') for entry in names.words('female.txt')]
    labeled = male + female

    random.seed(60221023)
    random.shuffle(labeled)

    featuresets = [(gender_features(entry), label) for (entry, label) in labeled]
    test_set = featuresets[:500]
    train_set = featuresets[500:]

    print('--- nltk.classify.svm demo ---')
    print('Number of training examples: %s' % (len(train_set),))
    classifier = SvmClassifier.train(train_set)
    print('Total SVM dimensions: %s' % (len(classifier._svmfeatureindex),))
    print('Label mapping: %s' % (classifier._labelmapping,))

    print('--- Processing an example instance ---')
    print('Reference instance: %s' % (labeled[0],))
    sample = test_set[0]
    print('NLTK-format features:\n ' + str(sample))
    svm_sample = map_instance_to_svm(sample, classifier._labelmapping, classifier._svmfeatureindex)
    print('SVMlight-format features:\n ' + str(svm_sample))
    distr = classifier.prob_classify(sample[0])
    print('Instance classification and confidence: %s %s' % (distr.max(), distr.prob(distr.max())))

    print('--- Measuring classifier performance ---')
    print('Overall accuracy: %s' % (accuracy(classifier, test_set),))
开发者ID:approximatelylinear,项目名称:nltk,代码行数:32,代码来源:svm.py
示例8: demo
def demo():
    """Demonstrate the SVM classifier on a name/gender task.

    The names corpus is labelled, shuffled with a fixed seed and turned
    into two-letter suffix features.  Everything after the first 500
    feature sets trains an ``SvmClassifier``; the first 500 are used to
    show one example instance and to measure accuracy.  All results are
    printed.
    """
    import random
    from nltk.classify import accuracy
    from nltk.corpus import names

    def gender_features(word):
        features = {}
        features["last_letter"] = word[-1]
        features["penultimate_letter"] = word[-2]
        return features

    labeled_names = []
    for name in names.words("male.txt"):
        labeled_names.append((name, "male"))
    for name in names.words("female.txt"):
        labeled_names.append((name, "female"))

    random.seed(60221023)
    random.shuffle(labeled_names)

    featuresets = []
    for name, label in labeled_names:
        featuresets.append((gender_features(name), label))
    train_set = featuresets[500:]
    test_set = featuresets[:500]

    print("--- nltk.classify.svm demo ---")
    print("Number of training examples: %s" % (len(train_set),))
    classifier = SvmClassifier.train(train_set)
    print("Total SVM dimensions: %s" % (len(classifier._svmfeatureindex),))
    print("Label mapping: %s" % (classifier._labelmapping,))

    print("--- Processing an example instance ---")
    print("Reference instance: %s" % (labeled_names[0],))
    print("NLTK-format features:\n " + str(test_set[0]))
    converted = map_instance_to_svm(
        test_set[0], classifier._labelmapping, classifier._svmfeatureindex
    )
    print("SVMlight-format features:\n " + str(converted))
    distr = classifier.prob_classify(test_set[0][0])
    print(
        "Instance classification and confidence: %s %s"
        % (distr.max(), distr.prob(distr.max()))
    )

    print("--- Measuring classifier performance ---")
    print("Overall accuracy: %s" % (accuracy(classifier, test_set),))
开发者ID:trunghlt,项目名称:nltk,代码行数:35,代码来源:svm.py
示例9: new_naive_bayes_classifier
def new_naive_bayes_classifier():
    """Return the module-wide gender classifier, training it on first use.

    When ``CLASSIFIER_CACHE`` already holds a truthy value it is returned
    as is.  Otherwise four labelled training sets ('male', 'female',
    'neutral' and 'excess') are built with ``_new_training_set``, turned
    into feature sets with ``_gender_features`` and used to train a Naive
    Bayes classifier, which is stored in ``CLASSIFIER_CACHE`` and returned.
    """
    global CLASSIFIER_CACHE

    # Guard clause: reuse the classifier trained by an earlier call.
    if CLASSIFIER_CACHE:
        return CLASSIFIER_CACHE

    training_groups = (
        _new_training_set('male', names.words('male.txt'), MALE_PRONOUN_SEQ),
        _new_training_set('female', names.words('female.txt'), FEMALE_PRONOUN_SEQ),
        _new_training_set('neutral', NEUTRAL_PRONOUN_SEQ),
        _new_training_set(
            'excess', ABBREVIATION_SEQ, PREPOSITION_SEQ, string.punctuation,
            ('looking', 'is'),
        ),
    )
    labeled_words = chain(*training_groups)
    featureset_seq = (
        (_gender_features(word), gender) for word, gender in labeled_words
    )
    CLASSIFIER_CACHE = nltk.NaiveBayesClassifier.train(featureset_seq)
    return CLASSIFIER_CACHE
开发者ID:emilisto,项目名称:pygenus,代码行数:28,代码来源:pygenus.py
示例10: make_classifier
def make_classifier():
    """Train and return a Naive Bayes classifier that predicts gender from a name.

    The training data is the whole NLTK names corpus, each name labelled
    'male' or 'female' after its source file and converted to features
    with ``name_features``.
    """
    from nltk.corpus import names

    training_names = []
    for corpus_file, label in (('male.txt', 'male'), ('female.txt', 'female')):
        for entry in names.words(corpus_file):
            training_names.append((entry, label))

    feature_sets = []
    for entry, label in training_names:
        feature_sets.append((name_features(entry), label))

    return nltk.NaiveBayesClassifier.train(feature_sets)
开发者ID:mitchpowell1,项目名称:Bechdel,代码行数:8,代码来源:Script_Tagger.py
示例11: identify_gender3
def identify_gender3():
    """Run ``classify`` with a Naive Bayes classifier on ``gender_features3`` data.

    The NLTK names corpus is labelled by source file, shuffled, converted
    with ``gender_features3`` and handed to ``classify`` together with the
    classifier class and the number 500; its result is returned unchanged.
    """
    import random
    from nltk.corpus import names

    labeled = [(entry, 'male') for entry in names.words('male.txt')]
    labeled += [(entry, 'female') for entry in names.words('female.txt')]
    random.shuffle(labeled)

    featuresets = []
    for entry, label in labeled:
        featuresets.append((gender_features3(entry), label))
    return classify(nltk.NaiveBayesClassifier, featuresets, 500)
开发者ID:fishmacs,项目名称:mycode,代码行数:9,代码来源:chap06.py
示例12: feature_nameList
def feature_nameList(word):
    """Return 1 if ``word`` is a known first name or place name, otherwise 0.

    A word counts as a first name when it appears in the NLTK male or
    female name list, and as a place name when GeoText recognises a city
    or a country in it.
    """
    if word in names.words('male.txt') or word in names.words('female.txt'):
        return 1
    # Inspect what GeoText actually extracted.  Testing the GeoText object
    # itself is not a usable check: it is a plain object and is expected to
    # be truthy whether or not anything was found.
    places = GeoText(word)
    if places.cities or places.countries:
        return 1
    return 0
开发者ID:DomWag,项目名称:TMP_Andre,代码行数:9,代码来源:Project2.py
示例13: get_variations
def get_variations(s):
    """Return the parts of ``s`` that are known first names.

    Each whitespace-separated token of ``s`` found in the NLTK names corpus
    is returned, in order; ``s`` itself is appended as well when the whole
    string is in the corpus (so a single-token name is listed twice).
    """
    # Read the corpus once and use a set, instead of re-reading and linearly
    # scanning the full word list for every token.
    known_names = set(names.words())
    variations = [token for token in s.split() if token in known_names]
    if s in known_names:
        variations.append(s)
    return variations
开发者ID:GregoryElliott,项目名称:TGMA_NLP_Project,代码行数:9,代码来源:gg_api.py
示例14: create_featuresets
def create_featuresets(self):
    '''
    Build shuffled (features, gender) pairs from the names corpora.

    Every name is labelled 'male' or 'female' after the corpus file it
    comes from and converted with ``self.gender_features``.
    '''
    males = [(entry, 'male') for entry in names.words('male.txt')]
    females = [(entry, 'female') for entry in names.words('female.txt')]
    labeled = males + females
    random.shuffle(labeled)

    featuresets = []
    for entry, label in labeled:
        featuresets.append((self.gender_features(entry), label))
    return featuresets
开发者ID:neviim,项目名称:Georgetown-Capstone,代码行数:9,代码来源:Genders.py
示例15: nltkTest
def nltkTest():
    """Train a Naive Bayes gender classifier and print its guess for 'Neo'.

    The labelled names corpus is shuffled; everything after the first 500
    feature sets is used for training.
    """
    import random

    males = [(entry, 'male') for entry in names.words('male.txt')]
    females = [(entry, 'female') for entry in names.words('female.txt')]
    labeled_names = males + females
    random.shuffle(labeled_names)

    featuresets = []
    for entry, label in labeled_names:
        featuresets.append((gender_features(entry), label))

    # The first 500 feature sets are held back and not used for training.
    classifier = nltk.NaiveBayesClassifier.train(featuresets[500:])
    print(classifier.classify(gender_features('Neo')))
开发者ID:saurabhc123,项目名称:SharedTask,代码行数:10,代码来源:nltkPoc.py
示例16: __init__
def __init__(self):
    """Train the gender classifier and store it on ``self.classifier``.

    The complete shuffled names corpus, converted with ``gender_features``,
    is used as training data for a Naive Bayes classifier.
    """
    super(Gender, self).__init__()

    labeled_names = []
    for corpus_file, label in (('male.txt', 'male'), ('female.txt', 'female')):
        labeled_names.extend((entry, label) for entry in names.words(corpus_file))
    random.shuffle(labeled_names)

    # Training data for the Naive Bayes classifier: every labelled name.
    train_set = []
    for entry, label in labeled_names:
        train_set.append((gender_features(entry), label))
    self.classifier = nltk.NaiveBayesClassifier.train(train_set)
开发者ID:ayush2k,项目名称:AgileTweetViz-Desert-Walkers,代码行数:10,代码来源:gender.py
示例17: classify
def classify(self, name):
    """Return 'M' or 'F' for a corpus name, else the trained classifier's guess.

    The male list is consulted before the female list; only an exact match
    counts.  Names found in neither list are classified from the features
    produced by ``self._nameFeatures``.
    """
    feats = self._nameFeatures(name)

    for corpus_file, code in (('male.txt', 'M'), ('female.txt', 'F')):
        if any(name == known for known in names.words(corpus_file)):
            return code

    return self.classifier.classify(feats)
开发者ID:theopak,项目名称:storytellingbot,代码行数:11,代码来源:genderPredictor.py
示例18: main
def main():
    """Train a gender classifier, classify a name typed by the user, print accuracy.

    The labelled names corpus is shuffled and converted with
    ``genderFeature`` applied to the lower-cased name.  The first 500
    feature sets form the test set and the rest the training set.  The
    user is prompted for a name; its predicted label and the accuracy on
    the test set are printed.
    """
    labeled = ([(n, 'male') for n in names.words('male.txt')] +
               [(n, 'female') for n in names.words('female.txt')])
    # random.shuffle works in place and returns None, so its result is not kept.
    random.shuffle(labeled)

    featuresets = [(genderFeature(n.lower()), g) for (n, g) in labeled]
    train_set, test_set = featuresets[500:], featuresets[:500]
    classifier = nltk.NaiveBayesClassifier.train(train_set)

    query = raw_input("Enter a name: ")
    # Lower-case the query exactly as the training names were, so that the
    # features seen at prediction time match those seen during training.
    print(classifier.classify(genderFeature(query.lower())))
    print(nltk.classify.accuracy(classifier, test_set))
开发者ID:GirishSrinivas,项目名称:PythonPrograms,代码行数:11,代码来源:ch6sam.py
示例19: feature_nameList
def feature_nameList(word):
    """Return 1 if the token in ``word[0]`` is a known first name or place, else 0.

    The token is compared, after 'unicode-escape' decoding on both sides,
    with every entry of the NLTK male and female name lists.  If no name
    matches, GeoText is asked whether the token contains a city or a
    country.

    NOTE(review): ``word`` is presumably a (token, ...) sequence - only
    ``word[0]`` is read here; confirm against the caller.
    """
    token = word[0]
    # Decode the input token once instead of once per corpus entry.
    decoded = token.decode('unicode-escape')
    for corpus_file in ('male.txt', 'female.txt'):
        for known in names.words(corpus_file):
            if decoded == known.decode('unicode-escape'):
                return 1

    # Parse the token once and inspect both result lists.
    places = GeoText(token)
    if len(places.cities) > 0 or len(places.countries) > 0:
        return 1
    return 0
开发者ID:DomWag,项目名称:TMP_Andre,代码行数:11,代码来源:project2_feat.py
示例20: partial_names_demo
def partial_names_demo(trainer, features=names_demo_features):
    """Train and evaluate a classifier from positive and unlabeled name examples.

    ``trainer`` is called with the features of 2000 male names (positive
    examples) and the features of 500 further male plus 500 female names
    (unlabeled examples).  The resulting classifier is tested on 250 male
    (True) and 250 female (False) names; the accuracy is printed and, when
    the classifier supports probability estimates, so are the average log
    likelihood and five sample distributions.

    Returns the trained classifier.
    """
    import random
    from nltk.corpus import names

    males = names.words('male.txt')
    females = names.words('female.txt')

    random.seed(654321)
    random.shuffle(males)
    random.shuffle(females)

    # Positive-labeled training examples: male names only.
    positive = [features(entry) for entry in males[:2000]]

    # Unlabeled training examples: a mix of male and female names.
    unlabeled = [features(entry) for entry in males[2000:2500] + females[:500]]

    # Correctly labeled test set of male (True) and female (False) names.
    test = [(entry, True) for entry in males[2500:2750]]
    test = test + [(entry, False) for entry in females[500:750]]
    random.shuffle(test)

    print('Training classifier...')
    classifier = trainer(positive, unlabeled)

    print('Testing classifier...')
    acc = accuracy(classifier, [(features(entry), label) for (entry, label) in test])
    print('Accuracy: %6.4f' % acc)

    # Classifiers that cannot estimate probabilities raise
    # NotImplementedError; the extra report is then skipped.
    try:
        test_featuresets = [features(entry) for (entry, label) in test]
        pdists = classifier.batch_prob_classify(test_featuresets)
        ll = []
        for ((entry, gold), pdist) in zip(test, pdists):
            ll.append(pdist.logprob(gold))
        print('Avg. log likelihood: %6.4f' % (sum(ll)/len(test)))
        print('')
        print('Unseen Names P(Male) P(Female)\n'+'-'*40)
        for ((entry, is_male), pdist) in list(zip(test, pdists))[:5]:
            # The asterisk marks the column of the correct label.
            fmt = ' %-15s *%6.4f %6.4f' if is_male == True else ' %-15s %6.4f *%6.4f'
            print(fmt % (entry, pdist.prob(True), pdist.prob(False)))
    except NotImplementedError:
        pass

    return classifier
开发者ID:0day1day,项目名称:golismero,代码行数:53,代码来源:util.py
注:本文中的nltk.corpus.names.words函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论