This article collects typical usage examples of the Python function nltk.corpus.wordnet.all_synsets. If you are unsure what all_synsets does, how to call it, or how it is used in practice, the curated examples below should help.
Twenty code examples of all_synsets are shown, sorted by popularity by default.
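Before the examples, here is a minimal sketch of the basic call pattern (NLTK 3 and Python 3 assumed; the WordNet data must already be installed, e.g. via nltk.download('wordnet')). all_synsets() returns a lazy iterator over every synset in WordNet, optionally restricted to one part of speech:

from collections import Counter
from nltk.corpus import wordnet as wn

# Count synsets per part of speech ('n', 'v', 'a', 's', 'r').
# all_synsets() is lazy, so this streams instead of building a list.
pos_counts = Counter(s.pos() for s in wn.all_synsets())
print(pos_counts)

# Restricted to one POS: inspect the first five noun synsets.
for synset in list(wn.all_synsets('n'))[:5]:
    print(synset.name(), synset.definition())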
Example 1: load

def load(self, pos):
    wn = self.wn
    if pos == 'n':
        roots = wn.synsets('entity')
    else:
        roots = [s for s in wn.all_synsets(pos) if len(s.hypernyms()) == 0]
    self.root = WordNetTreeNode('root')
    for synset in roots:
        self.__append_synset(synset, self.root)
    # Unfortunately, the block above is not guaranteed to build the
    # entire WordNet tree, because it starts at the root and adds the
    # descendants retrieved from synset.hyponyms(), and that method
    # does not always return all hyponyms. For example, portugal.n.01
    # is not retrieved as a hyponym of european_country.n.01, but if we call
    #     wn.synsets('portugal')[0].hypernym_paths()
    # european_country.n.01 appears as its ancestor.
    # Check for synsets that were not found.
    index = self.hashtable()
    for synset in wn.all_synsets(pos):
        if synset.name() not in index:
            for path in synset.hypernym_paths():
                keys = [s.name() for s in path]
                self.__extend(keys,
                              is_internal=len(path[-1].hyponyms()) > 0)

Author: vialab | Project: semantic-guesser | Lines: 30 | Source: wordnet.py
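A plausible explanation for the missing hyponyms described in the comments (our assumption, not something stated by the original author): portugal.n.01 is attached to european_country.n.01 through WordNet's instance relation, which hyponyms() does not traverse but instance_hyponyms() does. A small sketch that checks both relation types:

from nltk.corpus import wordnet as wn

country = wn.synset('european_country.n.01')
print(len(country.hyponyms()))           # class hyponyms only
print(len(country.instance_hyponyms()))  # instances such as portugal.n.01

# Walking hyponyms() + instance_hyponyms() covers both kinds of children.
children = country.hyponyms() + country.instance_hyponyms()
print(any(c.name() == 'portugal.n.01' for c in children))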
Example 2: prepare

def prepare(self):
    for verb in wn.all_synsets('v'):
        for lemma in verb.lemmas():
            if 1 in lemma.frame_ids():
                for lemma in verb.lemmas():
                    verbs.append(str(lemma.name()).replace('_', ' '))
    for noun in wn.all_synsets('n'):
        for lemma in noun.lemmas():
            nouns.append(self.plural(str(lemma.name()).replace('_', ' ')))
    for adj in wn.all_synsets('a'):
        for lemma in adj.lemmas():
            adjectives.append(str(lemma.name()).replace('_', ' '))
    for adv in wn.all_synsets('r'):
        for lemma in adv.lemmas():
            adverbs.append(str(lemma.name()).replace('_', ' '))

Author: magali-br | Project: webtest | Lines: 29 | Source: ChomskySatzGenerator.py
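Note that the inner loops above reuse the variable name lemma, so every lemma of the verb is appended once per lemma that carries frame 1. If the intent was to append each verb's lemmas once whenever any of its lemmas has frame 1 (our reading, not confirmed by the original author), a sketch of that version:

from nltk.corpus import wordnet as wn

verbs = []
for verb in wn.all_synsets('v'):
    # frame 1 is the "Something ----s" sentence frame
    if any(1 in lemma.frame_ids() for lemma in verb.lemmas()):
        for lemma in verb.lemmas():
            verbs.append(lemma.name().replace('_', ' '))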
Example 3: list_nouns

def list_nouns():
    global NOUNS
    print "[+] Creating list of nouns... (This only has to be done once)"
    if WIKI_LANGUAGE == 'en':
        ## Make list of nouns from wordnet
        NOUNS = {x.name().split('.', 1)[0] for x in wn.all_synsets('n')}
        ## TODO CREATE A SEPARATE LIST FOR NOUNS ENDING IN S
    elif WIKI_LANGUAGE == 'es':
        ## Make list of nouns from cess_esp
        list = nltk.corpus.cess_esp.tagged_words()
        sust = []
        for elem in list:
            if elem[1][0] == 'n':
                sust.append(elem[0])
        NOUNS = set(sust)
    # TODO german language support
    # elif WIKI_LANGUAGE == 'de':
    else:
        print "[!] Language not recognised, using English."
        ## Make list of nouns from wordnet
        NOUNS = {x.name().split('.', 1)[0] for x in wn.all_synsets('n')}
    print " Done!"

Author: Juamps | Project: FollowText | Lines: 26 | Source: follow_text_0.3.py
Example 4: populateBars

def populateBars():
    connection = mdb.connect('localhost', 'user', 'pass', 'barlytics')
    current = connection.cursor()
    nounsList = []
    adjectiveList = []
    cityList = ['San Francisco', 'Chicago', 'New York', 'Austin', 'Seattle']
    print "here"
    count = 0
    for synset in list(wn.all_synsets('n')):
        nounsList.append(str(synset.name).split('.')[0])
        count = count + 1
        if count >= 50000:
            break
    count = 0
    print "here"
    for synset in list(wn.all_synsets('a')):
        adjectiveList.append(str(synset.name).split('.')[0])
        count = count + 1
        if count >= 50000:
            break
    print "here"
    finalList = []
    for i in range(10000):
        string = "The " + adjectiveList[randint(0, len(adjectiveList) - 1)].capitalize()
        string = string + " " + nounsList[randint(0, len(nounsList) - 1)].capitalize()
        finalList.append(string)
        name = string
        license = str(randint(1000000, 9000000))
        city = str(address.city())
        phone = str(phone_number.phone_number_format(0))
        addr = str(randint(1, 255)) + " " + address.street_name()
        query = 'insert into bars values("' + name + '", "' + license + '", "' + city + '", "' + phone + '", "' + addr + '"); '
        print query
        try:
            current.execute(query)
        except mdb.IntegrityError:
            print "integrity error:"
    print 'commit'
    connection.commit()

Author: kaushal | Project: barlytics | Lines: 49 | Source: barsNLTK.py
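One caution about this example: the INSERT statement is assembled by string concatenation, so any generated value containing a double quote breaks the query, and the pattern is open to SQL injection. With MySQLdb (imported as mdb above), a parameterized sketch of the same insert, kept in the example's Python 2 style:

query = "insert into bars values (%s, %s, %s, %s, %s)"
try:
    # the driver escapes each value; no manual quoting needed
    current.execute(query, (name, license, city, phone, addr))
except mdb.IntegrityError:
    print "integrity error:"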
Example 5: _run_extract

def _run_extract(self):
    # Extract all two-word AN and NN compounds from WordNet and write them to file.
    print "Extracting noun compounds from WN"
    discards = []
    allsynsets = list(wn.all_synsets(self.parameters['pos']))
    if not self.parameters['testing']:
        self.n = len(allsynsets)
    for synset in list(wn.all_synsets(self.parameters['pos']))[:self.n]:
        for lemma in synset.lemmas:  # walk over all lemmas for all synsets
            words = lemma.name.split('_')
            if len(words) == 2:  # only two-word lemmas
                poslist = []
                for word in words:
                    # collect the possible POS tags for each word of the pair
                    poslist.append(PairGenerator.getpos(word))
                headpos = poslist.pop()
                if 'N' in headpos:  # is 'N' a possible POS for the head word (last in the list)?
                    phrase = words.pop() + '/N'
                    modpos = poslist.pop()
                    mod = words.pop()
                    if 'N' in modpos:  # is 'N' a possible POS for the modifier?
                        NNphrase = phrase + ":nn-DEP:" + mod + '/N'
                        self.NNs.append(NNphrase)
                    if 'J' in modpos:  # is 'J' a possible POS for the modifier?
                        ANphrase = phrase + ":amod-DEP:" + mod + '/J'
                        self.ANs.append(ANphrase)
                    if len(modpos) == 0:  # only J and N are considered for the modifier
                        discards.append(lemma.name)
                else:  # only N is considered for the head
                    discards.append(lemma.name)
    print len(self.NNs), self.NNs
    print len(self.ANs), self.ANs
    print len(discards), discards
    # write the lists to file
    with open(self.ANpath, 'w') as outstream:
        for AN in self.ANs:
            outstream.write(AN + '\n')
    with open(self.NNpath, 'w') as outstream:
        for NN in self.NNs:
            outstream.write(NN + '\n')
    return

Author: julieweeds | Project: Compounds | Lines: 48 | Source: wordpairs.py
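This example uses the NLTK 2 attribute style (synset.lemmas, lemma.name); in NLTK 3 both are methods. The core two-word filter, updated for NLTK 3, would look roughly like this sketch:

from nltk.corpus import wordnet as wn

for synset in wn.all_synsets('n'):
    for lemma in synset.lemmas():        # method call in NLTK 3
        words = lemma.name().split('_')  # method call in NLTK 3
        if len(words) == 2:
            head, mod = words[1], words[0]
            # classify head/modifier POS as in the example above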
Example 6: exercise3

def exercise3():
    print
    print "Exercise - 3"
    ss = [w for w in wn.all_synsets('v')]
    result = sum([len(ss[i].hypernyms()) for i in range(len(ss))])
    print "Total number of hypernyms of 'v' is: %d" % result
    print "Average number of hypernyms is: %f" % (result / float(len(ss)))

Author: GirishSrinivas | Project: PythonPrograms | Lines: 7 | Source: GirishSrinivas_ExtraCredit.py
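The same numbers can be computed without indexing into the list; a compact equivalent, kept in the example's Python 2 style:

from nltk.corpus import wordnet as wn

counts = [len(s.hypernyms()) for s in wn.all_synsets('v')]
print "Total number of hypernyms of 'v' is: %d" % sum(counts)
print "Average number of hypernyms is: %f" % (sum(counts) / float(len(counts)))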
Example 7: getAllGlossLinks

def getAllGlossLinks(useTagger=False, useverbs=False, reflexive=False, n=10000):
    links = {}
    print "Gathering synsets"
    synsets = [ss for ss in wordnet.all_synsets()]
    n = 0
    for ss in synsets:
        print "%.3f" % (float(n) / float(len(synsets)))
        n += 1
        ssname = ss.name
        defn = wordboundary.split(ss.definition.strip())
        if useTagger:
            defn = [(form, wdnettags[tag[0]]) for form, tag in useTagger.tag(defn)
                    if not form == "" and tag[0] in wdnettags]
        if not ssname in links:
            links[ssname] = {}
        for w in defn[:n]:
            if type(w) == "str":
                wsynsets = wordnet.synsets(w)
            else:
                wsynsets = wordnet.synsets(w[0], w[1])
            for s in wsynsets:
                sname = s.name
                links[ssname][sname] = True
                if reflexive:
                    if not sname in links:
                        links[sname] = {}
                    links[sname][ssname] = True
        if not ssname in links:
            print ssname, defn
    for l in links:
        ll = links[l]
        for d in ll:
            links[l][d] = 1.0 / float(len(ll))
    return links

Author: AllanRamsay | Project: COMP34411 | Lines: 33 | Source: ppr.py
Example 8: __init__

def __init__(self):
    t0 = time()
    print 'initializing random word generator'
    self.s_articles = ['A', 'The']
    self.o_articles = ['a', 'the']
    self.prepositions = ['of', 'in', 'to', 'for', 'with', 'on', 'at', 'from', 'by',
                         'about', 'as', 'into', 'like', 'through', 'after', 'over', 'out', 'around']
    self.nouns = list(wn.all_synsets(wn.NOUN))
    self.verbs = list(wn.all_synsets(wn.VERB))
    self.adjectives = list(wn.all_synsets(wn.ADJ))
    self.adverbs = list(wn.all_synsets(wn.ADV))
    t1 = time()
    runTime = t1 - t0
    print 'word list initialized in ' + str(runTime) + ' seconds'

Author: DanFlannel | Project: TwitterBot | Lines: 16 | Source: RndSentence.py
Example 9: main

def main(argv):
    huang_vocab = LoadHuang()
    manaal_vocab = LoadManaal()
    brown_vocab = LoadBrown()
    all_lemmas = {x.lower() for x in wn.all_lemma_names(pos=wn.ADJ)}
    all_alpha_lemmas = {x for x in all_lemmas if x.isalpha()}
    all_synsets = set(wn.all_synsets(pos=wn.ADJ))
    all_alpha_synsets = {x for x in all_synsets if IsAlphaSS(x)}
    all_lemmas_with_single_synset = {x for x in all_lemmas if IsSingleSynset(x)}
    all_lemmas_ambig_synset = {x for x in all_lemmas if not IsSingleSynset(x)}
    all_lemmas_with_single_synset_alpha = {x for x in all_lemmas_with_single_synset if x.isalpha()}
    all_lemmas_ambig_synset_alpha = {x for x in all_lemmas_ambig_synset if x.isalpha()}
    all_alpha_lemmas_has_noun = {x for x in all_alpha_lemmas if LemmaHasNoun(x)}
    all_alpha_lemmas_has_noun_single_lexname = {x for x in all_alpha_lemmas_has_noun if IsNounSingleLexName(x)}
    print "all_lemmas:", len(all_lemmas)
    print "all_alpha_lemmas:", len(all_alpha_lemmas)
    print "all_synsets:", len(all_synsets)
    print "all_alpha_synsets:", len(all_alpha_synsets)
    print "all_lemmas_with_single_synset:", len(all_lemmas_with_single_synset)
    print "all_lemmas_ambig_synset:", len(all_lemmas_ambig_synset)
    print "all_lemmas_with_single_synset_alpha", len(all_lemmas_with_single_synset_alpha)
    print "all_lemmas_ambig_synset_alpha", len(all_lemmas_ambig_synset_alpha)
    print "all_alpha_lemmas_has_noun", len(all_alpha_lemmas_has_noun)
    print "all_alpha_lemmas_has_noun_single_lexname", len(all_alpha_lemmas_has_noun_single_lexname)
    print "huang.intersect(all_alpha_lemmas)", len(huang_vocab.intersection(all_alpha_lemmas))
    print "manaal.intersect(all_alpha_lemmas)", len(manaal_vocab.intersection(all_alpha_lemmas))
    print "brown.intersect(all_alpha_lemmas)", len(brown_vocab.intersection(all_alpha_lemmas))
    print "huang*manaal*brown*all_alpha_lemmas", len(huang_vocab.intersection(all_alpha_lemmas, manaal_vocab, brown_vocab))
    print "huang.intersect(all_lemmas_with_single_synset_alpha)", len(huang_vocab.intersection(all_lemmas_with_single_synset_alpha))
    print "manaal.intersect(all_lemmas_with_single_synset_alpha)", len(manaal_vocab.intersection(all_lemmas_with_single_synset_alpha))
    print "brown.intersect(all_lemmas_with_single_synset_alpha)", len(brown_vocab.intersection(all_lemmas_with_single_synset_alpha))
    print "huang*manaal*brown*all_lemmas_with_single_synset_alpha", len(huang_vocab.intersection(all_lemmas_with_single_synset_alpha, manaal_vocab, brown_vocab))

Author: ytsvetko | Project: supersense_classifier | Lines: 33 | Source: statistics.py
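As this example illustrates, wn.all_lemma_names(pos=...) iterates over lemma strings while wn.all_synsets(pos=...) iterates over synset objects, and the two counts differ because one lemma can belong to several synsets and one synset can hold several lemmas. A minimal comparison, in the example's Python 2 style:

from nltk.corpus import wordnet as wn

adj_lemmas = set(wn.all_lemma_names(pos=wn.ADJ))
adj_synsets = list(wn.all_synsets(pos=wn.ADJ))
print "adjective lemma names:", len(adj_lemmas)
print "adjective synsets:", len(adj_synsets)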
Example 10: convert_all_to_basic

def convert_all_to_basic(reviews):
    print("Process Started")
    print("Getting all nouns....")
    words = [s for s in wn.all_synsets(wn.NOUN)
             if s.name().find('-') == -1 and s.name().find('_') == -1
             and len(s.name().split('.')[0]) < 12]
    print("Processing basic logic probability...")
    words2 = []
    filter_basic_logic(words, words2)
    print("Removing redundancy...")
    a = list(set(words2))
    a.sort()
    remove_unwanted(a)
    newReviews = []
    for review in reviews:
        tempReview = ""
        tokens = word_tokenize(review)
        for token in tokens:
            tempword = check_basic(token, a)
            if tempword:
                tempReview = tempReview + " " + tempword
            else:
                tempReview = tempReview + " " + token
        newReviews.append(tempReview)
    return newReviews

Author: Salihan04 | Project: TomatoEngine | Lines: 25 | Source: bc_prog.py
Example 11: wn_pos_dist

def wn_pos_dist():
    """Count the Synsets in each WordNet POS category."""
    # One-dimensional count dict with 0 as the default value:
    cats = defaultdict(int)
    # The counting loop:
    for synset in wn.all_synsets():
        cats[synset.pos] += 1

Author: gthandavam | Project: SunyVisD | Lines: 7 | Source: prepare.py
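As written, the function fills cats but neither returns nor prints it, and in NLTK 3 synset.pos is a method, so the key should be synset.pos(). A hedged NLTK 3 version that returns the distribution:

from collections import Counter
from nltk.corpus import wordnet as wn

def wn_pos_dist():
    """Count the synsets in each WordNet POS category."""
    # pos() yields one of 'n', 'v', 'a', 's', 'r'
    return Counter(synset.pos() for synset in wn.all_synsets())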
Example 12: load_corpora

def load_corpora(self):
    print "Loading corpora..."
    pth = os.path.realpath(os.path.dirname(__file__))
    nltk.data.path.append(os.path.join(pth, "nltk_data"))
    from nltk.corpus import wordnet as wn
    self._adjectives = list(wn.all_synsets('a'))
    self._nouns = list(wn.all_synsets('n'))
    with open(os.path.join(pth, "firstnames.txt")) as fh:
        self._firstnames = fh.readlines()
    with open(os.path.join(pth, "surnames.txt")) as fh:
        self._surnames = fh.readlines()

Author: prehensile | Project: everysummertitle | Lines: 16 | Source: generator.py
Example 13: populate_cache

def populate_cache():
    adjectives, nouns = (set(), set())
    for wordset, kind in [
        (adjectives, wordnet.ADJ),
        (nouns, wordnet.NOUN),
    ]:
        for synset in wordnet.all_synsets(kind):
            for lemma in filter(
                lambda l: all((
                    not re.search(r'\d', l.name()),
                    l.name() not in BLACKLIST,
                    not l.name().endswith('_to'),
                    l.count() > 0,
                )), synset.lemmas()
            ):
                wordset.add(lemma.name().replace('_', ' '))
    os.mkdir(CACHE_PATH)
    for words, filename in [
        (adjectives, 'adjectives'),
        (nouns, 'nouns'),
    ]:
        with open(os.path.join(CACHE_PATH, filename), 'w') as f:
            f.writelines((u'{}\n'.format(w) for w in words))

Author: tinruufu | Project: littlepileof | Lines: 26 | Source: miserable.py
Example 14: list_nouns

def list_nouns():
    ## TODO CREATE A SEPARATE LIST FOR NOUNS ENDING IN S
    global NOUNS
    print "[+] Creating list of nouns... (This only has to be done once)"
    ## Make list of nouns in wordnet
    NOUNS = {x.name().split('.', 1)[0] for x in wn.all_synsets('n')}
    print " Done!"

Author: Juamps | Project: FollowText | Lines: 7 | Source: follow_text_0.2.py
Example 15: ex26_branchingfactor

def ex26_branchingfactor():
    from nltk.corpus import wordnet as wn
    num_synsets = 0
    num_hyponyms = 0
    for noun_synset in wn.all_synsets("n"):
        (num_hyponyms, num_synsets) = \
            branchingfactor_r(noun_synset, num_synsets, num_hyponyms)
    print "branching factor=", (num_hyponyms / num_synsets)

Author: 447327642 | Project: nltk-examples | Lines: 8 | Source: ch02_ex.py
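Under Python 2, num_hyponyms / num_synsets is integer division, so the printed branching factor is truncated to a whole number; converting one operand to float preserves the fraction:

# inside ex26_branchingfactor(), after the loop:
print "branching factor=", (float(num_hyponyms) / num_synsets)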
Example 16: print_all_synset_categories

def print_all_synset_categories():
    """
    Prints all domains and
    categories for research purposes
    """
    categories = []
    for synset in list(wordnet.all_synsets('n')):
        categories.append(synset)
    return categories

Author: cheekybastard | Project: namebot | Lines: 9 | Source: nlp.py
Example 17: getTaggedHyps

def getTaggedHyps():
    allsynsets = [ss for ss in wordnet.all_synsets()]
    alltags = {}
    for ss in allsynsets:
        alltags[ss.pos] = True
    taggedhyps = {}
    for tag in alltags:
        taggedhyps[tag] = getAllHyps(allsynsets, tag=tag)
    return taggedhyps

Author: AllanRamsay | Project: COMP34411 | Lines: 9 | Source: allhyps.py
Example 18: getUpDownLinks

def getUpDownLinks():
    links = {}
    for ss in wordnet.all_synsets():
        for s in ss.hypernyms() + ss.hyponyms():
            try:
                links[ss.name][s.name] = True
            except KeyError:
                links[ss.name] = {s.name: True}
    return links

Author: AllanRamsay | Project: COMP34411 | Lines: 9 | Source: ppr.py
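The try/except here is standing in for a default value, and ss.name is a method in NLTK 3. An equivalent sketch using defaultdict:

from collections import defaultdict
from nltk.corpus import wordnet

def getUpDownLinks():
    links = defaultdict(dict)
    for ss in wordnet.all_synsets():
        # record an up/down link for every hypernym and hyponym
        for s in ss.hypernyms() + ss.hyponyms():
            links[ss.name()][s.name()] = True
    return links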
Example 19: synonyms

def synonyms(word, lch_threshold=2.26):
    for net1 in wn.synsets(word):
        for net2 in wn.all_synsets():
            try:
                lch = net1.lch_similarity(net2)
            except:
                continue
            if lch >= lch_threshold:
                yield (net1, net2, lch)

Author: rdorado79 | Project: wiki-dm-project | Lines: 9 | Source: eval.py
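lch_similarity is only defined between synsets of the same part of speech (it raises an error otherwise, which the bare except absorbs), so most of the full all_synsets() scan is wasted work. A sketch (Python 3 / NLTK 3 style) that restricts the scan to each source synset's POS, plus example usage:

from nltk.corpus import wordnet as wn

def synonyms(word, lch_threshold=2.26):
    for net1 in wn.synsets(word):
        # comparing only same-POS synsets avoids most failed comparisons
        for net2 in wn.all_synsets(net1.pos()):
            try:
                lch = net1.lch_similarity(net2)
            except Exception:
                continue
            if lch is not None and lch >= lch_threshold:
                yield (net1, net2, lch)

for net1, net2, lch in synonyms('dog'):
    print(net1.name(), net2.name(), lch)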
Example 20: print_all_synset_categories

def print_all_synset_categories():
    """Print all domains and categories for research purposes.

    :rtype categories (list): A list of all wordnet synsets.
    """
    categories = []
    for synset in list(wordnet.all_synsets('n')):
        categories.append(synset)
    return categories

Author: christabor | Project: namebot | Lines: 9 | Source: nlp.py
Note: the nltk.corpus.wordnet.all_synsets examples in this article were collected by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets come from open-source projects contributed by their respective authors; copyright remains with the original authors, and any distribution or use should follow the corresponding project's license. Please do not reproduce without permission.