This article collects typical usage examples of the nltk.corpus.wordnet.morphy function in Python. If you are wondering what morphy does, how it is used, and what it looks like in practice, the curated code examples below may help.
The following 20 code examples of the morphy function are sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
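Before the examples, here is a minimal stand-alone sketch of what wn.morphy itself does: it maps an inflected form to a base form that exists in WordNet, optionally restricted to a part of speech, and returns None when nothing matches. The expected outputs follow the standard NLTK WordNet corpus:

from nltk.corpus import wordnet as wn  # requires nltk.download('wordnet') once

print(wn.morphy('dogs'))           # dog   (plural noun -> base form)
print(wn.morphy('denied'))         # deny  (verb inflection -> lemma)
print(wn.morphy('book', wn.NOUN))  # book  (lookup restricted to the noun reading)
print(wn.morphy('xyzzy'))          # None  (not in WordNet)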
Example 1: lowest_common_hypernym

def lowest_common_hypernym(fr):
    """
    Returns the lowest common hypernym of the two mentions (based on WordNet).
    Again assuming that the last word = head word, and that it represents the phrase.
    Also considering only the first sense.
    """
    try:
        i_final = wn.morphy(re.sub(r"\W", r"", fr.i_token.split('_')[-1]))
        j_final = wn.morphy(re.sub(r"\W", r"", fr.j_token.split('_')[-1]))
        if i_final is None or j_final is None:
            return "lowest_common_hypernym={}".format(False)
        if _is_pronoun(i_final) or _is_pronoun(j_final):
            return "lowest_common_hypernym={}".format(False)
        i_synsets = wn.synsets(i_final)
        j_synsets = wn.synsets(j_final)
        lowest_common_hypernym = i_synsets[0].lowest_common_hypernyms(j_synsets[0])[0]
        return "lowest_common_hypernym={}".format(lowest_common_hypernym)
    except wn_error:
        return "lowest_common_hypernym={}".format(False)

Author: pkarmstr, Project: relation-extraction, Lines: 26, Source: feature_functions.py
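For context on the call at the heart of Example 1, lowest_common_hypernyms finds the deepest ancestor shared by two synsets in the hypernym hierarchy; a stand-alone sketch using standard WordNet entries:

from nltk.corpus import wordnet as wn

dog = wn.synset('dog.n.01')
cat = wn.synset('cat.n.01')
print(dog.lowest_common_hypernyms(cat))  # [Synset('carnivore.n.01')]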
Example 2: get_similarity

def get_similarity(self, word1, word2):
    '''Compute word similarity, based on the WordNet semantic lexicon.'''
    '''
    print 'before stemmed:', word1
    print 'after stemmed:', wn.morphy(word1.lower())
    print 'before stemmed:', word2
    print 'after stemmed:', wn.morphy(word2.lower())
    '''
    # reduce both words to their base forms where WordNet knows one
    if wn.morphy(word1.lower()) is not None:
        word1 = wn.morphy(word1.lower())
    if wn.morphy(word2.lower()) is not None:
        word2 = wn.morphy(word2.lower())
    word1_synsets = wn.synsets(word1)
    word2_synsets = wn.synsets(word2)
    sim = 0
    for syn1 in word1_synsets:
        w1 = wn.synset(syn1.name())
        for syn2 in word2_synsets:
            w2 = wn.synset(syn2.name())
            tmp = w1.path_similarity(w2)
            # path_similarity returns None when no path connects the synsets
            if tmp is not None and tmp > sim:
                sim = tmp
    return sim

Author: cnspica, Project: ASExtractor, Lines: 28, Source: EnKeywordExtraction.py
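The path_similarity used above scores two synsets by the shortest path between them in the hypernym graph, yielding a value in (0, 1]; a quick sketch of the kind of value Example 2 maximizes over:

from nltk.corpus import wordnet as wn

w1 = wn.synset('dog.n.01')
w2 = wn.synset('cat.n.01')
print(w1.path_similarity(w2))  # 0.2, i.e. 1 / (shortest path length + 1)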
Example 3: subclass

def subclass(feats):
    if string_match(feats).endswith("False"):
        try:
            result = False
            i_clean = wn.morphy(feats.i_cleaned.lower(), wn.NOUN)
            i_synsets = wn.synsets(i_clean)
            j_clean = wn.morphy(feats.j_cleaned.lower(), wn.NOUN)
            j_synsets = wn.synsets(j_clean)

            def get_common_hypernym(i_synset, j_synset):
                i_hypernyms = i_synset.hypernyms()
                j_hypernyms = j_synset.hypernyms()
                if len(i_hypernyms) == 0:
                    i_synset = i_synset.instance_hypernyms()[0]
                if len(j_hypernyms) == 0:
                    j_synset = j_synset.instance_hypernyms()[0]
                subc = i_synset.common_hypernyms(j_synset)
                return (i_synset in subc) or (j_synset in subc)

            for synset in i_synsets:
                for syn in j_synsets:
                    result = get_common_hypernym(synset, syn)
                    if result:
                        break
                if result:
                    break
            return "subclass={}".format(result)
        except wn_error:  # original had a bare "except:" with a stray wn_error expression
            return "subclass={}".format(False)
    else:
        return "subclass={}".format(False)

Author: pkarmstr, Project: coreference-project, Lines: 30, Source: feature_functions.py
Example 4: preprocessWords

def preprocessWords(lst):
    index = 0
    while index < len(lst):
        word = lst[index].lower()
        if word not in reservedWordList:
            # special handling carried over from the Java code
            if word == 'financial':
                lst[index] = 'finance'
            # avoid '_id' appearing as a word in the description
            if word == '_id':
                lst[index] = 'id'
            # only VERB and NOUN forms are kept; unclear whether wn.morphy can
            # return different stems per POS, in which case wn.morphy(word) is used
            # if wn.morphy(word, wn.VERB) and wn.morphy(word, wn.NOUN) and wn.morphy(word, wn.VERB) != wn.morphy(word, wn.NOUN):
            #     print word, wn.morphy(word, wn.VERB), wn.morphy(word, wn.NOUN), wn.morphy(word)
            if wn.morphy(word, wn.VERB) or wn.morphy(word, wn.NOUN):
                if wn.morphy(word) != word:
                    lst[index] = wn.morphy(word)
                    word = lst[index]
            elif wn.morphy(PorterStemmer().stem_word(word)):  # stem_word is the old NLTK API; newer NLTK uses .stem
                lst[index] = PorterStemmer().stem_word(word)
                word = lst[index]
            else:
                del lst[index]
                continue
        if len(word) == 1 or word in stopWordList or word.isdigit():
            del lst[index]
            continue
        index += 1
    return lst

Author: CollaborativeScientificWorkflow, Project: semantic-oesvm, Lines: 29, Source: preprocess.py
Example 5: expand_queries

def expand_queries(file):
    '''
    For each term in a query, takes the first synset of the word from WordNet
    and adds all synonyms of that synset.
    '''
    # note: Python 2 source (print statement syntax)
    file = open(file)
    for sentence in file:
        sentence = sentence.strip()
        if sentence.find('<text>') != -1:
            query = sentence[sentence.find('>')+1: sentence.rfind('<')]
            additions = ''
            updated_q = nltk.pos_tag(nltk.wordpunct_tokenize(query.lower()))
            full_q = query
            for word, pos in updated_q:
                if word not in stopwords.words('english'):
                    looking_for = str(word) + '.' + str(get_wordnet_pos(pos)) + '.01'
                    synsets = wn.synsets(word)
                    if looking_for in str(synsets):
                        new_words = wn.synset(looking_for).lemma_names  # was .definition
                        for new_word in new_words:
                            if new_word.lower() != word.lower():
                                full_q = full_q + ' ' + str(new_word)
                    else:
                        if wn.morphy(word) is not None:
                            word = wn.morphy(word)
                            looking_for = str(word) + '.' + str(get_wordnet_pos(pos)) + '.01'
                            print str(looking_for) + ' THIS IS WORD'
                            synsets = wn.synsets(word)
                            if looking_for in str(synsets):
                                new_words = wn.synset(looking_for).lemma_names  # was .definition
                                for new_word in new_words:
                                    if new_word.lower() != word.lower():
                                        full_q = full_q + ' ' + str(new_word)
            print query + ' ' + full_q

Author: britth, Project: inls890-microblog, Lines: 33, Source: wordnetExpansion.py
Example 6: same_hypernym

def same_hypernym(fr):
    """
    True if the two mentions have the same hypernym in WordNet.
    In multiword mentions, considering only the last word (I'm assuming last word = head).
    Not considering pronouns.
    Most of the logic was borrowed from Julia's WN function in the coref project - thank you.
    """
    try:
        i_final = wn.morphy(re.sub(r"\W", r"", fr.i_token.split('_')[-1]))
        j_final = wn.morphy(re.sub(r"\W", r"", fr.j_token.split('_')[-1]))
        if i_final is None or j_final is None:
            return "same_hypernym={}".format(False)
        if _is_pronoun(i_final) or _is_pronoun(j_final):
            return "same_hypernym={}".format(False)
        i_synsets = wn.synsets(i_final)
        j_synsets = wn.synsets(j_final)
        for i_synset in i_synsets:
            i_hypernym_set = set(i_synset.hypernyms())
            for j_synset in j_synsets:
                j_hypernym_set = set(j_synset.hypernyms())
                if i_hypernym_set.intersection(j_hypernym_set):
                    return "same_hypernym={}".format(True)
        return "same_hypernym={}".format(False)
    except wn_error:
        return "same_hypernym={}".format(False)

Author: pkarmstr, Project: relation-extraction, Lines: 33, Source: feature_functions.py
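The hypernym sets being intersected in Example 6 come straight from Synset.hypernyms(); for instance:

from nltk.corpus import wordnet as wn

print(wn.synset('dog.n.01').hypernyms())
# [Synset('canine.n.02'), Synset('domestic_animal.n.01')]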
Example 7: _wnbase

def _wnbase(self):
    if self.postag == 'n':
        return wn.morphy(self.lemma, wn.NOUN)
    elif self.postag == 'v':
        return wn.morphy(self.lemma, wn.VERB)
    elif self.postag == 'a':
        return wn.morphy(self.lemma, wn.ADJ)
    return None

Author: aurora1625, Project: Recognizing-Textual-Entailment, Lines: 8, Source: part4.py
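The POS constants passed to morphy are plain one-character strings (wn.NOUN == 'n', wn.VERB == 'v', wn.ADJ == 'a', wn.ADV == 'r'), so the if/elif chain in Example 7 can also be written as a table lookup. A minimal sketch, not the project's actual code:

from nltk.corpus import wordnet as wn

POS_MAP = {'n': wn.NOUN, 'v': wn.VERB, 'a': wn.ADJ, 'r': wn.ADV}

def wnbase(lemma, postag):
    # table-driven equivalent of _wnbase above
    pos = POS_MAP.get(postag)
    return wn.morphy(lemma, pos) if pos else None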
Example 8: ApplyBNB

def ApplyBNB(doc_tokens, classes_postings, condprob, prior, vocabulary, selected_features):
    ## Assumes global dictionaries defined: stop_words, names, negation_words
    global stop_words, names, negation_words
    scores = dict()
    for c in classes_postings:
        scores[c] = 0  # math.log(prior[c])
        negation_found = False
        adverb_found = False
        adverb_condprob = 0.0
        doc_features = []
        for t in doc_tokens:
            t = t.lower()
            if constants.LA and t in negation_words:
                negation_found = True
                continue
            if t in stop_words:
                continue
            if t in names:
                continue
            isAdj = wn.morphy(t, wn.ADJ) is not None
            isNoun = wn.morphy(t, wn.NOUN) is not None
            isVerb = wn.morphy(t, wn.VERB) is not None
            isAdv = wn.morphy(t, wn.ADV) is not None
            if constants.LA and negation_found:
                negation_found = False
                continue
            t = process_word(t)
            if t not in vocabulary:
                continue
            if constants.FEATURE_SELECTION is not None and t not in selected_features[c]:
                continue
            doc_features.append(t)
        vocab = vocabulary
        if constants.FEATURE_SELECTION is not None:
            vocab = selected_features[c]
        # note: the loop below iterates the full vocabulary, not the narrowed vocab
        for t in vocabulary:
            if t in doc_features:
                scores[c] += math.log(condprob[t][c])
            else:
                scores[c] += math.log(1.0 - condprob[t][c])
    diff = math.fabs(scores["0"] - scores["1"])
    return (scores, diff)

Author: aimannajjar, Project: columbiau-set-movie-review-classifier, Lines: 55, Source: algorithm.py
Example 9: getRoot

def getRoot(w, tag=False):
    if tag == False:
        for tag in ['v', 'n', 'a', 'r']:
            r = wordnet.morphy(w, tagequiv(tag))
            if r:
                return r
        return w
    try:
        return wordnet.morphy(w, tag)
    except:
        return w

Author: AllanRamsay, Project: COMP34411, Lines: 11, Source: te.py
Example 10: get_synonyms_as_set

def get_synonyms_as_set(input_word):
    if input_word is None:
        return set()
    synonyms = set()
    synSets = wn.synsets(input_word)
    for syn in synSets:
        for lemma_name in syn.lemma_names():
            if wn.morphy(lemma_name) is not None:
                synonyms.add(str(wn.morphy(lemma_name).encode('utf-8').decode('ascii', 'ignore')))
    return synonyms

Author: azariaa, Project: ENC, Lines: 11, Source: helperFunctions.py
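Example 10 folds each synset's lemma names through morphy; the raw lemma_names() call it iterates over looks like this:

from nltk.corpus import wordnet as wn

print(wn.synsets('car')[0].lemma_names())
# ['car', 'auto', 'automobile', 'machine', 'motorcar']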
Example 11: getGroup

def getGroup(count, word, threshold, wordsSeen, groups):
    # note: Python 2 source (dict.has_key)
    word = "".join(l for l in word if l not in string.punctuation)
    best = 0
    group = word
    # search for an existing group
    if wordsSeen.has_key(word):
        return wordsSeen.get(word)
    # get the synset of the word
    if wn.synsets(word):
        wordSyn = wn.synsets(word)[0]
    elif wn.morphy(word):
        # original read wn.morphy(word)[0], which yields the first character of
        # the lemma string rather than a synset; this is the likely intent
        wordSyn = wn.synsets(wn.morphy(word))[0]
    else:
        # no synset; use the word itself as its own group
        wordsSeen.update({word: group})
        if groups.has_key(group):
            newValue = groups.get(group)
            newValue.update([word])
            groups.update({group: newValue})
        else:
            newValue = set()
            newValue.update([word])
            groups.update({group: newValue})
            wordsSeen.update({word: group})
        return word
    # compare to each group
    # is there a way to compare one word to many words?
    for super_word in count.keys():
        # get the synsets of the group being tested against
        comparisons = groups.get(super_word)
        sim = nSim(wordSyn, comparisons)
        if sim >= threshold and sim > best:
            group = super_word
            best = sim
    wordsSeen.update({word: group})
    if groups.has_key(group):
        newValue = groups.get(group)
        newValue.update([word])
        groups.update({group: newValue})
    else:
        newValue = set()
        newValue.update([word])
        groups.update({group: newValue})
        wordsSeen.update({word: group})
    return group

Author: EmilyVanHorn, Project: Good-Turing, Lines: 52, Source: GoodTuringEdit.py
Example 12: chunktaged

def chunktaged(tokens, tagged, word):
    '''
    Extract the meaningful chunk (phrase) from the sentence.
    It can also be thought of as phrase detection.
    PARAMETER LIST:
    tokens is a list of the words in the sentence:
        ['I', 'previously', 'booked', 'the', 'nice', 'flight', '.']
    tagged is a list of tuples consisting of word and POS:
        [('I', 'PRP'), ('previously', 'RB'), ('booked', 'VBD'), ('the', 'DT'), ('nice', 'JJ'), ('flight', 'NN'), ('.', '.')]
    word is what we look up:
        'booked'
    The return value should be a phrase like 'turn_on' or just the original word.
    # the rules as our knowledge:
    # 1. consecutive nouns
    # 2. verb before a preposition
    '''
    word_index = tokens.index(word)
    if pos_map.has_key(tagged[word_index][1]):  # Python 2 source (dict.has_key)
        word_pos = pos_map[tagged[word_index][1]]
    else:
        return word
    if word_pos == 'VERB' and wn.morphy(word, wn.VERB) is not None:
        word = wn.morphy(word, wn.VERB)
    elif word_pos == 'NOUN' and wn.morphy(word, wn.NOUN) is not None:
        word = wn.morphy(word, wn.NOUN)
    if word_index == len(tokens) - 1:
        return word
    if pos_map.has_key(tagged[word_index + 1][1]):
        next_word_pos = pos_map[tagged[word_index + 1][1]]
    else:
        return word
    if (word_pos == 'VERB' and next_word_pos == 'PP') or \
       (word_pos == 'NOUN' and next_word_pos == 'NOUN'):
        possible_chunk = word + '_' + tokens[word_index + 1]
        # in case the consecutive nouns are not actually a phrase
        if wn.synsets(possible_chunk) == []:
            return word
        else:
            return possible_chunk
    else:
        return word

Author: ouyangz, Project: SchoolProjects, Lines: 50, Source: chunk_ver2.py
Example 13: get_roots

def get_roots(sentence):
    roots = []
    for idx, token in enumerate(sentence.clean_tokens):
        if sentence.tokens_pos[idx] == "VB":
            root = wn.morphy(token, wn.VERB)
        else:
            root = wn.morphy(token)
        if root is None:
            root = token
        roots.append(root)
    return roots

Author: djrenren, Project: nlpQ-A, Lines: 14, Source: davila_features.py
Example 14: main

def main():
    punIn = raw_input("Pun File: ")  # get it? it's a pun on "punning", hah hah
    f = open(punIn, "r")
    for line in f:
        posList = POSCheck(line)  # returns a list of words that stood out in the POS tagging
        hList = homophoneCheck(line)  # returns a list of homophones, along with the original word from the sentence
        print(posList)
        print(hList)
        extText = POSextract(line)  # returns a list with all of the important words extracted
        print(extText)
        hiscore = 0
        highSim = []
        for word in extText:
            for i in range(0, len(hList)):
                hSim = conceptCheck(word, hList[i])
                if hSim == []:
                    continue
                elif hSim[2] > hiscore:
                    highSim = hSim
                    hiscore = highSim[2]
            for a in range(0, len(hList)):
                mword = wn.morphy(word)
                if mword:
                    hMorphSim = conceptCheck(mword, hList[a])
                    if hMorphSim == []:
                        continue
                    elif hMorphSim[2] > hiscore:
                        highSim = hMorphSim
                        hiscore = highSim[2]
                else:
                    break
            for j in range(0, len(posList)):
                pSim = conceptCheck(word, posList[j])
                if pSim == []:
                    continue
                elif pSim[2] > hiscore:
                    highSim = pSim
                    hiscore = highSim[2]
            for b in range(0, len(posList)):
                mword = wn.morphy(word)
                if mword:
                    pMorphSim = conceptCheck(mword, posList[b])
                    if pMorphSim == []:
                        continue
                    elif pMorphSim[2] > hiscore:
                        highSim = pMorphSim
                        hiscore = highSim[2]
                else:
                    break
        print(highSim)

Author: jxjzhang, Project: Punnography, Lines: 50, Source: punRecog.py
Example 15: get_antonyms_as_set

def get_antonyms_as_set(input_word):
    if input_word is None:
        return set()
    antonyms = set()
    synonyms = wn.synsets(input_word)
    for syn in synonyms:
        lemmas = syn.lemmas()
        for lem in lemmas:
            for ant in lem.antonyms():
                if wn.morphy(ant.name()) is not None:
                    antonyms.add(str(wn.morphy(ant.name()).encode('utf-8').decode('ascii', 'ignore')))
    return antonyms

Author: azariaa, Project: ENC, Lines: 15, Source: helperFunctions.py
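The antonyms() call that Example 15 loops over is defined on Lemma objects, not on synsets, which is why the code descends to syn.lemmas() first; a stand-alone sketch:

from nltk.corpus import wordnet as wn

good = wn.synset('good.a.01').lemmas()[0]
print(good.antonyms())  # [Lemma('bad.a.01.bad')]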
Example 16: simple_pos_morphy

def simple_pos_morphy(self, word):
    """
    Helper function for the simpletextprocess function. It doesn't process
    the PoS tagging in the first place.
    @param word: the raw word before morphing
    @type word: C{string}
    """
    morphied = wn.morphy(word, wn.NOUN)
    if morphied is not None:
        return morphied
    else:
        morphied = wn.morphy(word, wn.VERB)
        if morphied is not None:
            return morphied
    return word

Author: arnaudsj, Project: AINews, Lines: 15, Source: AINewsTextProcessor.py
Example 17: find_candidates

def find_candidates(self, property_subtree):
    if not isinstance(property_subtree, ParentedTree):
        raise AttributeError
    candidates = set(self.__get_property_string_forms(property_subtree))
    new_candidates = set()
    for candidate in candidates:
        for label in self.__fetch_from_wikibase(candidate):
            new_candidates.add(label)
    candidates.update(new_candidates)
    new_candidates = set()
    for candidate in candidates:
        new_candidates.update(self.__fetch_synonyms_and_hypernyms(candidate))
    candidates.update(new_candidates)
    new_candidates = set()
    for candidate in candidates:
        for POS in [wordnet.ADJ, wordnet.ADV, wordnet.NOUN, wordnet.VERB]:
            morphy = wordnet.morphy(candidate, POS)
            if morphy is not None:
                new_candidates.add(morphy)
    candidates.update(new_candidates)
    return candidates

Author: EIFSDB, Project: search-engine, Lines: 26, Source: property_finder.py
Example 18: nominalise

def nominalise(verb):
    # to be used when turning verbs into noun forms when extracting simple co-occurrences
    stem = STP.stem(verb)
    # first try the pure stem with each suffix
    for suff in SUFFIXES:
        noun_form = wn.morphy(stem + suff, wn.NOUN)
        if noun_form is not None:
            return noun_form
    # then try with the last character of the stem doubled
    stem += stem[-1]
    for suff in SUFFIXES:
        noun_form = wn.morphy(stem + suff, wn.NOUN)
        if noun_form is not None:
            return noun_form
    # return None if nothing works
    return None

Author: GullyAPCBurns, Project: SKIMMR, Lines: 16, Source: util.py
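nominalise depends on the module-level STP and SUFFIXES; to exercise it stand-alone you could plug in assumed values. The stemmer and suffix list below are guesses for illustration, not the project's actual configuration:

from nltk.stem.porter import PorterStemmer
from nltk.corpus import wordnet as wn

STP = PorterStemmer()                              # assumed stemmer
SUFFIXES = ['ation', 'ment', 'ion', 'ance', 'er']  # assumed suffix list

print(nominalise('educate'))  # 'education' under these assumptions ('educ' + 'ation')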
Example 19: __load_lesk_vector

def __load_lesk_vector(self, dicty, window_size=100):
    # print example.word, example.pos, example.target
    # print example.senses
    # Generate WordSets of surrounding words
    other_sets = []
    words = self.words_window(window_size)
    for word, pos in words:
        baseword = wn.morphy(word)
        if baseword is not None:
            pos = penn_to_wn(pos)
            synsets = wn.synsets(baseword, pos=pos) if pos is not None else wn.synsets(baseword)
            for synset in synsets:
                # synset.definition is an attribute in older NLTK; a method in newer versions
                other_sets.append(WordSet(synset.definition))
    # Loop through possible wordsets and note counts
    counts = []
    for sense in self.all_senses(dicty):
        curr_set = WordSet(sense.gloss)
        counts.append(curr_set.overlap(other_sets))
    # Normalize and return
    countfirsts = [count[0] for count in counts]
    countfirsts_max = max(countfirsts)
    if countfirsts_max > 0:
        return [float(count) / countfirsts_max for count in countfirsts]
    else:
        return [0.0 for count in countfirsts]

Author: jcccf, Project: cs4740, Lines: 34, Source: Parser.py
Example 20: statement_stemmer

def statement_stemmer(stmt):
    """
    str -> [list of strs]
    Return list of stemmed words from a single statement string.
    """
    stmt_stems = []
    # only stem statements that are strings (used in statement_corpus() below)
    if type(stmt) is float:
        return
    #stmt_text_str = rev_text.encode('utf-8')  ## deprecated
    stmt_text_str = stmt.lower()
    sentence_list = nltk.sent_tokenize(stmt_text_str)
    for sent in sentence_list:
        word_list = nltk.word_tokenize(sent)
        for word in word_list:  # compare with the WordNet corpus
            wn_word = wordnet.morphy(word)
            if wn_word is not None:
                # stem_word is the old NLTK API; newer NLTK uses .stem
                wn_stem = PorterStemmer().stem_word(wn_word)
                stmt_stems.append(wn_stem)
    return stmt_stems

Author: BradAJ, Project: Kaggle-SeeClickPredictFix, Lines: 26, Source: seeclickfix.py
Note: The nltk.corpus.wordnet.morphy examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by various developers; copyright in the source code remains with the original authors. For distribution and use, please refer to the corresponding project's license. Do not repost without permission.