This article collects typical usage examples of the Python function nltk.corpus.wordnet.synset. If you have been wondering what exactly the synset function does, how to use it, or where to find working examples, the curated code samples below may help.
The following 20 code examples of the synset function are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
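Before the individual examples, here is a minimal, self-contained sketch of the basic API (it assumes the WordNet data has been downloaded once via nltk.download('wordnet'); the synset keys 'dog.n.01' and 'cat.n.01' are standard WordNet identifiers used purely for illustration). Note that most examples below assume an import such as from nltk.corpus import wordnet as wn; a few use the alias wordnet instead.

import nltk
from nltk.corpus import wordnet as wn

nltk.download('wordnet', quiet=True)  # one-time download; a no-op if already present

# wn.synset() looks up a single sense by its 'lemma.pos.number' key,
# while wn.synsets() returns every sense of a word.
dog = wn.synset('dog.n.01')                  # first noun sense of 'dog'
print(dog.definition())                      # gloss of this sense
print([l.name() for l in dog.lemmas()])      # synonymous lemmas

# Similarity between two senses (Wu-Palmer similarity lies in (0, 1]).
cat = wn.synset('cat.n.01')
print(dog.wup_similarity(cat))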
Example 1: process_verb

def process_verb(verb):
    verb = verb[:-1]  # strip the trailing newline character
    with open('youtube_setof_verbs.txt') as f:
        verb_dict = f.read()
    verb_dict = verb_dict.split('\n')
    max_score = 0
    finl_verb = (verb, '<>')
    # split a CamelCase label such as 'CuttingOnion' into its words
    verb_list = re.findall('[A-Z][^A-Z]*', verb)
    for prob_verb in verb_list:
        if prob_verb.endswith('ing'):
            prob_verb = prob_verb[:-3]  # remove the 'ing' suffix
            if prob_verb.lower() == 'cutt':
                prob_verb = 'cut'  # 'cutting' strips to 'cutt', not 'cut'
        if wn.synsets(prob_verb):
            try:
                v1 = wn.synset(prob_verb + '.v.01')
                for yout_verb in verb_dict:
                    if yout_verb != '':
                        # if wn.synsets(yout_verb):
                        v2 = wn.synset(yout_verb + '.v.01')
                        score = v1.wup_similarity(v2)
                        # wup_similarity may return None for unrelated senses
                        if score is not None and score > max_score:
                            finl_verb = (prob_verb, yout_verb)
                            max_score = score
            except Exception:
                finl_verb = (prob_verb, '<>')
    # print(finl_verb, max_score)
    return finl_verb[1]

Author: sumitb | Project: YouTube2Action | Lines of code: 32 | Source file: word.py
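A hypothetical call (the file youtube_setof_verbs.txt and the CamelCase label format come from the snippet above; the concrete label 'CuttingOnion' is invented for illustration):

print(process_verb('CuttingOnion\n'))  # returns the closest verb from the YouTube list, e.g. 'cut'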
Example 2: process_subj

def process_subj(subj, flag):
    if flag == 1:
        with open('youtube_setof_subjects.txt') as f:
            subj_dict = f.read()
        subj_dict = subj_dict.split('\n')
    elif flag == 2:
        with open('youtube_setof_objects.txt') as f:
            obj_dict = f.read()
        subj_dict = obj_dict.split('\n')
    max_score = 0
    finl_subj = (subj, '<>')
    subj_list = subj.split(',')
    if len(subj_list) == 1:
        return subj
    for prob_subj in subj_list:
        prob_subj = prob_subj.strip()
        if wn.synsets(prob_subj):
            try:
                v1 = wn.synset(prob_subj + '.n.01')
                for yout_subj in subj_dict:
                    if yout_subj != '':
                        v2 = wn.synset(yout_subj + '.n.01')
                        score = v1.wup_similarity(v2)
                        # wup_similarity may return None for unrelated senses
                        if score is not None and score > max_score:
                            finl_subj = (prob_subj, yout_subj)
                            max_score = score
            except Exception:
                finl_subj = (prob_subj, '<>')
    return finl_subj[1]

Author: sumitb | Project: YouTube2Action | Lines of code: 34 | Source file: word.py
Example 3: preprocess_docs

def preprocess_docs():
    stopwords = nltk.corpus.stopwords.words('english')
    corpus = list(filtered_corpus())
    counter = 0
    for train, topic, title, text in corpus:
        if counter % 10 == 0:
            print("%.2f %%\r" % (counter * 100.0 / len(corpus),), end='')
            sys.stdout.flush()
        counter += 1
        text = [i for i in nltk.word_tokenize(title) if i.lower() not in stopwords]
        buf = []
        for word in text:
            synsets = wn.synsets(word)
            grain = []
            wheat = []
            for s in synsets:
                # note: the original snippet appended the 'wheat' score to the
                # 'grain' list and vice versa; the two lines below un-swap them
                grain.append(s.path_similarity(wn.synset('grain.n.08')))
                wheat.append(s.path_similarity(wn.synset('wheat.n.02')))
            grain = [i for i in grain if i is not None]
            wheat = [i for i in wheat if i is not None]
            if len(grain) == 0:
                grain = 0
            else:
                grain = sum(grain) * 1.0 / len(grain)
            if len(wheat) == 0:
                wheat = 0
            else:
                wheat = sum(wheat) * 1.0 / len(wheat)
            buf.append((word, grain, wheat))
        yield train, topic, buf
    print("")

Author: Sentimentron | Project: CS909-Excercise8 | Lines of code: 33 | Source file: pre713.py
Example 4: get_score

def get_score(tags, groups):
    sscore = 0
    scount = 0
    illegal_word = 0
    if tags is not None:
        for g in groups:
            for x in tags:  # the original read 'k.tags'; 'k' was undefined in the snippet
                try:
                    # check substring match first, else compute a word-similarity score
                    if g in str(x.text).lower():
                        sscore += 2.0
                        scount += 1
                    else:
                        tag = wn.synset(str(x.text).lower() + '.n.01')
                        group = wn.synset(g + '.n.01')
                        sem = wn.path_similarity(group, tag)
                        # path_similarity may return None for unrelated senses
                        if sem is not None and sem >= 0.3:
                            sscore += sem
                            scount += 1
                except Exception:
                    illegal_word += 1
    if scount != 0:
        return sscore / scount
    else:
        return 0

Author: tushar19 | Project: Web-Image-Ranking-Retrieval | Lines of code: 28 | Source file: imgsearch.py
Example 5: getSenseSimilarity

def getSenseSimilarity(worda, wordb):
    """
    Find the similarity between the word senses of two words.
    """
    wordasynsets = wn.synsets(worda)
    wordbsynsets = wn.synsets(wordb)
    # in NLTK 3.x, Synset.name and Synset.definition are methods, not attributes
    synsetnamea = [wn.synset(syns.name()) for syns in wordasynsets]
    synsetnameb = [wn.synset(syns.name()) for syns in wordbsynsets]
    for sseta in synsetnamea:
        for ssetb in synsetnameb:
            pathsim = sseta.path_similarity(ssetb)
            wupsim = sseta.wup_similarity(ssetb)
            if pathsim is not None:
                print("Path Sim Score:", pathsim, "WUP Sim Score:", wupsim,
                      "\t", sseta.definition(), "\t", ssetb.definition())

Author: dxd132630 | Project: NeoPythonic | Lines of code: 27 | Source file: similarity.py
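A hypothetical invocation of the function above (the word pair is illustrative; actual scores depend on the installed WordNet version):

getSenseSimilarity('car', 'automobile')  # prints path/WUP scores for every sense pair of the two words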
Example 6: probability

def probability(tokens, category, dictionary, total):
    if category == "sense":
        total_score = 0
        dic = dictionary
        if len(tokens) == 0:
            return 0
        for token in tokens:
            for dict_sense in dic:
                score = wn.synset(token).path_similarity(wn.synset(dict_sense))
                if score is not None:
                    total_score += score * dic[dict_sense]
        return total_score / len(tokens)
    else:
        p = 0
        dic = dictionary
        total_instances = total
        for token in tokens:
            if token in dic:
                token_prob = dic[token]
            else:
                token_prob = 0  # smoothing would go here
            curr = token_prob / float(total_instances)
            p += curr
        return p

Author: aiqiliu | Project: AskReddit-analytics | Lines of code: 26 | Source file: titleAnalysis.py
Example 7: get_similar_words

import re
from nltk.corpus import wordnet as wn
from nltk.corpus.reader.wordnet import WordNetError

def get_similar_words(word):
    # assign four *separate* empty lists (the original chained assignment
    # bound all four names to the same list object)
    lemmas_noun, hypernyms_noun, lemmas_verb, hypernyms_verb = [], [], [], []
    try:
        lemmas_noun = [str(lemma.name()) for lemma in wn.synset(word + '.n.01').lemmas()]
    except WordNetError:
        pass
    try:
        hypernyms_noun = [str(lemma.name()).split('.')[0] for lemma in wn.synset(word + '.n.01').hypernyms()]
    except WordNetError:
        pass
    if len(lemmas_noun) == 0 and len(hypernyms_noun) == 0:
        # only try verbs if there are no similar nouns
        try:
            lemmas_verb = [str(lemma.name()) for lemma in wn.synset(word + '.v.01').lemmas()]
        except WordNetError:
            pass
        try:
            hypernyms_verb = [str(lemma.name()).split('.')[0] for lemma in wn.synset(word + '.v.01').hypernyms()]
        except WordNetError:
            pass
    similar_words = lemmas_noun + hypernyms_noun + lemmas_verb + hypernyms_verb
    # keep only purely alphabetic words: multi-word entries such as
    # "domestic_animal" contain underscores and would require a 2-gram
    # search, which is not implemented here
    pattern = re.compile('^[a-zA-Z]+$')
    return [w for w in similar_words if pattern.match(w) and w != word]

Author: seowyanyi | Project: cs3245-4 | Lines of code: 32 | Source file: helper.py
Example 8: expand_queries

def expand_queries(path):  # renamed from 'file' to avoid shadowing the builtin
    '''
    For each term in a query, take the first synset of the word from WordNet
    and add all synonyms of that synset.
    '''
    f = open(path)
    for sentence in f:
        sentence = sentence.strip()
        if sentence.find('<text>') != -1:
            query = sentence[sentence.find('>') + 1: sentence.rfind('<')]
            updated_q = nltk.pos_tag(nltk.wordpunct_tokenize(query.lower()))
            full_q = query
            for word, pos in updated_q:
                if word not in stopwords.words('english'):
                    looking_for = str(word) + '.' + str(get_wordnet_pos(pos)) + '.01'
                    synsets = wn.synsets(word)
                    if looking_for in str(synsets):
                        new_words = wn.synset(looking_for).lemma_names()  # was .definition
                        for new_word in new_words:
                            if new_word.lower() != word.lower():
                                full_q = full_q + ' ' + str(new_word)
                    else:
                        if wn.morphy(word) is not None:
                            word = wn.morphy(word)
                        looking_for = str(word) + '.' + str(get_wordnet_pos(pos)) + '.01'
                        print(str(looking_for) + ' THIS IS WORD')
                        synsets = wn.synsets(word)
                        if looking_for in str(synsets):
                            new_words = wn.synset(looking_for).lemma_names()  # was .definition
                            for new_word in new_words:
                                if new_word.lower() != word.lower():
                                    full_q = full_q + ' ' + str(new_word)
            print(query + ' ' + full_q)

Author: britth | Project: inls890-microblog | Lines of code: 33 | Source file: wordnetExpansion.py
Example 9: get_similarity

def get_similarity(self, word1, word2):
    '''Compute word similarity, based on the WordNet semantic dictionary.'''
    # lemmatize ("stem") the words first
    if wn.morphy(word1.lower()) is not None:
        word1 = wn.morphy(word1.lower())
    if wn.morphy(word2.lower()) is not None:
        word2 = wn.morphy(word2.lower())
    word1_synsets = wn.synsets(word1)
    word2_synsets = wn.synsets(word2)
    sim = 0
    for syn1 in word1_synsets:
        w1 = wn.synset(syn1.name())
        for syn2 in word2_synsets:
            w2 = wn.synset(syn2.name())
            tmp = w1.path_similarity(w2)
            # path_similarity may return None, which must not be compared to a number
            if tmp is not None and tmp > sim:
                sim = tmp
    return sim

Author: cnspica | Project: ASExtractor | Lines of code: 28 | Source file: EnKeywordExtraction.py
Example 10: print_other_lexical_rel

def print_other_lexical_rel():
    good1 = wn.synset('good.a.01')
    # wn.lemmas('good') would list the Lemma objects for 'good'
    print("Antonyms of 'good': " + str(good1.lemmas()[0].antonyms()))
    print("")
    print("Entailment of 'walk': " + str(wn.synset('walk.v.01').entailments()))
    print("")

Author: anirudhcoder | Project: Natural-Language-Processing | Lines of code: 7 | Source file: hw2-part2-wordnet-examples.py
Example 11: overlapCount

def overlapCount(self, sentence):
    # start the count at one so we can still guess when no sentence overlaps
    count = 1
    sWiki = TextBlob(self.arrayToString(sentence))
    sVerbs = self.getVerbs(sWiki)
    # compare verbs using WordNet's similarity score;
    # identical verbs score exactly 1
    for sverb in sVerbs:
        synv = wn.synset(sverb + '.v.01')
        for qverb in self.questionVerbs:
            synq = wn.synset(qverb + '.v.01')
            sim = synv.path_similarity(synq)
            if sim is not None:  # path_similarity may return None
                count += sim
    # remove stop words from the sentence AFTER the POS tags have been extracted
    s = self.removeStopWords(sentence)
    sLower = self.removeStopWords(sentence.lower())
    for word in self.qList:
        if word in s:
            count += 1
        elif word.lower() in sLower:
            count += 0.1
    return count

Author: FlyingGroundhogs | Project: QASystem | Lines of code: 26 | Source file: VOverlap.py
Example 12: compare

def compare(self, word1, word2):
    # Synset.name() is a method in NLTK 3.x (it was an attribute in NLTK 2.x)
    tmp1 = wn.synsets(word1)[0].name()
    tmp2 = wn.synsets(word2)[0].name()
    w1 = wn.synset(tmp1)
    w2 = wn.synset(tmp2)
    val = w1.wup_similarity(w2)
    return val

Author: danjamker | Project: N-Fly | Lines of code: 7 | Source file: WordNet.py
Example 13: is_ingredient

def is_ingredient(word):
    """
    Return True if the word is an ingredient, False otherwise.

    >>> is_ingredient('milk')
    True
    >>> is_ingredient('blackberries')
    True
    >>> is_ingredient('Canada')
    False
    >>> is_ingredient('breakfast')
    False
    >>> is_ingredient('dish')
    False
    """
    reject_synsets = ['meal.n.01', 'meal.n.02', 'dish.n.02', 'vitamin.n.01']
    reject_synsets = set(wordnet.synset(w) for w in reject_synsets)
    accept_synsets = ['food.n.01', 'food.n.02']
    accept_synsets = set(wordnet.synset(w) for w in accept_synsets)
    for word_synset in wordnet.synsets(word, wordnet.NOUN):
        # collect the synset itself plus all of its hypernyms, transitively
        all_synsets = set(word_synset.closure(lambda s: s.hypernyms()))
        all_synsets.add(word_synset)
        for synset in reject_synsets:
            if synset in all_synsets:
                return False
        for synset in accept_synsets:
            if synset in all_synsets:
                return True
    return word in wordlists.ingredients

Author: JoshRosen | Project: cmps140_creative_cooking_assistant | Lines of code: 29 | Source file: ingredients.py
Example 14: ontoList

def ontoList(self, synset):
    # properties to pick from
    # Synset.lexname() is a method in NLTK 3.x
    ln = wn.synset(synset).lexname().split('.')[1]
    hyper = self.lemmatize(self.getHypernyms(synset))
    definition = self.getDefinition(synset)
    lemmas = self.lemmatize(self.getLemmas(synset))
    examples = self.getExamples(synset)
    hypo = self.lemmatize(self.getHyponyms(synset))
    if self.pos == 'v':
        # verbs additionally expose their subcategorization frames
        strings = [string.replace("_", " ") for string in self.getFrameStrings(synset)]
        ontologyList = [strings, ln, lemmas, examples, hypo, definition, hyper]
    else:
        ontologyList = [ln, lemmas, examples, hypo, definition, hyper]
    returnList = list()
    for o in ontologyList:
        if o:
            returnList.append(o)
    return returnList

Author: aankit | Project: centrality | Lines of code: 25 | Source file: wnQuery_dev.py
Example 15: calculate_and_write_edge_weigthings_for_synsets

def calculate_and_write_edge_weigthings_for_synsets(synset_filenames_dict, file_name):
    max_co_occurrence = calculate_max_co_occurrence(synset_filenames_dict)
    edge_weigthings_for_synsets = dict()
    how_many_added = 0
    how_many_done = 0
    how_many_to_do = len(synset_filenames_dict.keys()) * (len(synset_filenames_dict.keys()) - 1)
    write_edge_weightings_to_file(dict(), file_name)
    for synset1, filenames1 in synset_filenames_dict.items():  # iteritems() in Python 2
        for synset2, filenames2 in synset_filenames_dict.items():
            if synset1 < synset2:
                how_many_done += 1
                similarity = wn.synset(synset1).lch_similarity(wn.synset(synset2))
                co_occurrence = len(set(synset_filenames_dict[synset1]).intersection(set(synset_filenames_dict[synset2])))
                normalized_co_occurrence = co_occurrence / max_co_occurrence
                # threshold both signals before combining them
                if similarity is None or similarity < 2.0:
                    similarity = 0
                if normalized_co_occurrence < 0.4:
                    normalized_co_occurrence = 0
                edge_weighting = similarity + 4 * normalized_co_occurrence
                if edge_weighting != 0:
                    edge_weigthings_for_synsets[(synset1, synset2)] = edge_weighting
                    how_many_added += 1
                if how_many_added > 1000:
                    print_status("Done with " + str(how_many_done) + " of " + str(how_many_to_do) + "\n")
                    write_edge_weightings_to_file(edge_weigthings_for_synsets, file_name, append_to_file=True)
                    edge_weigthings_for_synsets = dict()
                    how_many_added = 0
    write_edge_weightings_to_file(edge_weigthings_for_synsets, file_name, append_to_file=True)

Author: nicolas-fricke | Project: semmul2013-group1 | Lines of code: 30 | Source file: mcl_keyword_clustering.py
Example 16: get_message

def get_message(message_parser):
    message_split = message_parser.split("|")
    mobile_number = message_split[0]
    need_synonyms = ["require", "want", "motivation", "motive", "ask", "call for", "demand", "involve", "necessitate", "need", "postulate", "take", "indigence", "pauperism", "pauperization", "penury"]
    supply_synonyms = ["issue", "furnish", "provide", "render", "add", "append", "cater", "ply", "provision", "supplying", "afford", "yield", "commit", "consecrate", "dedicate", "devote", "spring", "springiness", "impart", "leave", "pass on", "ease up", "give way", "move over", "render", "feed", "generate", "return", "throw", "chip in", "contribute", "kick in", "grant", "pay", "break", "cave in", "collapse", "fall in", "founder", "hand", "pass", "reach", "turn over", "have", "hold", "make", "establish", "open", "apply", "gift", "present", "sacrifice"]
    tokens = nltk.word_tokenize(message_split[1])
    need = len(set(tokens) & set(need_synonyms)) > 0
    need_json = {"need": True} if need else {"supply": True}
    need_json.update({"number": mobile_number})
    tagged_tokens = nltk.pos_tag(tokens)
    current_count = None  # replaces the original try/except NameError default
    for i in range(len(tagged_tokens)):
        if tagged_tokens[i][1] == 'CD':
            current_count = get_integer(tagged_tokens[i][0])
        elif tagged_tokens[i][1] == 'DT':
            current_count = 1
        elif tagged_tokens[i][1] in ['NNS', 'NN']:
            # skip container words plus the synonym lists themselves (the original
            # nested the two list objects inside this literal, which tested
            # membership against the lists rather than their elements)
            if tagged_tokens[i][0] in ["cups", "cup", "packets", "packet", "bottle", "bottles", "bundle", "bundles", "packages", "package"] + need_synonyms + supply_synonyms:
                continue
            current_category = tagged_tokens[i][0]
            c = wn.synsets(current_category)
            if not c:  # the word has no synsets at all
                continue
            food = wn.synset('food.n.01')
            water = wn.synset('water.n.01')
            # wup_similarity may return None; treat that as 0
            food = food.wup_similarity(c[0]) or 0
            water = water.wup_similarity(c[0]) or 0
            current_category = "food" if food > water else "water"
            print(current_count)
            if current_count is None:
                current_count = 1
            need_json.update({current_category: current_count})
            current_count = None
    return need_json

Author: iamsiva11 | Project: Disaster-whatsapp-bot | Lines of code: 35 | Source file: message_parser.py
Example 17: define

def define(word, Webster, bestdef, changed, old_topic, new_topic):
    """Define a word, if desired by the user and if the topic has changed."""
    import answer
    if (Webster != "") and (not changed):
        return (False, Webster)
    if Webster == "":
        answer.write("The word " + word + " was not defined under the topic " + old_topic + ".")
    else:
        asked = ask.getPlay("The word " + word + " was defined under the topic " + old_topic + " as " + Webster + ".\nDo you want this meaning to carry over to the new topic " + new_topic + "? ")
        if yes(asked):
            return (False, Webster)
    undone = True
    dno = 1
    while undone:
        if dno == bestdef:
            dno += 1
        string = word + ".n." + str(dno)
        try:
            if dno < len(wordnet.synsets(word, pos=wordnet.NOUN)):
                asked = ask.getPlay("Does " + wordnet.synset(string).definition() + " work for your usage of " + word + "? ")
                undone = not yes(asked)  # the original passed the 'ask' module here by mistake
                newdef = wordnet.synset(string).definition()
                dno += 1
            else:
                newdef = ask.getPlay("Then how would you define " + word + "? ")
                undone = False
        except Exception:
            newdef = ask.getPlay("How would you define " + word + "? ")
            undone = False
    return (True, newdef)

Author: jjs0sbw | Project: revelator | Lines of code: 28 | Source file: define.py
Example 18: similarity

def similarity(word1, word2, tag):
    obj1 = wn.synset(word1 + "." + tag + ".01")
    obj2 = wn.synset(word2 + "." + tag + ".01")
    # Resnik similarity needs an information-content dictionary; the original
    # also loaded 'ic-brown.dat' into an unused semcor_ic variable
    brown_ic = wordnet_ic.ic('ic-brown.dat')
    value = obj1.res_similarity(obj2, brown_ic)
    return value

Author: Lightyagami1 | Project: exploratoryProject | Lines of code: 8 | Source file: project.py
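A minimal usage sketch for the IC-based measure above (assuming the information-content data has been downloaded via nltk.download('wordnet_ic'); the word pair is illustrative):

from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic

brown_ic = wordnet_ic.ic('ic-brown.dat')
print(wn.synset('dog.n.01').res_similarity(wn.synset('cat.n.01'), brown_ic))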
Example 19: wsd

def wsd(self, sent, target, tag=None):
    if tag is None:
        self.scoring(sent, target)
    else:
        self.scoring(sent, target, tag)
    sense = self.getGreedyBestSenses(10)
    print(wordnet.synset(sense).definition())
    return sense

Author: blodstone | Project: CCS590v2 | Lines of code: 8 | Source file: AdaptedLesk.py
Example 20: get_relative_similarity

def get_relative_similarity(a, b):
    '''
    Return the path similarity between two words a and b.
    Used when merging two clusters.
    '''
    x = wn.synset("%s.n.01" % a)
    y = wn.synset("%s.n.01" % b)
    return x.path_similarity(y)

Author: ParinSanghavi | Project: Mining-Quality-Parameters-from-Yelp-Reviews-for-Improving-Businesses | Lines of code: 8 | Source file: cluster_yelp.py
Note: the nltk.corpus.wordnet.synset examples in this article were collected by 纯净天空 from source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many programmers; copyright of the source code remains with the original authors. Consult each project's license before redistributing or reusing the code; please do not repost without permission.