This article collects typical usage examples of the nltk.corpus.wordnet.synsets function in Python. If you have been wondering what exactly synsets does and how to use it, the curated examples below may help. 20 code examples of the synsets function are shown, sorted by popularity by default.
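Before the project examples, here is a minimal sketch of what wordnet.synsets returns (it assumes the WordNet data has already been fetched with nltk.download('wordnet')):

from nltk.corpus import wordnet as wn

# Each synset represents one sense of the word; the list may be empty.
for s in wn.synsets('dog'):
    print(s.name(), s.pos(), s.definition())

# Lookups can be restricted to a single part of speech:
print(wn.synsets('dog', pos=wn.VERB))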
Example 1: findSimilarity

def findSimilarity(self):
    # We recommend only one item, so the first element of recoItems is the
    # recommended item; the second element is the list of alternative items.
    '''Get the exact synset of the first item and up to 10 synsets (to reduce
    computation cost) for each item in the second list, over which the first
    item was preferred/recommended.'''
    recommendation = wn.synsets(self.recoItems[0])
    recommendationFiltered = []
    for eachSyn in recommendation:
        if self.recoItems[0] in str(eachSyn):
            recommendationFiltered.append(eachSyn)
    choices = {}
    for eachItem in self.recoItems[1]:
        choices[eachItem] = wn.synsets(eachItem)[:10]  # keep only the first 10 synsets
    choiceScores = {}
    for key, value in choices.items():  # Python 3: items() replaces iteritems()
        choiceScores[key] = []
        for eachValue in choices[key]:
            for eachRecoSyn in recommendationFiltered:
                choiceScores[key].append(eachRecoSyn.path_similarity(eachValue))
    maxChoiceScores = {}
    for eachKey in choiceScores.keys():
        maxChoiceScores[eachKey] = max(choiceScores[eachKey])
    return maxChoiceScores

Author: malughanshyam | Project: B552_Knowledge_Based_AI_Project | Lines: 29 | Source: SimilarityFinder.py
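Example 1 scores candidates with Synset.path_similarity, which measures the shortest path between two synsets in the hypernym graph. A minimal illustration (the printed value is typical for the default WordNet data):

from nltk.corpus import wordnet as wn

dog = wn.synsets('dog')[0]
cat = wn.synsets('cat')[0]
# path_similarity returns a value in (0, 1], or None when no path exists
print(dog.path_similarity(cat))  # 0.2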
Example 2: parseLyrics2

def parseLyrics2(outlist):
    bandLyricInfo = {}
    master = [['death', 0], ['violence', 0], ['sacrifice', 0], ['nature', 0],
              ['peace', 0], ['storm', 0], ['spirit', 0], ['dark', 0],
              ['scream', 0], ['pain', 0], ['blood', 0], ['flesh', 0],
              ['love', 0], ['greed', 0], ['poison', 0], ['anger', 0],
              ['revenge', 0], ['misery', 0], ['hell', 0], ['heaven', 0],
              ['hate', 0], ['soul', 0], ['battle', 0], ['ghost', 0],
              ['joy', 0], ['light', 0], ['omen', 0], ['miracle', 0],
              ['magic', 0], ['universe', 0], ['disease', 0], ['god', 0],
              ['satan', 0], ['struggle', 0], ['heart', 0]]
    for key in outlist:
        templist = copy.deepcopy(master)
        raw = outlist[key].lower()
        # \w+ drops punctuation (re.LOCALE is not valid for str patterns in Python 3)
        words = re.findall(r'\w+', raw, flags=re.UNICODE)
        imp_words = [x for x in words if x not in stopwords.words('english')]  # filter noise
        lmt = WordNetLemmatizer()
        words_new = [lmt.lemmatize(x) for x in imp_words]  # fixed: lemmatize the filtered words
        dw = list(set(words_new))
        for word in dw:
            for m in templist:
                p1 = wordnet.synsets(word)
                p2 = wordnet.synsets(m[0])
                if len(p1) > 0 and len(p2) > 0:
                    c = p1[0].wup_similarity(p2[0])
                    if c is not None and c > m[1]:  # wup_similarity may return None
                        m[1] = c
        # sort theme words according to similarity, keep the top 10
        tnew = sorted(templist, key=lambda val: val[1], reverse=True)[:10]
        # remove the score column
        for l in tnew:
            del l[1]
        print('Done', key)
        bandLyricInfo[key] = tnew
    return bandLyricInfo

Author: sam8401 | Project: pyMusicRecommender | Lines: 32 | Source: mainScript.py
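Example 2 uses Wu-Palmer similarity, which weights the depth of the lowest common ancestor rather than the raw path length. A small sketch of the difference:

from nltk.corpus import wordnet as wn

dog = wn.synset('dog.n.01')
cat = wn.synset('cat.n.01')
print(dog.path_similarity(cat))  # about 0.2
print(dog.wup_similarity(cat))   # about 0.86 -- much higher for close cousins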
Example 3: relation

def relation(a, b):
    '''Given two words (strings), return a number that denotes the relation
    between the two words.

    Parameters
    ----------
    a : string
    b : string

    Returns
    -------
    float
        relation (less than 1) between the two strings

    Notes
    -----
    Applies BFS on the NLTK WordNet graph to find the least distance between
    the two given words. If the distance is x, the function returns 1/(x+1);
    otherwise it returns 0.
    '''
    a = wn.synsets(a)
    b = wn.synsets(b)
    visited_a = set()
    visited_b = set()
    stemmed_a = set()
    stemmed_b = set()
    depth = 0
    while True:
        if depth > 2:
            return 0
        new_a = set()
        depth += 1
        for syn in a:
            if stemmer.stem(syn.lemma_names()[0]) in stemmed_b:  # NLTK 3: lemma_names()
                return 1.0 / depth
            if syn in visited_a:
                continue
            visited_a.add(syn)
            stemmed_a.add(stemmer.stem(syn.lemma_names()[0]))
            # expand the frontier with hyponyms and the synsets of every lemma name
            hyp = set(syn.hyponyms())
            for lemma in syn.lemma_names():
                hyp |= set(wn.synsets(lemma))
            new_a |= hyp
        a = new_a
        new_b = set()
        depth += 1
        for syn in b:
            if stemmer.stem(syn.lemma_names()[0]) in stemmed_a:
                return 1.0 / depth
            if syn in visited_b:
                continue
            visited_b.add(syn)
            stemmed_b.add(stemmer.stem(syn.lemma_names()[0]))
            hyp = set(syn.hyponyms())
            for lemma in syn.lemma_names():
                hyp |= set(wn.synsets(lemma))
            new_b |= hyp
        b = new_b

Author: jamesosullivan | Project: Sentiment_Analysis | Lines: 60 | Source: sentiment_analysis.py
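Example 3 grows its BFS frontier from each synset's hyponyms plus the synsets of every lemma name. The same neighborhood can be inspected directly; a minimal sketch:

from nltk.corpus import wordnet as wn

syn = wn.synsets('tree')[0]
print(syn.hyponyms()[:3])   # more specific synsets, one BFS step down
print(syn.lemma_names())    # surface forms whose synsets also join the frontier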
Example 4: tell

def tell(para1, para2):
    # Strip anything that is not alphanumeric
    para1 = re.sub(r'[^\w ]+', '', para1)
    para2 = re.sub(r'[^\w ]+', '', para2)
    para1 = para1.lower().split()
    para2 = para2.lower().split()
    if para1 == [] or para2 == []:
        return 0
    # Drop stopwords, unless doing so would empty the paragraph
    if [t for t in para1 if t not in stopwords]:
        para1 = [t for t in para1 if t not in stopwords]
    if [t for t in para2 if t not in stopwords]:
        para2 = [t for t in para2 if t not in stopwords]
    score = len(set(para1).intersection(para2))
    score_1 = float(score) / math.sqrt(len(para2) * len(para1))
    # Expand each paragraph with the lemma names of every synset of every word.
    # This yields duplicates; converting to set below removes them.
    para1_with_dictionary = [l.name().lower() for word in para1
                             for s in wordnet.synsets(word) for l in s.lemmas()]
    para2_with_dictionary = [l.name().lower() for word in para2
                             for s in wordnet.synsets(word) for l in s.lemmas()]
    score1 = len(set(para1_with_dictionary).intersection(para2))
    score2 = len(set(para2_with_dictionary).intersection(para1))
    score_2 = float(max(score1, score2)) / min(len(para2), len(para1))
    score = (score_1 + score_2) / 2
    return score

Author: sanjitsbatra | Project: Sentiment_Mining_Twitter | Lines: 35 | Source: similarity.py
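The core trick in Example 4 is expanding a word into the lemma names of all of its synsets. In isolation (the word here is just for illustration):

from nltk.corpus import wordnet as wn

expansion = {l.name().lower() for s in wn.synsets('happy') for l in s.lemmas()}
print(expansion)  # e.g. {'happy', 'glad', 'felicitous', 'well-chosen'}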
Example 5: parse_file

def parse_file(f):
    for l in f.readlines():
        word = l.strip()
        synsets = wn.synsets(word)
        if word in synonym_values:
            continue
        # get first-order synonyms
        synonyms = set()
        for synset in synsets:
            synonyms |= set(synset.lemma_names())  # NLTK 3: lemma_names()
        # add in synonyms of those synonyms (iterate a snapshot while extending)
        for syn in list(synonyms):
            for syn_synset in wn.synsets(syn):
                synonyms |= set(syn_synset.lemma_names())
        synonyms_with_values = synonyms & set(synonym_values.keys())
        if not len(synonyms_with_values):
            continue
        # running average over the known synonym sentiment values
        avg = 0
        total = 0
        for syn in synonyms_with_values:
            value = synonym_values[syn]
            avg = (avg * total + float(value)) / (total + 1)
            total += 1
        synonym_values[word] = int(abs_ceil(avg))  # abs_ceil is a project helper
    f.close()

Author: tomconroy | Project: extend-afinn | Lines: 34 | Source: compile-sentiment.py
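Example 5's synonym harvesting can also be written with lemma_names(), which skips the intermediate Lemma objects; a brief sketch:

from nltk.corpus import wordnet as wn

synonyms = set()
for s in wn.synsets('small'):
    synonyms |= set(s.lemma_names())
print(sorted(synonyms)[:5])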
Example 6: scoreFile

def scoreFile(filename, targetWords, verbose=False):
    meanScore = 0.0
    baseWordCount = 0
    wordCount = 0
    f = open(filename)  # Python 3: open() replaces file()
    for l in f:
        wordScored = False
        fields = [x.strip().lower() for x in re.split(r"\s+", l)]
        if (targetWords is not None) and (fields[0] not in targetWords):
            continue
        baseSynsets = wordnet.synsets(fields[0])
        if not baseSynsets:  # synsets() returns an empty list, never None
            continue
        for word in fields[1:]:
            # Ignore the identical word if it occurs
            if word == fields[0]:
                continue
            targetSynsets = wordnet.synsets(word)
            if not targetSynsets:
                continue
            wordScore = scoreWord(baseSynsets, targetSynsets)
            meanScore += wordScore
            wordCount += 1
            wordScored = True
        baseWordCount += 1 if wordScored else 0
        if verbose:
            if (baseWordCount > 0) and (baseWordCount % 1000 == 0):
                print("Words scored : %d, Current Score : %f" % (
                    baseWordCount,
                    meanScore / (wordCount if wordCount > 0 else 1),
                ))
    f.close()
    meanScore /= wordCount if wordCount > 0 else 1
    return {"baseWordCount": baseWordCount, "totalWordCount": wordCount, "meanScore": meanScore}

Author: tgflynn | Project: NLP-Challenge | Lines: 34 | Source: score.py
Example 7: hypernyms

def hypernyms(self, word, question):
    hyper = []
    sentence = self.parse(question)
    pos = ''
    for sent, tag in sentence[0]:
        if sent == word:
            pos = tag
            break
    # Map the Penn Treebank tag onto the corresponding WordNet POS constant
    if pos in ['JJ', 'JJR', 'JJS']:
        wn_pos = wn.ADJ
    elif pos in ['NN', 'NNS']:
        wn_pos = wn.NOUN
    elif pos in ['VB', 'VBG', 'VBD', 'VBN', 'VBP', 'VBZ']:
        wn_pos = wn.VERB
    elif pos in ['RB', 'RBR', 'RBS']:
        wn_pos = wn.ADV
    else:
        return hyper
    # Collect up to 7 distinct lemma names from the POS-restricted synsets
    for synset in wn.synsets(word, pos=wn_pos):
        for lemma in synset.lemmas():
            if lemma.name() not in hyper and len(hyper) < 7:
                hyper.append(lemma.name())
    return hyper

Author: Saurav-95 | Project: BrainX | Lines: 29 | Source: qp.py
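Example 7's POS restriction matters because many words have synsets under several parts of speech; a quick check:

from nltk.corpus import wordnet as wn

print(len(wn.synsets('run')))               # all parts of speech
print(len(wn.synsets('run', pos=wn.VERB)))  # verbs only
print([l.name() for l in wn.synsets('run', pos=wn.VERB)[0].lemmas()])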
Example 8: c_wn_max_path_similarity

def c_wn_max_path_similarity(score, word_from, word_to):
    """
    WordNet path similarity for the most similar synsets (1 if same word).
    This feature can be precomputed by EQUALS.
    """
    # Enforce returning 1 when the words are equal (would be 0 if no synset found).
    # NOTE: since EQUALS precomputes this feature, the assignment in the second
    #       if is redundant. It is maintained to keep independence from the
    #       implementation of EQUALS.
    if not score.is_feature_set[score.EQUALS]:
        c_equals(score, word_from, word_to)
    if score.features[score.EQUALS] == 1:
        score.set_feature(score.WN_MAX_PATH_SIMILARITY, 1)
        return
    # Compute the actual similarity
    _r = 0
    for ss_from in wn.synsets(word_from.text):
        for ss_to in wn.synsets(word_to.text):
            current_similarity = ss_to.path_similarity(ss_from)
            if current_similarity is not None and current_similarity > _r:
                _r = current_similarity
    score.set_feature(score.WN_MAX_PATH_SIMILARITY, _r)

Author: dario-chiappetta | Project: TDM-lu | Lines: 27 | Source: word.py
Example 9: CollectSemcorSupersenses

# NOTE: this snippet targets the pre-3.0 NLTK API, where tree labels are
# exposed as .node and Synset.name/.lexname are attributes. In NLTK 3 these
# become .label(), .name() and .lexname().
def CollectSemcorSupersenses():
    oracle_matrix = collections.defaultdict(WordSupersenses)
    for sent in semcor.tagged_sents(tag='both'):
        for chk in sent:
            # chunk labels like 'group.05' encode the lemma and the sense number
            if chk.node and len(chk.node) > 3 and chk.node[-3] == '.' and chk.node[-2:].isdigit():
                if chk[0].node.startswith('N'):
                    pos = "n"
                elif chk[0].node.startswith('V'):
                    pos = "v"
                else:
                    continue
                lemmas = chk.node[:-3]
                wnsn = int(chk.node[-2:])
                ssets = wn.synsets(lemmas, pos)
                sorted_ssets = sorted(ssets, key=lambda x: x.name)
                # prefer synsets whose name contains one of the lemma's parts
                filtered_ssets = None
                for lemma in lemmas.split("_"):
                    if not filtered_ssets:
                        filtered_ssets = [x for x in sorted_ssets if lemma in x.name]
                if filtered_ssets:
                    sorted_ssets = filtered_ssets
                try:
                    supersense = sorted_ssets[wnsn - 1].lexname  # e.g. 'noun.group'
                except IndexError:
                    continue
                for lemma in lemmas.split("_"):
                    ssets = wn.synsets(lemma, pos)
                    if len(ssets) > 0:
                        if lemma.isdigit():
                            lemma = "0"
                        oracle_matrix[lemma].Add(supersense, "semcor")
    return oracle_matrix

Author: wammar | Project: multilingual-embeddings-eval-portal | Lines: 33 | Source: supersense_matrix.py
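The supersense in Example 9 is WordNet's lexicographer file name, available per synset; a minimal NLTK 3 sketch:

from nltk.corpus import wordnet as wn

print(wn.synset('dog.n.01').lexname())  # e.g. 'noun.animal'
print(wn.synset('run.v.01').lexname())  # e.g. 'verb.motion'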
Example 10: generatesynsets

def generatesynsets(table):
    table2 = []
    table3 = {}
    for i in table:
        # choose the WordNet POS from the word's tag in i[1]
        if re.findall(r"N.*", i[1]):
            x = wns.synsets(i[0], pos=wns.NOUN)
        elif re.findall(r"V.*", i[1]):
            x = wns.synsets(i[0], pos=wns.VERB)
        else:
            x = []  # fixed: x was unbound when the tag was neither noun nor verb
        for z in range(len(x)):
            for y in x[z].lemma_names():  # NLTK 3: lemma_names()
                if y not in ['match', 'be', 'in', 'is']:
                    table2.append((y, 'SYN'))
    # count how often each (synonym, 'SYN') pair was generated
    for i in table2:
        try:
            table3[i] += 1
        except KeyError:
            table3[i] = 1
    return table3

Author: Jigar54 | Project: Cricinfo-Query-Responder-NLP | Lines: 25 | Source: phase2-part3.py
Example 11: polar_values

def polar_values(self, positive_seeds, negative_seeds):
    self.values = []
    POS_tags = list(set(nltk.pos_tag(WordPunctTokenizer().tokenize(self.data))))
    words = []
    for (w, s) in POS_tags:
        w = w.lower()
        POS = self.get_wordnet_pos(s)
        if POS == '' or re.match(r"^[\w]+$", w) is None:
            words.append('0')
        else:
            words.append(w + "." + POS + ".01")  # sense key such as 'good.a.01'
    # collect every synset of every seed word as the polarity reference sets
    negative_set = []
    for nw in negative_seeds:
        for s in wordnet.synsets(nw):
            negative_set.append(s)
    positive_set = []
    for pw in positive_seeds:
        for s in wordnet.synsets(pw):
            positive_set.append(s)
    self.eval_words(words, positive_set, negative_set)
    return self.values

Author: sherifEwis | Project: polar_analyzer | Lines: 25 | Source: polar_analyzer.py
Example 12: getSynonym

def getSynonym(word, tag):
    # Map Penn Treebank tags directly onto WordNet POS constants
    pos_list = {"JJ": wn.ADJ, "JJR": wn.ADJ, "JJS": wn.ADJ,
                "NN": wn.NOUN, "NNS": wn.NOUN, "NPS": wn.NOUN, "NP": wn.NOUN,
                "RBR": wn.ADV, "RBS": wn.ADV, "RB": wn.ADV,
                "VB": wn.VERB, "VBD": wn.VERB, "VBG": wn.VERB,
                "VBN": wn.VERB, "VBP": wn.VERB, "VBZ": wn.VERB}
    li = {}
    if tag in pos_list:
        for key in wn.synsets(word, pos=pos_list[tag]):
            for s in key.lemma_names():  # NLTK 3: lemma_names()
                li[s] = s  # dict keys act as a set of synonyms
    return list(li.keys())

Author: pulishahu | Project: nlp | Lines: 32 | Source: test.py
Example 13: xhyper

def xhyper(words) -> [str]:
    '''returns the highest-order x hypernyms'''
    x = UI.request_x()
    print("\nNote: this program will use the first parallel synset if there are any")
    print("\nGathering data...")
    result = [x]
    hyp = lambda w: w.hypernyms()
    # This would pick up the deepest branch's depth -> valueAt returns None -> returns None
    # depth = lambda L: isinstance(L, list) and max(map(depth, L)) + 1
    for i in range(len(words)):
        synsets = wordnet.synsets(words[i])
        if len(synsets) > 0:
            for s in range(len(synsets)):
                hyper = synsets[s].tree(hyp)  # the synset's full hypernym tree
                if hyper[0].pos() in ['a', 's', 'r']:
                    # adjectives and adverbs have no hypernym hierarchy
                    result.append([words[i], 'None', 'None', [None]])
                    continue
                d = first_depth(hyper) - 1
                xhyper = []
                for j in range(x):
                    xhyper.append(valueAt(d - j, hyper))
                    if xhyper[-1] is None:
                        break
                result.append([words[i], pos_redef(hyper[0].pos()), hyper[0], xhyper])
        else:
            result.append([words[i], 'None', 'None', [None]])
    return result

Author: Malthanatos | Project: Bifrost | Lines: 27 | Source: controller.py
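Example 13 walks the nested tree built by Synset.tree(); for many uses the flat hypernym_paths() is easier to handle. A small sketch:

from nltk.corpus import wordnet as wn

dog = wn.synset('dog.n.01')
paths = dog.hypernym_paths()  # each path runs from the root down to the synset
print([s.name() for s in paths[0]])  # starts at entity.n.01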
Example 14: userEnteredWordSensor

# Sketch code: exactly_right / nearly_right, cursor, session, word and
# perform() are defined elsewhere in the project.
def userEnteredWordSensor(user_input):
    # Which stage are we currently in -- AS, IM or WI?
    # What response did the user enter?
    if exactly_right:
        # Save the complete action plan.
        cursor.executeQuery("insert into path values('',session['uid'],session['wordid'])")
        pathid = cursor.executeQuery("select pathid from path where wordid = session['wordid']")
        cursor.executeQuery("insert into waypoint values('',pathid,session['type'],session['waypoint_info'])")
        # LOG the path, then proceed to the next word.
        perform()
    elif nearly_right:
        # "Nearly right" means the input matches one of the word's tags ...
        wid = cursor.executeQuery("Select wordid from words where word like 'session['word']'")
        tags = cursor.executeQuery("Select tags from words where wordid=wid")
        for tag in tags:
            if tag == word:
                # perform action sequence for NEXT
                break
        # ... or a synonym of the word itself ...
        for s in wn.synsets(session['word']):  # fixed quoting: look up the session word
            if s == user_input:
                # perform action sequence for NEXT
                break
        else:
            # ... or a synonym of one of the tags?
            for s in wn.synsets(tag):  # fixed quoting: look up the tag variable
                if s == user_input:
                    # perform action sequence for NEXT
                    break

Author: ClaudiaWinchester | Project: dumb-charades-ai | Lines: 32 | Source: sensors.py
Example 15: wndist

def wndist(fs):
    """
    Distance between NP1 and NP2 in WordNet (using the first sense only)
    """
    wndist = -100000
    i_pos = __get_pos__(fs.article, fs.sentence, fs.offset_begin, fs.offset_end)
    j_pos = __get_pos__(fs.article, fs.sentence_ref, fs.offset_begin_ref, fs.offset_end_ref)
    if i_pos.startswith('NN') and j_pos.startswith('NN') and not i_pos.endswith('P') and not j_pos.endswith('P'):
        # considering only common nouns (NNP/NNPS are excluded)
        lemmatizer = nltk.WordNetLemmatizer()
        i = lemmatizer.lemmatize(fs.i_cleaned, pos='n')
        j = lemmatizer.lemmatize(fs.j_cleaned, pos='n')
        synsets_i = wn.synsets(i)
        synsets_j = wn.synsets(j)
        if len(synsets_i) > 0 and len(synsets_j) > 0:
            wn_sense1_i = synsets_i[0]
            wn_sense1_j = synsets_j[0]
            # lch_similarity is only defined for synsets with the same POS
            if wn_sense1_i.pos() == wn_sense1_j.pos():
                wndist = wn_sense1_i.lch_similarity(wn_sense1_j)
                wndist = ceil(wndist * 100) / 100.0  # round up to two decimals
    return "wndist={}".format(wndist)

Author: pkarmstr | Project: coreference-project | Lines: 33 | Source: feature_functions.py
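Leacock-Chodorow similarity, used in Example 15, requires both synsets to share a POS and returns an unbounded positive score; a minimal sketch:

from nltk.corpus import wordnet as wn

dog = wn.synset('dog.n.01')
cat = wn.synset('cat.n.01')
print(dog.lch_similarity(cat))  # about 2.03 for the default WordNet data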
Example 16: get_least_specific

def get_least_specific(n, word_list):
    # pair each noun with the minimum depth of its synsets in the hierarchy
    word_list = [(w, min(synset.min_depth() for synset in wn.synsets(w, 'n')))
                 for w in word_list if len(wn.synsets(w, 'n')) > 0]
    # drop roots (depth 0), sort shallow-first, return the n least specific words
    return [w for (w, d) in sorted([p for p in word_list if p[1] > 0],
                                   key=itemgetter(1))[:n]]

Author: xiaojunf | Project: cis530 | Lines: 7 | Source: hw4_code_xiaojunf.py
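min_depth(), which Example 16 uses as a specificity proxy, counts the edges on the shortest path from a synset up to its root; a quick check:

from nltk.corpus import wordnet as wn

print(wn.synset('entity.n.01').min_depth())  # 0 -- the noun root
print(wn.synset('dog.n.01').min_depth())     # e.g. 8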
Example 17: subclass

def subclass(feats):
    if string_match(feats).endswith("False"):
        try:
            result = False
            i_clean = wn.morphy(feats.i_cleaned.lower(), wn.NOUN)
            i_synsets = wn.synsets(i_clean)
            j_clean = wn.morphy(feats.j_cleaned.lower(), wn.NOUN)
            j_synsets = wn.synsets(j_clean)

            def get_common_hypernym(i_synset, j_synset):
                i_hypernyms = i_synset.hypernyms()
                j_hypernyms = j_synset.hypernyms()
                if len(i_hypernyms) == 0:
                    i_synset = i_synset.instance_hypernyms()[0]
                if len(j_hypernyms) == 0:
                    j_synset = j_synset.instance_hypernyms()[0]
                subc = i_synset.common_hypernyms(j_synset)
                return (i_synset in subc) or (j_synset in subc)

            for synset in i_synsets:
                for syn in j_synsets:
                    result = get_common_hypernym(synset, syn)
                    if result:
                        break
                if result:
                    break
            return "subclass={}".format(result)
        except Exception:  # e.g. morphy() returned None or no instance hypernym exists
            return "subclass={}".format(False)
    else:
        return "subclass={}".format(False)

Author: pkarmstr | Project: coreference-project | Lines: 30 | Source: feature_functions.py
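Example 17's ancestry test builds on common_hypernyms(); lowest_common_hypernyms() narrows that down to the most specific shared ancestor. A brief sketch:

from nltk.corpus import wordnet as wn

dog = wn.synset('dog.n.01')
cat = wn.synset('cat.n.01')
print(wn.synset('carnivore.n.01') in dog.common_hypernyms(cat))  # True
print(dog.lowest_common_hypernyms(cat))  # [Synset('carnivore.n.01')]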
Example 18: ch03_42_wordnet_semantic_index

def ch03_42_wordnet_semantic_index():
    from nltk.corpus import webtext
    from nltk.corpus import wordnet as wn
    postings = []
    docids = {}
    for (pos, fileid) in enumerate(webtext.fileids()):
        docids[pos] = fileid
        wpos = 0
        words = webtext.words(fileid)
        for word in words:
            try:
                postings.append((word.lower(), (pos, wpos)))
                # index the word's first synset and that synset's first hypernym
                offset = wn.synsets(word)[0].offset()  # NLTK 3: offset()
                postings.append((offset, (pos, wpos)))
                poffset = wn.synsets(word)[0].hypernyms()[0].offset()
                postings.append((poffset, (pos, wpos)))
            except IndexError:
                continue
            wpos = wpos + 1
    index = nltk.Index(postings)
    query = "canine"
    qpostings = []
    qpostings.extend([(pos, wpos) for (pos, wpos) in index[query]])
    try:
        offset = wn.synsets(query)[0].offset()
        qpostings.extend([(pos, wpos) for (pos, wpos) in index[offset]])
    except IndexError:
        pass
    for (pos, wpos) in qpostings:
        left = webtext.words(docids[pos])[wpos - 4:wpos]
        right = webtext.words(docids[pos])[wpos:wpos + 4]
        print(left, right)

Author: 447327642 | Project: nltk-examples | Lines: 32 | Source: ch03_ex.py
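The offsets indexed in Example 18 are the synsets' numeric IDs in the WordNet database files, so sense-level postings survive across different surface forms; a minimal sketch:

from nltk.corpus import wordnet as wn

s = wn.synsets('canine')[0]
print(s.offset(), s.name())  # the synset's unique database offset and its name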
Example 19: add_word

def add_word(word):
    maximum = 0
    maxJCN = 0
    flag = 0
    for chain in lexical_chains:          # for all chains that are present
        for synset in wn.synsets(word):   # for all synsets of the current word
            for sense in chain.senses:    # for all senses in the current chain
                similarity = sense.wup_similarity(synset)  # may return None
                if similarity is not None and similarity >= maximum:
                    if similarity >= threshold:
                        JCN = sense.jcn_similarity(synset, brown_ic)
                        if JCN >= jcnTreshold:
                            ps = sense.path_similarity(synset)
                            if ps is not None and ps >= 0.2:
                                if JCN >= maxJCN:
                                    maximum = similarity
                                    maxJCN = JCN
                                    maxChain = chain
                                    maxSynset = synset  # fixed: remember the matching synset
                                    flag = 1
    if flag == 1:
        maxChain.addWord(word)
        maxChain.addSense(maxSynset)
        return
    lexical_chains.append(Chain([word], wn.synsets(word)))

Author: rakeshadk7 | Project: LexicalChains | Lines: 26 | Source: Chains.py
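jcn_similarity, used in Example 19 alongside wup and path similarity, needs an information-content dictionary; one ships with NLTK (after nltk.download('wordnet_ic')). A minimal sketch:

from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic

brown_ic = wordnet_ic.ic('ic-brown.dat')  # IC counts estimated from the Brown corpus
dog = wn.synset('dog.n.01')
cat = wn.synset('cat.n.01')
print(dog.jcn_similarity(cat, brown_ic))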
Example 20: syns

def syns(q):
    # synsets of q plus their hypernym/hyponym/similar-to closures (depth 3),
    # minus the abstract synsets returned by the project's ABSTRACT() helper
    return set(wn.synsets(q) + [x for y in wn.synsets(q) for x in set(
        set(y.closure(lambda a: a.hypernyms(), depth=3)) |
        set(y.closure(lambda a: a.hyponyms(), depth=3)) |
        set(y.closure(lambda a: a.hyponyms() + a.similar_tos(), depth=3))
    )]) - ABSTRACT()

def defs(q):
    return [s.definition() for s in wn.synsets(q)]  # fixed: was wn.synsets(word)

Author: jmandel | Project: synonym-server | Lines: 7 | Source: app.py
Note: the nltk.corpus.wordnet.synsets examples in this article were compiled by 纯净天空 from open-source projects on GitHub, MSDocs and similar code hosting platforms. The snippets were selected from projects contributed by the open-source community, and copyright remains with the original authors; consult each project's license before redistributing or reusing the code. Do not repost without permission.