本文整理汇总了Python中nltk.corpus.wordnet_ic.ic函数的典型用法代码示例。如果您正苦于以下问题:Python ic函数的具体用法?Python ic怎么用?Python ic使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了ic函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: similarity_by_infocontent
def similarity_by_infocontent(sense1, sense2, option):
    """Return an information-content similarity score between two senses.

    Parameters:
        sense1, sense2: WordNet synsets to compare.
        option: similarity family -- 'res'/'resnik', 'jcn'/'jiang-conrath',
            or 'lin'.

    Returns the score, 0 when the senses have different parts of speech
    (IC-based similarity is undefined across POS), or None for an unknown
    option (made explicit here; the original fell off the end).

    Cleanup: removed an unused `info_contents` list of .dat filenames and
    the commented-out code that referenced it.
    """
    # NOTE(review): in NLTK 3 `Synset.pos` is a method, so comparing the
    # bound methods would always differ; this code assumes the NLTK 2
    # attribute form -- confirm against the project's NLTK version.
    if sense1.pos != sense2.pos:
        return 0
    if option in ['res', 'resnik']:
        return wn.res_similarity(sense1, sense2, wnic.ic('ic-bnc-resnik-add1.dat'))
    elif option in ['jcn', "jiang-conrath"]:
        return wn.jcn_similarity(sense1, sense2, wnic.ic('ic-bnc-add1.dat'))
    elif option in ['lin']:
        return wn.lin_similarity(sense1, sense2, wnic.ic('ic-bnc-add1.dat'))
    return None  # unknown option name
开发者ID:ChenglongChen,项目名称:pywsd,代码行数:32,代码来源:similarity.py
示例2: similarity
def similarity(word1, word2, tag):
    """Resnik similarity between the first senses of two words.

    Parameters:
        word1, word2: lemma strings.
        tag: WordNet POS letter used to build the synset name, e.g. 'n'.

    Returns the Resnik similarity computed with Brown-corpus information
    content.  Raises WordNetError if `word.tag.01` names no synset.

    Bug fix: the original also built a `semcor_ic` that actually loaded
    'ic-brown.dat' (copy-paste error) and was never used; it is removed.
    """
    obj1 = wn.synset(word1 + "." + tag + ".01")
    obj2 = wn.synset(word2 + "." + tag + ".01")
    brown_ic = wordnet_ic.ic('ic-brown.dat')  # information content
    value = obj1.res_similarity(obj2, brown_ic)
    return value
开发者ID:Lightyagami1,项目名称:exploratoryProject,代码行数:8,代码来源:project.py
示例3: test_wordnet_similarities
def test_wordnet_similarities(self):
    """Check known path-based and IC-based similarity scores for dog/cat."""
    dog = S('dog.n.01')
    cat = S('cat.n.01')
    # Path-based measures.
    self.assertAlmostEqual(cat.path_similarity(cat), 1.0)
    self.assertAlmostEqual(dog.path_similarity(cat), 0.2)
    self.assertAlmostEqual(dog.lch_similarity(cat), 2.028, places=3)
    self.assertAlmostEqual(dog.wup_similarity(cat), 0.8571, places=3)
    # Information-content measures.
    self.assertAlmostEqual(
        dog.jcn_similarity(cat, wnic.ic('ic-brown.dat')), 0.4497, places=3)
    self.assertAlmostEqual(
        dog.lin_similarity(cat, wnic.ic('ic-semcor.dat')), 0.8863, places=3)
开发者ID:alpaco42,项目名称:ML_Spring_2018,代码行数:11,代码来源:test_wordnet.py
示例4: _other_recognition
def _other_recognition(self, tagged_sentences, all_entities, question):
    """Extract candidate answers of type OTHER: passage nouns semantically
    related to the question's head word.

    Collects every NN-tagged token, removes nouns that are already part of
    a named entity, then keeps only those whose Lin similarity to the head
    synset lies strictly between the configured threshold and 0.9.
    Falls back to the unfiltered noun list when no head synset exists.
    """
    # Nouns retrieval
    nouns = []
    for sentence in tagged_sentences:
        nouns += filter(lambda x: x[1] == "NN", sentence)
    nouns = [noun for (noun, tag) in nouns]
    # Nouns filtering
    # Remove all entities that are nouns (entities may be multi-word,
    # so split them into individual tokens first).
    all_entities = set(itertools.chain(*map(str.split, all_entities)))
    nouns = [noun for noun in nouns if noun not in all_entities]
    features = QuestionClassifier.get_features(question.text, "hn")
    head = features["head"]
    if head == "":
        # No head word detected: cannot filter semantically.
        return nouns
    # Filter nouns with WordNet synsets
    try:
        threshold = float(MyConfig.get("answer_extraction", "other_threshold"))
    except MyConfigException as e:
        logger = logging.getLogger("qa_logger")
        logger.warning(str(e))
        threshold = 0.6  # fallback similarity threshold
    try:
        ic = wordnet_ic.ic(MyConfig.get("answer_extraction", "ic"))
    except MyConfigException as e:
        logger = logging.getLogger("qa_logger")
        logger.warning(str(e))
        ic = wordnet_ic.ic("ic-bnc.dat")  # fallback IC corpus
    result = []
    head_synsets = wn.synsets(head, pos=wn.NOUN)
    if len(head_synsets) == 0:
        # Head has no noun synset; try the question's main noun instead.
        noun_synsets = wn.synsets(features["noun"], pos=wn.NOUN)
        if len(noun_synsets) == 0:
            return nouns
        else:
            head_synset = noun_synsets[0]
    else:
        head_synset = head_synsets[0]
    for noun in nouns:
        try:
            noun_synset = wn.synsets(noun, pos=wn.NOUN)[0]
            # Upper bound 0.9 excludes nouns that are near-duplicates of
            # the head word itself.
            if threshold < noun_synset.lin_similarity(head_synset, ic) < 0.9:
                result.append(noun)
        except IndexError:
            # Noun has no synsets at all; skip it.
            continue
    return result
开发者ID:danigarabato,项目名称:qa,代码行数:53,代码来源:answer.py
示例5: test
def test():
col = nltk.TextCollection(nltk.corpus.brown)
brown_ic = wordnet_ic.ic('ic-brown.dat')
sc = SimilarityCalculator(col, 'bp', brown_ic)
sentence1 = preprocess("The jurors were taken into the courtroom in groups of 40 and asked to fill out a questionnaire.")
sentence2 = preprocess("About 120 potential jurors were being asked to complete a lengthy questionnaire.")
print sc.similarity_bidirectional(sentence1, sentence2)
开发者ID:varzan,项目名称:semantic-longestpoem,代码行数:7,代码来源:semsim.py
示例6: __init__
def __init__(self, sim_threshold=0.1, sim_weight=1, **kwds):
    """Initialise the link with a similarity threshold and weight.

    The Brown information-content data is loaded once per process into
    the module-level `brown_ic` cache.
    """
    global brown_ic
    super().__init__(**kwds)
    self.__threshold = sim_threshold
    self.__weight = sim_weight
    # Lazy one-time load of the shared IC corpus.
    if not brown_ic:
        brown_ic = wordnet_ic.ic('ic-brown.dat')
开发者ID:agarsev,项目名称:grafeno,代码行数:7,代码来源:sim_link.py
示例7: get_similarity
def get_similarity(self, synsets1, synsets2):
    """Return the highest Resnik similarity over all pairs from the two
    synset collections (0 when either collection is empty)."""
    ic = wordnet_ic.ic("ic-brown.dat")
    best = 0
    for left in synsets1:
        for right in synsets2:
            best = max(best, wn.res_similarity(left, right, ic))
    return best
开发者ID:prasnko,项目名称:nlp-event-coreference,代码行数:9,代码来源:LexicalFeatureExtractor.py
示例8: sensesim
def sensesim(ss1,ss2,metric):
if metric=='path':
sim=ss1.path_similarity(ss2)
elif metric=='lin':
sim=ss1.lin_similarity(ss2,wn_ic.ic('ic-brown.dat'))
elif metric=='jcn':
sim=ss1.jcn_similarity(ss2,wn_ic.ic('ic-brown.dat'))
elif metric=='res':
sim=ss1.res_similarity(ss2,wn_ic.ic('ic-brown.dat'))
elif metric=='lch':
sim=ss1.lch_similarity(ss2)
elif metric=='wup':
sim=ss1.wup_similarity(ss2)
else:
print "Unknown metric", metric
sim=0
return sim
开发者ID:julieweeds,项目名称:Compounds,代码行数:18,代码来源:compare.py
示例9: __init__
def __init__(self, parameters):
    """Configure the analyser from a parameter dict, falling back to the
    Analyser class defaults for anything unspecified."""
    self.parameters = parameters
    # Semcor information content is shared by all similarity computations.
    self.ic = wn_ic.ic('ic-semcor.dat')
    self.candidates = {}
    lookup = self.parameters.get
    self.wn_sim = lookup("wn_sim", Analyser.simmetric)
    self.synsetthresh = lookup("synset_thresh", Analyser.synsetthresh)
    self.totalthresh = lookup("total_thresh", Analyser.totalthresh)
    self.propthresh = lookup("prop_thresh", Analyser.propthresh)
    self.simthresh = lookup("sim_thresh", Analyser.simthresh)
开发者ID:julieweeds,项目名称:SentenceCompletionChallenge,代码行数:10,代码来源:senses.py
示例10: get_lin_distance
def get_lin_distance(self, word1, word2):
    """Lin similarity between the first synsets of two words.

    Returns 0 when either word has no synsets or the similarity
    computation fails (e.g. incompatible parts of speech).

    Fixes: `wn.synsets` was called twice per word (now once); the Brown IC
    corpus was loaded even when returning early (now loaded only when
    needed); the bare `except:` (which would also swallow KeyboardInterrupt)
    is narrowed to `except Exception`.
    """
    synsets1 = wn.synsets(word1)
    synsets2 = wn.synsets(word2)
    if not synsets1 or not synsets2:
        return 0
    brown_ic = wordnet_ic.ic('ic-brown.dat')
    try:
        return synsets1[0].lin_similarity(synsets2[0], brown_ic)
    except Exception:
        # Best-effort: any similarity failure counts as "no relation".
        return 0
开发者ID:kanghj,项目名称:wordnews_server,代码行数:13,代码来源:WordDistanceCalculator.py
示例11: similarity_by_path
def similarity_by_path(sense1, sense2, option="path"):
    """Return a path-based similarity between two senses.

    Parameters:
        sense1, sense2: WordNet synsets.
        option: 'path', 'wup' (Wu-Palmer), or 'lch' (Leacock-Chodorow);
            anything else falls through to Lin similarity with BNC add-1
            information content.

    Bug fix: the 'path' branch computed path_similarity(sense1, sense2)
    twice and took the max of identical values; it now maximises over both
    argument orders.  A duplicated "wu-palmer" list entry was also removed.
    """
    option = option.lower()
    if option in ["path", "path_similarity"]:
        return max(wn.path_similarity(sense1, sense2),
                   wn.path_similarity(sense2, sense1))
    elif option in ["wup", "wupa", "wu-palmer"]:
        return wn.wup_similarity(sense1, sense2)
    elif option in ['lch', "leacock-chordorow"]:
        if sense1.pos != sense2.pos:  # lch is undefined across POS
            return 0
        return wn.lch_similarity(sense1, sense2)
    # Fallback: information-content similarity.
    return wn.lin_similarity(sense1, sense2, wnic.ic('ic-bnc-add1.dat'))
开发者ID:alee101,项目名称:wsd,代码行数:13,代码来源:semanticsim.py
示例12: single_jiang_conrath
def single_jiang_conrath(cast_no1, cast_no2, syn_dict):
    """Average pairwise synset similarity between two casts.

    Parameters:
        cast_no1, cast_no2: keys into syn_dict.
        syn_dict: maps cast id -> list of synsets (entries may be None).

    Returns the mean similarity over all comparable pairs, or 0.0 when no
    pair qualifies (the original raised ZeroDivisionError in that case).

    Fixes: removed a redundant outer `for original_syn in synsets1` loop
    whose variable was never used -- it multiplied both the similarity sum
    and the comparison count by len(synsets1), leaving the average
    unchanged; replaced `len(...) is not 0` identity checks (always true
    inside the loops anyway) with plain None filtering.
    """
    brown_ic = wordnet_ic.ic('ic-brown.dat')
    synsets1 = syn_dict[cast_no1]
    synsets2 = syn_dict[cast_no2]
    total_sim = 0.0
    no_of_comparisons = 0.0
    for syn1 in synsets1:
        if syn1 is None:
            continue
        for syn2 in synsets2:
            if syn2 is None:
                continue
            # NOTE(review): despite the function's name this calls
            # lch_similarity (not jcn_similarity) and passes an IC corpus
            # to it; preserved as-is -- confirm the intended metric.
            if syn1.pos() == syn2.pos() and ((syn1.pos() == "n") or (syn2.pos() == "v")):
                total_sim += syn1.lch_similarity(syn2, brown_ic)
                no_of_comparisons += 1
    if no_of_comparisons == 0:
        return 0.0  # no comparable synset pair
    return total_sim / no_of_comparisons
开发者ID:wingy-wing,项目名称:dissertation,代码行数:15,代码来源:lch_wordnet_adapted_lesk_for_two_casts.py
示例13: main
def main():
    """Compare Lin, Resnik and word2vec word similarities against human ratings.

    Reads pair -> human-similarity data from "input.txt", scores every pair
    with each method, prints a per-pair table, and reports each method's
    mean squared error against the human scores.  Python 2 code.
    """
    brown_ic = wordnet_ic.ic('ic-brown.dat')
    human_sims = parseFile("input.txt")
    lin_sims = linSimilarities(human_sims.keys(), brown_ic)
    res_sims = resSimilarities(human_sims.keys(), brown_ic)
    #print "Initializing Model"
    model = None
    model = gensim.models.Word2Vec()
    # NOTE(review): load_word2vec_format is called on an instance here;
    # newer gensim exposes it on KeyedVectors -- confirm the pinned version.
    model = model.load_word2vec_format(RESOURCES+'glove_model.txt', binary=False)
    #print "Model created calling vec Sim"
    vec_sims = vecSimilarities(human_sims.keys(), model)
    #print "AFter call to vec Sim"
    lin_score = 0
    res_score = 0
    vec_score = 0
    print '{0:15} {1:15} {2:10} {3:20} {4:20} {5:20}'.format('word1','word2',
                                                             'human', 'Lin',
                                                             'Resnik', 'Word2Vec')
    for key, human in human_sims.items():
        # A pair missing from any method's results counts as similarity 0.
        try:
            lin = lin_sims[key]
        except:
            lin = 0
        lin_score += (lin - human) ** 2
        try:
            res = res_sims[key]
        except:
            res = 0
        res_score += (res - human) ** 2
        try:
            vec = vec_sims[key]
        except:
            vec = 0
        vec_score += (vec - human) ** 2
        # Keys appear to be "(word1, word2)" strings here; recover the two
        # words by slicing around the parentheses and comma.
        firstword=key.partition('(')[-1].rpartition(',')[0]
        secondword=key.partition(',')[-1].rpartition(')')[0]
        secondword=secondword.strip()
        print '{0:15} {1:15} {2:10} {3:20} {4:20} {5:20}'.format(firstword,secondword, human,
                                                                 lin, res, vec)
    num_examples = len(human_sims)
    print "\nMean Squared Errors"
    print "Lin method error: %0.2f" % (lin_score/num_examples)
    print "Resnick method error: %0.2f" % (res_score/num_examples)
    print "Vector-based method error: %0.2f" % (vec_score/num_examples)
开发者ID:niha-p,项目名称:Natural-Language-Processing,代码行数:48,代码来源:B.py
示例14: lexical_compare
def lexical_compare(lemma_text,lemma_hypothesis):
    """Score the lexical relatedness of a text lemma and a hypothesis lemma.

    Returns 50 on a direct (regex) match, 0 when no usable synset pair
    exists, otherwise the sum of four WordNet similarity scores between the
    chosen hypothesis synset and the first text synset.
    """
    similarity_score = 0
    brown_ic = wordnet_ic.ic('ic-brown.dat')
    # NOTE(review): lemma_text is used as a regex pattern; lemmas containing
    # metacharacters could match unexpectedly -- consider re.escape upstream.
    if re.search(lemma_text,lemma_hypothesis,re.M|re.I):
        return 50
    hypo_synset = wn.synsets(lemma_hypothesis)
    text_synset = wn.synsets(lemma_text)
    synset_index = get_index(hypo_synset, text_synset)  # helper defined elsewhere
    if synset_index == -1:
        return 0
    if len(hypo_synset) > 0 and len(text_synset) > 0:
        # NOTE(review): path_similarity and wup_similarity take no IC
        # argument in NLTK; brown_ic would land in another parameter slot
        # -- confirm this is intended before relying on these two terms.
        similarity_score = hypo_synset[synset_index].path_similarity(text_synset[0],brown_ic)
        similarity_score += hypo_synset[synset_index].wup_similarity(text_synset[0],brown_ic)
        similarity_score += hypo_synset[synset_index].lin_similarity(text_synset[0],brown_ic)
        similarity_score += hypo_synset[synset_index].res_similarity(text_synset[0],brown_ic)
    return similarity_score
开发者ID:racoder,项目名称:question-answering-nlp,代码行数:16,代码来源:BOW_1.py
示例15: extract_word_clusters
def extract_word_clusters(commentList, commentCount):
    """Cluster the corpus vocabulary by pairwise WordNet similarity and build
    per-comment cluster-count feature vectors.

    Python 2 code.  NOTE(review): this excerpt is truncated by the
    aggregator (the original is longer); the trailing triple-quote opens a
    string that closes beyond this excerpt.
    """
    brown_ic = wordnet_ic.ic('ic-brown.dat')
    a, corpus, global_synsets = extract_global_bag_of_words(commentList, True)
    similarity_dict = {}
    i = 0
    t = len(global_synsets)**2  # total pair count, for progress reporting
    for syn_out in global_synsets:
        similarity_dict[syn_out] = {}
        for syn_in in global_synsets:
            if syn_in.pos() == syn_out.pos():
                # Same POS: information-content (Lin) similarity.
                similarity_dict[syn_out][syn_in] = syn_out.lin_similarity(syn_in, brown_ic)
            else:
                # Different POS: best path similarity over both orders.
                similarity_dict[syn_out][syn_in] = max(wn.path_similarity(syn_out,syn_in), wn.path_similarity(syn_in,syn_out))
            if i % 10000 == 0:
                # NOTE(review): prints a 0-1 fraction despite the '%' label.
                print i, 'synsets processed out of',len(global_synsets)**2, '(',float(i)/(t),'%)'
            i += 1
    # One similarity row vector per synset.
    tuples = [(i[0], i[1].values()) for i in similarity_dict.items()]
    vectors = [np.array(tup[1]) for tup in tuples]
    # Rule of thumb
    n = sqrt(len(global_synsets)/2)
    # NOTE(review): sqrt returns a float but KMeans expects an integer
    # n_clusters -- confirm against the sklearn version in use.
    print "Number of clusters", n
    km_model = KMeans(n_clusters=n)
    km_model.fit(vectors)
    clustering = collections.defaultdict(list)
    for idx, label in enumerate(km_model.labels_):
        clustering[label].append(tuples[idx][0])
    pprint.pprint(dict(clustering), width=1)
    # Feature vector: count of words per cluster for each comment.
    feature_vector = np.zeros([len(corpus),n])
    for i,comment in enumerate(corpus):
        for w in comment:
            for key, clust in clustering.items():
                if w in clust:
                    feature_vector[i][key] += 1
        if i % 1000 == 0:
            print i, 'comments processed'
    print feature_vector
    '''
开发者ID:DirkBrand,项目名称:Comment-Classification,代码行数:47,代码来源:mainExtractor.py
示例16: wnsim
def wnsim(word1, word2, ps=wn.NOUN, metric='path', ic=None):
    """Maximum WordNet similarity between two words.

    Maximises the similarity over all sense pairs of the given part of
    speech (noun by default).

    Parameters:
        word1, word2: lemma strings.
        ps: WordNet POS to restrict senses to.
        metric: metric name forwarded to sssim.
        ic: information-content dict; defaults to the Brown IC corpus.

    Bug fix: the original default `ic=wn_ic.ic('ic-brown.dat')` loaded the
    IC corpus at module-import time; it is now loaded lazily on first use,
    which is backward-compatible for all callers.
    """
    if ic is None:
        ic = wn_ic.ic('ic-brown.dat')
    ss1 = wn.synsets(word1, pos=ps)
    ss2 = wn.synsets(word2, pos=ps)
    maxsim = 0
    for s1 in ss1:
        for s2 in ss2:
            thissim = sssim(s1, s2, metric, ic)
            if thissim > maxsim:
                maxsim = thissim
    return maxsim
开发者ID:julieweeds,项目名称:ANLE,代码行数:17,代码来源:wnsim.py
示例17: computeLinSimilarity
def computeLinSimilarity(term1, term2):
    """Best Lin similarity over all synset pairs of two terms (0 if none).

    Lazily loads the Brown IC corpus into the module-level `ic` cache on
    first call; pairs whose similarity cannot be computed are skipped.
    """
    global ic
    if not ic:
        ic = wordnet_ic.ic('ic-brown.dat')
    best = 0
    for syn1 in wn.synsets(term1):
        for syn2 in wn.synsets(term2):
            try:
                score = wn.lin_similarity(syn1, syn2, ic)
                if score > best:
                    best = score
            except Exception:
                pass  # incomparable pair; keep the current best
    return best
开发者ID:Athiq,项目名称:word2vecautomation,代码行数:17,代码来源:SimilarityEval.py
示例18: lin_truth
def lin_truth():
    """Build the pairwise Lin-similarity matrix for the words in Input2.

    Reads one word per line from the global path Input2, looks up each
    word's first noun synset, and returns the n x n similarity matrix
    (Semcor information content) as a scipy CSR matrix of float64.

    Fixes: the input file handle was leaked (now closed via `with`), and
    `wordnet.synset(...)` was re-resolved O(n^2) times -- each word's
    synset is now looked up once.
    """
    semcor_ic = wordnet_ic.ic('ic-semcor.dat')
    with open(Input2) as fh:
        content = [word.strip() for word in fh]
    synsets = [wordnet.synset(word + ".n.01") for word in content]
    truth_arr = []
    for synA in synsets:
        truth_arr.append([synA.lin_similarity(synB, semcor_ic) for synB in synsets])
    D = ss.csr_matrix(np.array(truth_arr, dtype=np.float64))
    return D
开发者ID:f00barin,项目名称:distrib,代码行数:18,代码来源:distrib_test.py
示例19: main
def main():
brown_ic = wordnet_ic.ic('ic-brown.dat')
human_sims = parseFile("input.txt")
lin_sims = linSimilarities(human_sims.keys(), brown_ic)
res_sims = resSimilarities(human_sims.keys(), brown_ic)
model = None
model = gensim.models.Word2Vec()
model = model.load_word2vec_format(RESOURCES+'glove_model.txt', binary=False)
vec_sims = vecSimilarities(human_sims.keys(), model)
lin_score = 0
res_score = 0
vec_score = 0
print '{0:15} {1:15} {2:10} {3:20} {4:20} {5:20}'.format('word1','word2',
'human', 'Lin',
'Resnik', 'Word2Vec')
for key, human in human_sims.items():
try:
lin = lin_sims[key]
except:
lin = 0
lin_score += (lin - human) ** 2
try:
res = res_sims[key]
except:
res = 0
res_score += (res - human) ** 2
try:
vec = vec_sims[key]
except:
vec = 0
vec_score += (vec - human) ** 2
print '{0:15} {1:15} {2:10} {3:20} {4:20} {5:20}'.format(key[0], key[1], human,
lin, res, vec)
num_examples = len(human_sims)
print "\nMean Squared Errors"
print "Lin method error: %0.2f" % (lin_score/num_examples)
print "Resnick method error: %0.2f" % (res_score/num_examples)
print "Vector-based method error: %0.2f" % (vec_score/num_examples)
开发者ID:saniaarif22,项目名称:NLP,代码行数:44,代码来源:B.py
示例20: main
def main(fname):
    """Score consecutive lyric pairs from `fname` under every similarity
    metric, writing per-metric reports, histograms, and a final plot of
    lyrics selected per threshold.

    Skips any metric whose output file already exists and reuses pickled
    scores when present.  Python 2 code.
    """
    lyrics = preprocess_lyrics(fname)
    collection = nltk.TextCollection(nltk.corpus.brown)
    ic = wordnet_ic.ic('ic-brown.dat')
    thresh_counts = {}
    # Python 2: dict.keys() is a list, so `+ ['bp_adj']` concatenates.
    for similarity in SimilarityCalculator.SIMILARITIES.keys() + ['bp_adj']:
        scores = []
        output_fname = os.path.join('output', similarity + '.txt')
        pickled_fname = output_fname + '.pickled'
        img_fname = os.path.join('output', similarity + '_hist.png')
        if os.path.exists(output_fname):
            continue  # this metric was already fully processed
        now = datetime.datetime.now()
        print '[{}] Starting calculation on {}'.format(str(now), similarity)
        if similarity == 'bp':
            adjust_bp()
        if os.path.exists(pickled_fname):
            # Reuse previously computed (couplet, score) pairs.
            scores = [score for couplet, score in
                      pickle.load(open(pickled_fname, 'r'))]
        else:
            sc = SimilarityCalculator(collection, similarity, ic)
            for lyric1, lyric2 in pairwise(lyrics):
                scores.append(sc.similarity_bidirectional(lyric1, lyric2))
        thresh_counts[similarity] = print_report(open(output_fname, 'w'),
                                                 scores,
                                                 open(fname, 'r').read().
                                                 split('\n'),
                                                 open(pickled_fname, 'w'),
                                                 img_fname)
        now = datetime.datetime.now()
        print '[{}] Finished calculation on {}'.format(str(now), similarity)
    plt.clf()
    # Plot threshold -> number of lyrics selected for each metric.
    for similarity in thresh_counts.keys():
        res = list(thresh_counts[similarity].iteritems())
        res.sort()
        res = zip(*res)
        plt.plot(res[0], res[1], label=similarity, zorder=1)
        plt.scatter(res[0], res[1], zorder=2)
    plt.legend()
    plt.xlabel("threshold")
    plt.ylabel("no. lyrics selected")
    plt.savefig(os.path.join("output", "thresholds.png"))
注:本文中的nltk.corpus.wordnet_ic.ic函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论