This article collects typical usage examples of the nltk.pos_tag function in Python. If you have been wondering how exactly to call pos_tag, or what real-world pos_tag usage looks like, the hand-picked examples below should help.
A total of 20 pos_tag code examples are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
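Before the collected examples, here is a minimal sketch of the basic call pattern. It assumes the tokenizer and tagger models (typically the "punkt" and "averaged_perceptron_tagger" packages, names vary slightly by NLTK version) have already been fetched with nltk.download, and the sample sentence is purely illustrative:

import nltk

# pos_tag expects a list of tokens, not a raw string, so tokenize first.
tokens = nltk.word_tokenize("NLTK assigns a part-of-speech tag to every token.")
tagged = nltk.pos_tag(tokens)
print(tagged)  # e.g. [('NLTK', 'NNP'), ('assigns', 'VBZ'), ...]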
Example 1: create_synonyms
def create_synonyms(orig_word):
    '''
    Function for creating synonyms for the passed word.
    '''
    try:
        headers = {
            "X-Mashape-Key": "aIder4iWr4msh5Scn073WRoddmAEp1qA0I3jsnSR8lfJwtyzpg",
            "Accept": "application/json"}
        response = requests.get("https://wordsapiv1.p.mashape.com/words/{}/synonyms".format(orig_word), headers=headers)
        if response.status_code == 200:
            json = response.json()
            synonyms = json['synonyms']
            # synonyms = nltk.word_tokenize(synonyms)
            synonyms = nltk.pos_tag(synonyms)
            word = nltk.word_tokenize(orig_word)
            word = nltk.pos_tag(word)[0]
            print(synonyms)
            good_syns = []
            # Keep only synonyms whose POS tag matches the original word's tag.
            for syn in synonyms:
                print(word[1], syn[1])
                if word[1] == syn[1]:
                    print('*')
                    good_syns.append(syn[0])
            # get_or_create returns an (instance, created) tuple; unpack the instance.
            word, _ = Word.objects.get_or_create(word=orig_word)
            for syn in good_syns[:2]:
                try:
                    new_word = Word.objects.create(word=syn.lower(), is_synonym=True)
                except Exception:
                    new_word = Word.objects.get(word=word)
                syn = Synonym.objects.create(word=new_word)
                syn.synonym_to.add(word)
            return good_syns
    except Exception as e:
        print(e)
Author: Dambre, Project: social_advisor, Lines: 35, Source: dictionary.py
Example 2: extract_pos_pair
def extract_pos_pair(event_mention_1, event_mention_2):
    trigger1 = ""
    extent1 = ""
    trigger2 = ""
    extent2 = ""
    for one_anchor in event_mention_1.findall("anchor"):
        trigger1 = one_anchor[0].text
    for one_anchor in event_mention_2.findall("anchor"):
        trigger2 = one_anchor[0].text
    for one_extent in event_mention_1.findall("extent"):
        extent1 = one_extent[0].text
    for one_extent in event_mention_2.findall("extent"):
        extent2 = one_extent[0].text
    text1 = nltk.word_tokenize(extent1)
    dict1 = nltk.pos_tag(text1)
    for one_pair in dict1:
        if one_pair[0] in trigger1 or trigger1 in one_pair[0]:
            pos1 = one_pair[1]
            break
    text2 = nltk.word_tokenize(extent2)
    dict2 = nltk.pos_tag(text2)
    for one_pair in dict2:
        if one_pair[0] in trigger2 or trigger2 in one_pair[0]:
            pos2 = one_pair[1]
            break
    return (pos1, pos2)
Author: wtl-zju, Project: KBP2015, Lines: 26, Source: coref_feature_extraction.py
Example 3: writeOut
def writeOut(lsummary_out, allwordsphrases=[], outputpath='.', gridset=''):
    # Write data out for the last folder (gridset) encountered - MUST BE A BETTER WAY THAN THIS?
    uWordsPhrases = uniqueSet(allwordsphrases)  # Set of unique words.
    uwords = []
    uphrases = []
    words = []
    phrases = []
    wordtypes = []
    total_wordsphrases = total_uwordsphrases = total_words = total_phrases = 0
    ldata_out = UnicodeWriter(open(outputpath + '/' + gridset + '/language-data.csv', 'wb'), delimiter=',', quotechar='"')
    ldata_out.writerow(["WORD", "NUMBER OF WORDS", "COUNT", "TYPE"])
    # Output metrics to file.
    for item in uWordsPhrases:
        num_words = len(item.split())
        item_count = allwordsphrases.count(item)
        if num_words == 1:  # Single word
            # pos_tag expects a list of tokens, so wrap the single word.
            word_type = nltk.pos_tag([item])[-1][-1]
            #word_type_help = nltk.help.upenn_tagset(word_type)
            # MAYBE CONVERT TAGS INTO MORE USEFUL WORDS?!
            ldata_out.writerow([item, str(num_words), str(item_count), word_type])
            uwords.append(item)
            wordtypes.append(word_type)
        elif num_words > 1:  # Phrase
            nltk_words = nltk.word_tokenize(item)
            word_pos = nltk.pos_tag(nltk_words)  ### HOW TO DEAL WITH PHRASES???
            word_types = [x[1] for x in word_pos]
            ldata_out.writerow([item, str(num_words), str(item_count), " ,".join(word_types)])
            # HOW TO OUTPUT EACH POS TO A COLUMN???
            uphrases.append(item)
    for item in allwordsphrases:
        num_words = len(item.split())
        if num_words == 1:
            words.append(item)
        elif num_words > 1:
            phrases.append(item)
    uword_types = countDuplicatesInList(wordtypes)
    total_wordsphrases = len(allwordsphrases)
    total_uwordsphrases = len(uWordsPhrases)
    total_uwords = len(uwords)
    total_uphrases = len(uphrases)
    total_words = len(words)
    total_phrases = len(phrases)
    #["File Name", "Total Words or Phrases", "Total Unique Words or Phrases", "Total Words", "Total Phrases", "Total Unique Words", "Total Unique Phrases", "Types of Word"])
    lsummary_out.writerow([gridset, str(total_wordsphrases), str(total_uwordsphrases), str(total_words), str(total_phrases), str(total_uwords), str(total_uphrases), ', '.join(map(str, uword_types))])
    raw_words_out = open(outputpath + '/' + gridset + '/raw-unique-words.text', 'wb')
    raw_words_out.writelines('\n'.join(uWordsPhrases).encode('utf-8'))
    raw_phrases_out = open(outputpath + '/' + gridset + '/raw-unique-phrases.txt', 'wb')
    raw_phrases_out.writelines('\n'.join(uphrases).encode('utf-8'))
    raw_words_out = open(outputpath + '/' + gridset + '/raw-wordsphrases.text', 'wb')
    raw_words_out.writelines('\n'.join(allwordsphrases).encode('utf-8'))
Author: simonjudge, Project: AAC-Tools, Lines: 60, Source: wordlistMetrics.py
Example 4: nltk_filter
def nltk_filter(sent):
    b1, b2 = sent.split(blockSeparator)
    b2 = b2.rstrip()

    b1 = b1.lower()
    tokens = word_tokenize(b1)
    pos_tags = pos_tag(tokens)
    filtered_sent = ' '
    for token in tokens:
        filtered_sent += '1' + token + ' '
    # for pos_t in pos_tags:
    #     if pos_t[1] in filterList:
    #         #filtered_sent += stemmer.stem(pos_t[0]) + ' '
    #         filtered_sent += '1' + stemmer.stem(pos_t[0]) + ' '
    # note: 1 concat stemmer(word) == stemmer(1 concat word)

    b2 = b2.lower()
    tokens = word_tokenize(b2)
    pos_tags = pos_tag(tokens)
    # filtered_sent = ' '
    # for pos_t in pos_tags:
    #     if pos_t[1] in filterList:
    #         #filtered_sent += stemmer.stem(pos_t[0]) + ' '
    #         filtered_sent += '2' + stemmer.stem(pos_t[0]) + ' '
    for token in tokens:
        filtered_sent += '2' + token + ' '

    return filtered_sent
Author: gthandavam, Project: Recipes, Lines: 30, Source: builder.py
Example 5: load_data
def load_data(path):
    sentences_pos = []
    r1 = re.compile(r'\<([^ ]+)\>')
    r2 = re.compile(r'\$US(\d)')
    for l in open(path):
        if not l.strip():
            continue
        l = l.decode('utf-8')
        l = l.replace(u'’', "'")
        l = l.replace(u'``', '"')
        l = l.replace(u"''", '"')
        l = l.replace(u"—", '--')
        l = l.replace(u"–", '--')
        l = l.replace(u"´", "'")
        l = l.replace(u"-", " ")
        l = l.replace(u"/", " ")
        l = r1.sub(r'\1', l)
        l = r2.sub(r'$\1', l)
        s = l.strip().split('\t')
        sa, sb = tuple(nltk.word_tokenize(s)
                       for s in l.strip().split('\t') if s)  # ignore double \t
        sa, sb = ([x.encode('utf-8') for x in sa],
                  [x.encode('utf-8') for x in sb])
        for s in (sa, sb):
            for i in xrange(len(s)):
                if s[i] == "n't":
                    s[i] = "not"
                elif s[i] == "'m":
                    s[i] = "am"
        sa, sb = fix_compounds(sa, sb), fix_compounds(sb, sa)
        sentences_pos.append((nltk.pos_tag(sa), nltk.pos_tag(sb)))
    return sentences_pos
Author: STS-NTNU, Project: STS13, Lines: 33, Source: simpfeats.py
Example 6: replace_proper_nouns
def replace_proper_nouns(self, o_sent, n_sent):
    proper_nouns = []
    p_pnouns = []
    o_tagged = pos_tag(word_tokenize(o_sent))
    n_tagged = pos_tag(word_tokenize(n_sent))
    # print("\nTransforming the output:")
    # print("Input sentence:", o_sent)
    # print("Found sentence:", n_sent)
    # print("Input sentence tagged:", o_tagged)
    # print("Found sentence tagged:", n_tagged)
    for o in o_tagged:
        if o[1] == 'NNP' and o not in proper_nouns:
            proper_nouns.append(o)
    for n in n_tagged:
        if (n[1] == 'PRP' or n[1] == 'PRP$' or n[1] == 'NNP') and n not in p_pnouns:
            p_pnouns.append(n)
    # print("")
    if (len(proper_nouns) == 1) and (len(p_pnouns) > 0):
        n_sent = sub(r"\b%s\b" % p_pnouns[0][0], proper_nouns[0][0], n_sent, 1)
        gender = self.gp.classify(proper_nouns[0][0])
        # print(proper_nouns[0][0], "is classified as", gender)
        for pnoun in p_pnouns:
            n_pnoun = self.change_gender(pnoun[0], gender)
            n_sent = sub(r"\b%s\b" % pnoun[0], n_pnoun, n_sent)
    elif len(proper_nouns) < 1:
        print("No proper nouns to replace")
    else:
        print("Not yet implemented, :P")
    return n_sent
Author: theopak, Project: storytellingbot, Lines: 35, Source: Extrapolate.py
Example 7: normalize_word
def normalize_word(word, lowercase=True, lemmatize=True):
    "Normalize word by stripping plural nouns"
    global NORMWORD_CACHE
    global NORMWORD_POS
    if NORMWORD_WNL is None:
        init_normword_wnl()
    if lowercase:
        word = word.lower()
    if word in NORMWORD_CACHE:
        return NORMWORD_CACHE[word]
    if not lemmatize:
        return word
    treebank_tag = nltk.pos_tag([word])[0][1]
    newword = word
    if (len(newword) > 4) and (treebank_tag == 'NNS'):
        # Only lemmatize plural nouns, leave verbs alone
        wnpos = get_wordnet_pos(treebank_tag)
        if wnpos:
            newword = NORMWORD_WNL.lemmatize(newword, wnpos)
    if newword != word:
        LOGGER.debug('Changing %s to %s' % (word, newword))
        NORMWORD_POS[newword] = nltk.pos_tag([newword])[0][1]
    else:
        NORMWORD_POS[word] = treebank_tag
    NORMWORD_CACHE[word] = newword
    return newword
Author: markgraves, Project: sanal, Lines: 26, Source: sautil.py
Example 8: test_nltkNERParsing
def test_nltkNERParsing(self):
    testString = 'Natural Sciences and Engineering Research Council of Canada'
    unigrams = TokenizeOnWhitespacePunctuation(testString, keepCaps=True).getUnigrams()
    posTagged = nltk.pos_tag(unigrams)
    chunked = nltk.ne_chunk(posTagged)
    getGPEs = []
    for treeBranch in chunked:
        if hasattr(treeBranch, 'label') and treeBranch.label() == 'GPE':
            getGPEs.append(str(treeBranch))
    self.assertEqual(1, len(getGPEs))

    testString = 'Milwaukee Foundation'
    unigrams = TokenizeOnWhitespacePunctuation(testString, keepCaps=True).getUnigrams()
    posTagged = nltk.pos_tag(unigrams)
    chunked = nltk.ne_chunk(posTagged)
    # returns (S (PERSON Milwaukee/NNP) (ORGANIZATION Foundation/NNP))

    testString = 'New England Board of Higher Education'
    unigrams = TokenizeOnWhitespacePunctuation(testString, keepCaps=True).getUnigrams()
    posTagged = nltk.pos_tag(unigrams)
    chunked = nltk.ne_chunk(posTagged)
    # returns (S (GPE New/NNP)(ORGANIZATION England/NNP Board/NNP) of/IN (PERSON Higher/NNP Education/NNP))

    testString = 'New England Board of Higher Education'
    unigrams = TokenizeOnWhitespacePunctuation(testString).getUnigrams()
    posTagged = nltk.pos_tag(unigrams)
    chunked = nltk.ne_chunk(posTagged)
Author: kyajmiller, Project: Cerebro, Lines: 29, Source: TestClassifyBadScholarships.py
Example 9: printer
def printer(sentencescorelist, sentenceList, wordscorelist, wordList):
    outFile = open('./tldr/outFile.txt', 'w')
    for s in range(0, len(sentenceList)):
        if s in sentencescorelist:
            printsentence(sentenceList[s], outFile)
    outFile.write("Topics to research: ")
    topics = []
    numtopics = 3
    poswords = nltk.pos_tag(wordList)
    poskeep = ["NN", "NNS", "NNP", "NNPS"]
    while numtopics > 0:
        temp = max(wordscorelist.iteritems(), key=operator.itemgetter(1))[0]
        templist = [temp]
        templist = nltk.pos_tag(templist)
        if templist[0][1] in poskeep:
            numtopics -= 1
            topics.append(temp)
        del wordscorelist[temp]
    for i in range(0, len(topics)):
        if i != len(topics) - 1:
            outFile.write(topics[i] + ", ")
        else:
            outFile.write(topics[i])
    outFile.close()
Author: fernandest, Project: TLDR_Twist, Lines: 26, Source: main.py
Example 10: parse_stock_name
def parse_stock_name(self, stockname):
    p = engine()
    instruction_set = stockname.split(',')
    word_list = instruction_set[0].split(' ')
    index = 1
    categories_ignored = ['RB', 'TO']
    tokens = word_tokenize(instruction_set[0])
    tags = pos_tag(tokens)
    i = 0
    while i < len(tags):
        if tags[i][1] in categories_ignored:
            index += 1
            i += 1
        else:
            break
    quantity = word_list[index - 1]
    disallowed = ['g', 'ml', 'x', 'kg', 'cups', 'cup', 'grams', 'can', 'tbsp', 'tsp', 'tbsps', 'tsps',
                  'small', 'bunch', 'piece', 'handful', 'pack', 'chopped', 'large', 'a', 'pinch',
                  'fresh', 'dried', 'heaped', 'thick', 'slices', 'slice', 'of', 'about']
    while index < len(word_list):
        if word_list[index] not in disallowed:
            break
        else:
            index += 1
    sentence = " ".join(word_list[index:])
    tokens = word_tokenize(sentence)
    categories = pos_tag(tokens)
    words = []
    for category in categories:
        if category[1] not in ['NNS', 'VBN', 'VBG']:
            words.append(category[0])
    word = " ".join(words)
    return quantity, word, None
Author: Godley, Project: MealPlanner, Lines: 35, Source: pipelines.py
Example 11: test
def test(ws, wf, s, pf, wm, alfa2):
    f1 = open('test_data.data', 'rb')
    f2 = open('test.csv', 'rb')
    val_text = f1.read()
    comt = f2.read().splitlines()
    val_lines = val_text.splitlines()
    acc = 0
    lc = 0
    for line in val_lines:
        token = line.split(' | ')
        token[2] = "<S> " + token[2] + " <E>"
        t_t = token[2].split(' %% ')
        if t_t[0] != "<S> ":
            bff = nltk.pos_tag(t_t[0].split(".")[-1].split(" "))[-1][1]
        else:
            bff = "<S>"
        if t_t[2] != " <E>":
            aff = nltk.pos_tag(t_t[2].split(".")[0].split(" "))[0][1]
        else:
            aff = "<E>"
        val_label = nb(ws, wf, s, token[0], pf, aff, bff, alfa2)
        if val_label == comt[lc].split(",")[1]:
            acc += 1
        lc += 1
    print float(acc) / len(val_lines)
    f1.close()
    f2.close()
Author: saumyakb, Project: CS4740-NLP-Superwised-WSD, Lines: 27, Source: trainer2.py
Example 12: m_surrounding
def m_surrounding(self):
    D = {}
    sent = self.sentence["form"]
    l = len(sent)
    #print sent
    K = self.index
    '''
    for k in range(l):
        if sent[k] == self.word:
            K = k
            break
    '''
    #print K, l
    tagp = tagn = ""
    if (K + 1) < l:
        tagn = nt.word_tokenize(sent[K + 1])
        tagn = nt.pos_tag(tagn)
    if (K - 1) >= 0:
        tagp = nt.word_tokenize(sent[K - 1])
        tagp = nt.pos_tag(tagp)
    if tagp != "":
        D["ptag"] = tagp[0][1]
    else:
        D["ptag"] = ""
    if tagn != "":
        D["ntag"] = tagn[0][1]
    else:
        D["ntag"] = ""
    print D
    return D
Author: korlev91, Project: CWI---complex-word, Lines: 32, Source: WordFeatures.py
Example 13: score_glove_pos
def score_glove_pos(src, dst, numpy_arrays, labels_array, g, normalize=True):
    b1 = []
    b2 = []
    lines = 0
    with open(src) as p:
        for i, line in enumerate(p):
            s = line.split('\t')
            b1.append(s[0])
            b2.append(s[1][:-1])  # remove \n
            lines = i + 1
    b1_pos = [nltk.pos_tag(nltk.word_tokenize(re.sub(r'[^\x00-\x7F]+', ' ', text))) for text in b1]
    b2_pos = [nltk.pos_tag(nltk.word_tokenize(re.sub(r'[^\x00-\x7F]+', ' ', text))) for text in b2]
    res = []
    for i in range(lines):
        tags1 = [tag[0] for tag in b1_pos[i] if tag[1] in NOUN]
        tags2 = [tag[0] for tag in b2_pos[i] if tag[1] in NOUN]
        r = [1 - spatial.distance.cosine(g[tag1], g[tag2]) for tag1 in tags1 for tag2 in tags2 if tag1 in labels_array and tag2 in labels_array]
        if len(r) == 0:
            res.append(0)
        else:
            res.append(round(5 * max(r), 2))
    if normalize:
        res = normarlize_score(res)
    with open(dst, 'w') as thefile:
        thefile.write("\n".join(str(i) for i in res))
    print src + ' finished!'
Author: wintor12, Project: SemEval2015, Lines: 30, Source: run.py
Example 14: test
def test(ws, wf, s, pf):
    f1 = open('validation_data.data', 'rb')
    #f2 = open('test_data.csv', 'w')
    val_text = f1.read()
    val_lines = val_text.splitlines()
    acc = 0
    for line in val_lines:
        token = line.split(' | ')
        t_t = token[2].split(' %% ')
        if t_t[0] != "<S>":
            bff = nltk.pos_tag(t_t[0].split(".")[-1].split(" "))[-1][1]
        else:
            bff = "<S>"
        if t_t[2] != "<\S>":
            aff = nltk.pos_tag(t_t[2].split(".")[0].split(" "))[0][1]
        else:
            aff = "<\S>"
        val_label = nb(ws, wf, s, token[0], pf, aff, bff)
        #f2.write(token[0] + " | " + val_label + " | " + token[2])
        #f1.close()
        #f2.close()
        #print "Done"
        if val_label == token[1]:
            acc += 1
    print float(acc) / len(val_lines)
Author: saumyakb, Project: CS4740-NLP-Superwised-WSD, Lines: 29, Source: MakeDictKaggle.py
Example 15: expand_with_wordnet
def expand_with_wordnet(query):
    """
    This function expands every contentful word in the query with its wordnet
    definition. The word itself is not removed. Stop words are removed from the
    word definition as well.
    (Contentful means that it is not a stopword or punctuation sign)

    INPUT:
        query -- user query that is a simple string
    OUTPUT:
        expanded_query -- user query + definitions of contentful words
    """
    stop = stopwords.words("english")
    stop += EXCLUDED
    contentful_tokens = [tok for tok in query.split() if tok not in stop]
    # take the first definition for the current word
    defs = []
    for token in contentful_tokens:
        syn1 = wn.synsets(token, pos=wn.ADJ)[:1]
        syn2 = wn.synsets(token, pos=wn.NOUN)[:1]
        # we take into account only adj defs
        if syn1:
            defs.append(token)
            def_tokenized = word_tokenize(syn1[0].definition())
            [defs.append(t[0]) for t in pos_tag(def_tokenized) if t[1] in ["NN", "JJ"]]
        elif syn2:
            defs.append(token)
            def_tokenized = word_tokenize(syn2[0].definition())
            [defs.append(t[0]) for t in pos_tag(def_tokenized) if t[1] in ["NN", "JJ"]]
    # expansion can add some EXCLUDED words back in the query
    defs = set(defs) - set(EXCLUDED)  # removing again
    expanded = " ".join(defs)
    return expanded
Author: tastyminerals, Project: cocktail_bot, Lines: 33, Source: cocktail_ir.py
Example 16: extract_entities2
def extract_entities2(text):
    entities = []
    """t0 = nltk.DefaultTagger('NN')
    t1 = nltk.UnigramTagger(train_sents, backoff=t0)
    t2 = nltk.BigramTagger(train_sents, backoff=t1)
    t2.evaluate(test_sents)"""
    for sentence in sent_tokenize(text):
        #print pos_tag(nltk.word_tokenize(sentence))
        print sentence
        tags = pos_tag(nltk.word_tokenize(sentence))
        tags = tagear(tags)
        chunks = ne_chunk(pos_tag(nltk.word_tokenize(sentence)))
        #chunks = ne_chunk(regexp_tagger.tag((nltk.word_tokenize(text))))
        chunks = ne_chunk(tags)
        #chunks.draw()
        #print chunks
        for chunk in chunks:
            #print chunk
            #if hasattr(chunk, 'node'):
            #    print chunk.node
            if hasattr(chunk, 'node'):
                print chunk
        entities.extend([chunk for chunk in chunks if hasattr(chunk, 'node')])
    return entities
Author: jholoc, Project: proyectoScrapy, Lines: 26, Source: Tokenizacion.py
Example 17: tokenizeme
def tokenizeme(self, LanguageSample):
    self.tokenized_text = nltk.word_tokenize(LanguageSample)
    self.unique_words = list(set(self.tokenized_text))
    self.unique_words.sort()
    self.unique_words = nltk.pos_tag(self.unique_words)  # Unique words does not get rid of inflectional morpheme duplicates
    self.tagged_text = [i for i in nltk.pos_tag(self.tokenized_text) if i[1] != "."]  # pos_tag gets the part of speech, loop removes punctuation
    self.count = len(self.tagged_text)
Author: theredwillow, Project: SLP_Assessment, Lines: 7, Source: NLTK_Info.py
Example 18: make_pos
def make_pos(target_tag, edit_rev):
    tags, srcs, dsts = edit_rev
    sentence = ''
    if target_tag == del_tag:
        sentence = dsts
    elif target_tag == add_tag:
        sentence = srcs
    if target_tag in tags:
        tag_indexes = [i for i, x in enumerate(tags) if x == target_tag]
        trimed = sentence
        for tag_index in tag_indexes:
            trimed = trimed[:tag_index] + trimed[tag_index+1:]
        posed = pos_tag(trimed)
        pos = [w[1] for w in posed]
        for tag_index in tag_indexes:
            pos.insert(tag_index, u'')
        # debug
        none_indexes = [i for i, x in enumerate(pos) if x == u'']
        if tag_indexes != none_indexes:
            print(tag_indexes, file=sys.stderr)
            print(none_indexes, file=sys.stderr)
            print(tags, file=sys.stderr)
            print(pos, file=sys.stderr)
    else:
        posed = pos_tag(u' '.join(sentence).split())
        pos = [w[1] for w in posed]
    return pos
Author: tkyf, Project: epair, Lines: 33, Source: englishword_edit_distance.py
Example 19: glv_window_overlap
def glv_window_overlap(t1, t2, n=5):
    ''' Looks for an alignment within the window between sentences
    (non-overlapping within the sentence) and words with compatible lemmas POS.
    Emits features regarding the distance between common words, and finds the
    glv vector difference between pos-tag aligned words, inversely weighted by
    sentence distance. '''
    ''' Looks within a window of influence around word matches for context, and
    compares the glove vectors within the (n - 1) gram context.
    Produces dim * (n - 1) dense features.'''
    features = Counter()
    v_tagged = pos_tag(leaves(t1))
    w_tagged = pos_tag(leaves(t2))
    for v in ntuples(v_tagged, n):
        for w in ntuples(w_tagged, n):
            # Find alignment
            alignments = find_exact_alignments(v, w)
            for i, j in alignments:
                # Featurize the word alignment in the window
                features[v[i][0] + str(i - j)] += 1
            if not alignments:
                continue
            else:
                similar_align = find_tagged_alignments(v, w, alignments)
                for i, j in similar_align:
                    word_diff = np.exp(glvvec(v[i][0]) - glvvec(w[j][0]))
                    for dim in range(word_diff.shape[0]):
                        features[v[i][1] + ' aligned dim ' + str(dim)] += word_diff[dim]
    return features
Author: BinbinBian, Project: 224UProject, Lines: 32, Source: features.py
Example 20: text_to_pos_list
def text_to_pos_list(lst):
    dpos_list = []
    tpos_list = []
    for line in lst:
        if "IsTruthFul" in line:
            continue
        else:
            if line[0] == "0":  # If deceptive:
                dpos_list.append("<r>")
                for sent in nltk.tokenize.sent_tokenize(parse_line(line)):
                    dpos_list.append("<s>")
                    text = nltk.word_tokenize(sent)
                    tagged = nltk.pos_tag(text)
                    for t in tagged:
                        dpos_list.append(t)
                    dpos_list.append("</s>")
                dpos_list.append("</r>")
            else:
                tpos_list.append("<r>")
                for sent in nltk.tokenize.sent_tokenize(parse_line(line)):
                    tpos_list.append("<s>")
                    text = nltk.word_tokenize(sent)
                    tagged = nltk.pos_tag(text)
                    for t in tagged:
                        tpos_list.append(t)
                    tpos_list.append("</s>")
                tpos_list.append("</r>")
    return (dpos_list, tpos_list)
Author: cheelan, Project: NLP, Lines: 28, Source: deception.py
Note: the nltk.pos_tag examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors, and distribution and use are subject to each project's license. Do not republish without permission.