This article collects typical usage examples of Python's nltk.ne_chunk function. If you have been wondering exactly how ne_chunk is used, what it is for, or what real-world calls look like, the curated examples below should help.
Twenty code examples are shown, ordered by popularity; the ones readers found most useful appear first.
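Before the examples, here is a minimal sketch of the standard pipeline that ne_chunk sits at the end of: sentence text is tokenized, POS-tagged, and only then chunked. It assumes NLTK 3 with the punkt, averaged_perceptron_tagger, maxent_ne_chunker and words resources downloaded:

import nltk

sentence = "Mark works at the United Nations in New York."
tokens = nltk.word_tokenize(sentence)   # split into word tokens
tagged = nltk.pos_tag(tokens)           # part-of-speech tag each token
tree = nltk.ne_chunk(tagged)            # group tagged tokens into entity chunks

# named entities come back as subtrees labelled PERSON, ORGANIZATION, GPE, ...
for subtree in tree.subtrees(lambda t: t.label() != 'S'):
    print(subtree.label(), ' '.join(token for token, pos in subtree.leaves()))

Note that ne_chunk expects POS-tagged input, which is why every example below calls pos_tag (or a custom tagger) first.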
Example 1: extract_entities2

def extract_entities2(text):
    entities = []
    for sentence in sent_tokenize(text):
        print(sentence)
        # POS-tag the tokens, then re-tag them with the project's custom tagear() tagger
        tags = tagear(pos_tag(nltk.word_tokenize(sentence)))
        chunks = ne_chunk(tags)
        for chunk in chunks:
            # entity chunks are Tree objects; plain tokens are (word, tag) tuples
            if hasattr(chunk, 'label'):
                print(chunk)
        entities.extend([chunk for chunk in chunks if hasattr(chunk, 'label')])
    return entities
Author: jholoc, Project: proyectoScrapy, Source: Tokenizacion.py
Example 2: test_nltkNERParsing
def test_nltkNERParsing(self):
    testString = 'Natural Sciences and Engineering Research Council of Canada'
    unigrams = TokenizeOnWhitespacePunctuation(testString, keepCaps=True).getUnigrams()
    posTagged = nltk.pos_tag(unigrams)
    chunked = nltk.ne_chunk(posTagged)

    getGPEs = []
    for treeBranch in chunked:
        if hasattr(treeBranch, 'label') and treeBranch.label() == 'GPE':
            getGPEs.append(str(treeBranch))
    self.assertEqual(1, len(getGPEs))

    testString = 'Milwaukee Foundation'
    unigrams = TokenizeOnWhitespacePunctuation(testString, keepCaps=True).getUnigrams()
    posTagged = nltk.pos_tag(unigrams)
    chunked = nltk.ne_chunk(posTagged)
    # returns (S (PERSON Milwaukee/NNP) (ORGANIZATION Foundation/NNP))

    testString = 'New England Board of Higher Education'
    unigrams = TokenizeOnWhitespacePunctuation(testString, keepCaps=True).getUnigrams()
    posTagged = nltk.pos_tag(unigrams)
    chunked = nltk.ne_chunk(posTagged)
    # returns (S (GPE New/NNP) (ORGANIZATION England/NNP Board/NNP) of/IN (PERSON Higher/NNP Education/NNP))

    testString = 'New England Board of Higher Education'
    unigrams = TokenizeOnWhitespacePunctuation(testString).getUnigrams()
    posTagged = nltk.pos_tag(unigrams)
    chunked = nltk.ne_chunk(posTagged)
Author: kyajmiller, Project: Cerebro, Source: TestClassifyBadScholarships.py
Example 3: extractNE

def extractNE(sentence, withClass):
    words = nltk.word_tokenize(sentence)  # split the sentence into word tokens
    if withClass:
        # keep entity categories (PERSON, ORGANIZATION, GPE, ...)
        tree = nltk.ne_chunk(nltk.pos_tag(words), binary=False)
        return extractNEwithClass(tree)
    else:
        # collapse every entity into a single generic NE label
        tree = nltk.ne_chunk(nltk.pos_tag(words), binary=True)
        return extractNEwithoutClass(tree)
Author: nytlabs, Project: linguo, Source: lookup.py
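The helpers extractNEwithClass and extractNEwithoutClass are defined elsewhere in the project and not shown here; a plausible stand-in, inferred purely from the names (an assumption, not the project's actual code), might be:

def extractNEwithClass(tree):
    # collect (label, entity) pairs such as ('PERSON', 'John Smith')
    return [(subtree.label(), ' '.join(tok for tok, pos in subtree.leaves()))
            for subtree in tree.subtrees(lambda t: t.label() != 'S')]

def extractNEwithoutClass(tree):
    # collect plain entity strings; with binary=True every entity is labelled 'NE'
    return [' '.join(tok for tok, pos in subtree.leaves())
            for subtree in tree.subtrees(lambda t: t.label() == 'NE')]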
Example 4: nameEntityExtract

def nameEntityExtract(document):
    sentences = nltk.sent_tokenize(document)
    sentences = [nltk.word_tokenize(sent) for sent in sentences]
    sentences = [nltk.pos_tag(sent) for sent in sentences]
    print(sentences[0])
    print("the length of sentences is: " + str(len(sentences)))
    sent = sentences[0]
    print(nltk.ne_chunk(sent, binary=True))
Author: yuqiaoyan, Project: Python, Source: nltkExtract.py
Example 5: English_NER

def English_NER(sentence):
    # sentence is assumed to be POS-tagged already
    # with binary=True, every named entity is tagged simply as NE
    print('Named entities tagged only as NE:')
    print(nltk.ne_chunk(sentence, binary=True))
    # by default, entities carry type labels such as PERSON, ORGANIZATION, GPE
    print('Named entities with type labels (PERSON, ORGANIZATION, GPE, ...):')
    print(nltk.ne_chunk(sentence))
Author: littlewilliam, Project: Natural-Language-process, Source: 1_Named_Entity_Recognition.py
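To make the binary/typed difference concrete, here is a short sketch; the exact labels depend on the NLTK version and model, so treat the output as approximate:

import nltk

tagged = nltk.pos_tag(nltk.word_tokenize("The White House is in Washington."))

print(nltk.ne_chunk(tagged, binary=True))
# roughly: (S The/DT (NE White/NNP House/NNP) is/VBZ in/IN (NE Washington/NNP) ./.)

print(nltk.ne_chunk(tagged))
# roughly: (S The/DT (FACILITY White/NNP House/NNP) is/VBZ in/IN (GPE Washington/NNP) ./.)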
Example 6: main

def main():
    sent = nltk.corpus.treebank.tagged_sents()[22]
    print("sent (nltk):", sent)
    # print(nltk.ne_chunk(sent, binary=True))
    # print(nltk.ne_chunk(sent))
    # ie_preprocess tokenizes and POS-tags the raw text (see NLTK book, ch. 7)
    sent = ie_preprocess("""Injured personnel consisting of six Schlum employees were immediately transported
        to nearby hospitals and most of them (were)
        discharged after having received treatment""")
    print(sent)
    print(nltk.ne_chunk(sent[0]))
Author: attibalazs, Project: nltk-examples, Source: 7.5_Named_Entity_Recognition.py
Example 7: process_contents

def process_contents():
    try:
        for i in tokenized[5:]:
            words = nltk.word_tokenize(i)
            tagged = nltk.pos_tag(words)
            namedEnt = nltk.ne_chunk(tagged)               # typed chunks may split "White", "House"
            namedEnt = nltk.ne_chunk(tagged, binary=True)  # binary mode keeps "White House" as one NE
            namedEnt.draw()
    except Exception as e:
        print(str(e))
Author: matt-ice, Project: python_nltk_tutorial, Source: Unit 07 - Named Entry Recognition.py
Example 8: process_content
def process_content():
    try:
        for i in tokenized:
            words = nltk.word_tokenize(i)
            tagged = nltk.pos_tag(words)
            namedEnt1 = nltk.ne_chunk(tagged)               # named entities with their category
            namedEnt2 = nltk.ne_chunk(tagged, binary=True)  # named entities without categories
            namedEnt2.draw()
    except Exception as e:
        print(str(e))
Author: MaryamZi, Project: WSO2_PYTHON_NLTK, Source: NamedEntityEcognition.py
Example 9: process_content
def process_content():
    for i in custom_tokenized[5:]:
        words = word_tokenize(i)
        tagged = nltk.pos_tag(words)
        namedEnt = nltk.ne_chunk(tagged)
        print(namedEnt)
Author: jmarthernandez, Project: py-nltk, Source: mlk.py
Example 10: get_entities

def get_entities(self, sentences):
    """Run named-entity recognition over a list of sentences.

    Args:
        sentences: the list of sentences.

    Returns:
        A dictionary mapping each entity label (PERSON, GPE, ...) to a
        sorted list of the entities found under that label.
    """
    entities = {}
    # tokenization
    tokens = [nltk.tokenize.word_tokenize(s) for s in sentences]
    # part-of-speech tagging
    pos_tagged_tokens = [nltk.pos_tag(t) for t in tokens]
    # chunking
    chunked_nes = [nltk.ne_chunk(c) for c in pos_tagged_tokens]
    for tree in chunked_nes:
        # entity subtrees sit at height 2; skip the sentence root 'S'
        for s in tree.subtrees(lambda t: t.height() == 2):
            if s.label() != 'S':
                entity = ' '.join(i[0] for i in s.leaves())
                if s.label() in entities:
                    if entity not in entities[s.label()]:
                        entities[s.label()].append(entity)
                        entities[s.label()].sort()
                else:
                    entities[s.label()] = [entity]
    return entities
Author: gdamdam, Project: sumo, Source: analyzer.py
Example 11: parse_questions

def parse_questions():
    print("Parsing Questions...")
    parsed_questions = {}
    with open(DIR + '/questions.txt', 'r') as f:
        data = f.read()
    questions = re.split(r'[\s]*</top>[\s]*', data)
    if len(questions[-1].strip()) == 0:
        questions.pop()
    qc = QuestionClassifier.QuestionClassifier()
    for question in questions:
        question_number = int(re.search(r"<num>[\s]*Number:[\s]*([0-9]+)", question).group(1))
        question = re.search(r"<desc>[\s]*Description:[\s]*([a-zA-Z0-9\-\?\'\. ]+)", question).group(1)
        question_words = nltk.word_tokenize(question)
        question_pos = nltk.pos_tag(question_words)
        question_nes = nltk.ne_chunk(question_pos)
        question_tree = Chunker.chunker.parse(question_pos)
        question_classification = qc.classify(question)
        qwords, nouns, nes = [], [], []
        for part in question_nes:
            if hasattr(part, 'label'):
                # an entity subtree: record its label and first token
                nes.append((part.label(), part.leaves()[0][0]))
            elif part[1] in ('WP', 'WRB'):   # question words: who, what, where, ...
                qwords.append(part[0])
            elif part[1] in ('NN', 'NNP'):   # nouns and proper nouns
                nouns.append(part[0])
        parsed_questions[question_number] = {
            "question": question,
            "pos": question_pos,
            "ne": question_nes,
            "parse_tree": question_tree,
            "question_classification": question_classification,
            "question_words": qwords,
            "nouns": nouns,
            "ne_words": nes,
        }
    with open(DIR + '/parsed_questions.txt', 'wb') as f:
        pickle.dump(parsed_questions, f)
Author: jcccf, Project: cs4740, Source: Parser.py
Example 12: extract_normal_ne

def extract_normal_ne(self, text):
    result = []
    for sent in sent_tokenize(text) if text else []:
        for chunk in ne_chunk(pos_tag(word_tokenize(sent))):
            # entity chunks are Tree objects carrying a label
            if hasattr(chunk, "label"):
                result.append(" ".join([c[0] for c in chunk.leaves()]))
    return result
Author: rchiba, Project: HipTrip, Source: linkage.py
Example 13: extract_concepts

def extract_concepts(text):
    """
    Uses the NLTK natural language processing library to extract
    from a text the essential terms that appear in it.
    """
    concepts = []
    try:
        ignored_words = corpus.stopwords.words('english')
        ignored_words.append("n't")
        appeared = {}
        tokenized = nltk.word_tokenize(text)
        tagged = nltk.pos_tag(tokenized)
        named_entities = nltk.ne_chunk(tagged)
        # leaves() flattens the chunk tree back into (token, tag) pairs
        for ne in named_entities.leaves():
            if (len(ne[0]) > 2 and ne[0].lower() not in ignored_words
                    and not (ne[0].startswith("http") or ne[0].startswith("//"))):
                name = ne[0]
                if name in appeared:
                    continue
                concepts.append(name)
                appeared[name] = True
    except Exception:
        print("extract concepts failed:", sys.exc_info())
    return concepts
Author: dibaunaumh, Project: ikana1010, Source: views.py
Example 14: ne_tag

def ne_tag(sentences):
    # element [1] of raw_trigram_tag's result holds the tagged sentences
    tagged = raw_trigram_tag(sentences, tagger_file="tagger.pkl")[1]
    fin = []
    for tagged_sent in tagged:
        fin.append(nltk.ne_chunk(tagged_sent))
    return fin
Author: atokop, Project: compling, Source: named_entity_exec.py
Example 15: processor

def processor(data):
    try:
        tokenized = nltk.word_tokenize(data)
        tagged = nltk.pos_tag(tokenized)
        namedEnt = nltk.ne_chunk(tagged, binary=True)
        # pull NE chunks and adjectives back out of the trees' string forms
        entities = re.findall(r'NE\s(.*?)/', str(namedEnt))
        descriptives = re.findall(r'\(\'(\w*)\'.\s\'JJ\w?\'', str(tagged))
        if len(entities) > 1:
            pass
        elif len(entities) == 0:
            pass
        elif str(entities) == '_blank':
            pass
        else:
            print('Named: ', entities[0])
            print('Description: ')
            for eachDesc in descriptives:
                print(eachDesc)
                currentTime = time.time()
                dateStamp = datetime.datetime.fromtimestamp(currentTime).strftime('%Y-%m-%d %H:%M:%S')
                namedEntity = entities[0]
                relatedWord = eachDesc
                c.execute("INSERT INTO knowledgeBase (unix, dateStamp, namedEntity, relatedWord) VALUES (?,?,?,?)",
                          (currentTime, dateStamp, namedEntity, relatedWord))
                conn.commit()
    except Exception as e:
        print('failed in the first try of processor')
        print(str(e))
Author: gavve, Project: twitter-sentiment-analysis, Source: KnowledgeBase.py
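Scraping entities out of str(namedEnt) with regular expressions, as above, is fragile; walking the tree directly is safer. A small alternative sketch (not the original project's code):

def binary_entities(tree):
    # collect the token spans of the 'NE' chunks produced by ne_chunk(..., binary=True)
    return [' '.join(tok for tok, pos in sub.leaves())
            for sub in tree.subtrees(lambda t: t.label() == 'NE')]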
Example 16: recuperarEntidadesEn

def recuperarEntidadesEn(texto):
    ObjTag = Tokenizar()
    ObjDes = Desambiguar()
    Lista = []
    Lista2 = []
    for sentence in sent_tokenize(texto):
        tags = ObjTag.tagear(sentence)
        print(tags)
        parsed = ne_chunk(tags)
        print(parsed)
        for chunk in parsed:
            # entity chunks are Tree objects
            if hasattr(chunk, 'label'):
                Lista2.append(chunk.leaves()[0])
                Lista.append(' '.join(c[0] for c in chunk.leaves()))
        print(Lista2)
        print(ObjDes.DesambiguarTexto(Lista2, sentence))
        Lista2 = []
    return Lista
Author: jholoc, Project: proyectoScrapy, Source: ExtEntidadesEstable.py
Example 17: get_NERs

def get_NERs(path_to_seg):
    NER_dict = {}       # map entities to counts (number of occurrences in this seg)
    NERs_to_types = {}  # map entities to the kinds of things they are
    seg_text = open(path_to_seg).read()
    # strip *all* tags
    seg_text = strip_tags(seg_text, get_tags_in_text(seg_text))
    # tokenize, then POS-tag the text
    pos_tagged_seg = nltk.pos_tag(nltk.word_tokenize(seg_text))
    # and now the NER
    NERd_seg = nltk.ne_chunk(pos_tagged_seg)
    # somewhat hacky, but this is how the induced tree structure is parsed
    for subtree in NERd_seg:
        # subtrees (rather than plain (word, tag) tuples) are named entities
        if type(subtree) == nltk.tree.Tree:
            # note: subtree[0][0] keeps only the first token of a
            # multi-token entity such as "New York"
            entity = subtree[0][0]
            entity_type = subtree.label()  # was subtree.node in NLTK 2
            # if we've already encountered it, just bump the count
            if entity in NER_dict:
                NER_dict[entity] += 1
            else:
                NER_dict[entity] = 1
                # assume entities with the same name always share a type
                NERs_to_types[entity] = entity_type
    return NER_dict, NERs_to_types
Author: julietteseive, Project: InfiniteJest2.0, Source: jest.py
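The helpers strip_tags and get_tags_in_text are project code that is not shown; a minimal stand-in, inferred from the names alone (an assumption, not the project's implementation), could be:

import re

def get_tags_in_text(text):
    # distinct tag names appearing in the text, e.g. 'p' for '<p>...</p>'
    return set(re.findall(r'</?([A-Za-z][\w-]*)', text))

def strip_tags(text, tags):
    # delete opening and closing occurrences of each tag
    for tag in tags:
        text = re.sub(r'</?%s\b[^>]*>' % re.escape(tag), '', text)
    return text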
Example 18: get_entities

def get_entities(self, document):
    """
    Extract entities from a single document using nltk.ne_chunk.

    This method is called multiple times by the transform method.

    :param document: a list of lists of tuples
    :return entities: a list of space-joined entity strings
    """
    entities = []
    for paragraph in document:
        for sentence in paragraph:
            # the classifier chunks the sentence and adds category labels, e.g. PERSON
            trees = ne_chunk(sentence)
            # select only the trees with the kinds of entities we want
            for tree in trees:
                if hasattr(tree, 'label') and tree.label() in self.labels:
                    # each entry in entities is one entity found in the document
                    entities.append(' '.join([child[0].lower() for child in tree]))
    return entities
Author: yokeyong, Project: atap, Source: ner.py
Example 19: question_processing

def question_processing(ques):
    global corpus, name, list_query
    list_query = []
    speak(random.choice(choices) + ' ' + name, False)
    # Step 1: generate all tokens
    tokens = nltk.word_tokenize(ques)
    # Step 2: part-of-speech tag the question
    pos_tags = nltk.pos_tag(tokens)
    # Step 3: named-entity recognition over the POS tags
    pos_tree = nltk.ne_chunk(pos_tags)
    # filter all query words
    for i in pos_tags:
        if i[1] in ('NNP', 'NN', 'JJ', 'JJS', 'NNS', 'VBZ', 'RBS'):
            list_query.append(i[0])
    collection_name = []
    # get the matching list of collections (DBs) where the answer could be
    for i in list_query:
        if dict_collections.get(i.lower()):
            collection_name.append(dict_collections[i.lower()])
    # aggregate all the documents from the list of collections
    db.cursor = db.questions.find()
    corpus = []
    for i in db.cursor:
        for t in collection_name:
            if t in i:
                corpus.append(i[t])
Author: shubhamgupta28, Project: Lexis, Source: LexisBro.py
Example 20: named_entities

def named_entities(text, types=None):
    """Return the named entities found in a text.

    Adapted from emh's code (http://stackoverflow.com/users/2673189/emh)

    Parameters
    ----------
    text: str
        UTF-8 string
    types: list of strings
        Currently the list can include only "PERSON" and "ORGANIZATION"

    Returns
    -------
    dict
        One entry per entity type; each entry holds the list of entity
        strings found for that type.
    """
    if not types:
        types = ["PERSON", "ORGANIZATION"]
    named_entities = {"PERSON": [], "ORGANIZATION": []}
    tokens = nltk.tokenize.word_tokenize(text)
    pos = nltk.pos_tag(tokens)
    sentt = nltk.ne_chunk(pos, binary=False)
    for type_ in types:
        for subtree in sentt.subtrees(filter=lambda t: t.label() == type_):
            entity = ""
            for leaf in subtree.leaves():
                entity = entity + " " + leaf[0]
            named_entities[type_].append(entity.strip())
    return named_entities
Author: cgoldammer, Project: simple-text-webtool, Source: text_model.py
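A quick usage sketch for the function above; the exact labels depend on the installed NLTK model, so the output shown is indicative only:

text = "Barack Obama spoke at Google headquarters in California."
print(named_entities(text))
# indicatively: {'PERSON': ['Barack Obama'], 'ORGANIZATION': ['Google']}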
Note: the nltk.ne_chunk examples in this article were compiled by 纯净天空 from GitHub, MSDocs and other source-code and documentation platforms; the snippets come from community-contributed open-source projects. Copyright of the code remains with the original authors; please consult each project's License before redistributing or reusing it, and do not reproduce this page without permission.