Python nltk.ne_chunk Function Code Examples


This article collects typical usage examples of the nltk.ne_chunk function in Python. If you are wondering what ne_chunk does, how to call it, or what real-world uses look like, the curated examples below should help.



The sections below show 20 code examples of the ne_chunk function, ordered by popularity by default.
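Before diving into the collected examples, here is a minimal sketch of the pipeline they all share (tokenize, POS-tag, chunk), assuming Python 3 with NLTK 3 and its standard data packages installed; the sample sentence is made up.

import nltk

# One-time downloads of the resources ne_chunk and its helpers rely on:
# nltk.download('punkt'); nltk.download('averaged_perceptron_tagger')
# nltk.download('maxent_ne_chunker'); nltk.download('words')

sentence = "Mark Zuckerberg founded Facebook in Menlo Park, California."
tokens = nltk.word_tokenize(sentence)   # split into word tokens
tagged = nltk.pos_tag(tokens)           # attach POS tags, e.g. ('Mark', 'NNP')
tree = nltk.ne_chunk(tagged)            # returns an nltk.tree.Tree

# Entity chunks are subtrees; plain tokens stay as (word, tag) tuples.
for subtree in tree.subtrees(lambda t: t.label() != 'S'):
    print(subtree.label(), ' '.join(word for word, pos in subtree.leaves()))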

Example 1: extract_entities2

def extract_entities2(text):
	entities = []
	for sentence in sent_tokenize(text):
		print sentence
		# POS-tag the tokens, then re-tag them with the custom tagear() tagger
		tags = tagear(pos_tag(nltk.word_tokenize(sentence)))
		chunks = ne_chunk(tags)
		for chunk in chunks:
			# NLTK 2 idiom: named-entity subtrees expose a .node attribute
			if hasattr(chunk, 'node'):
				print chunk
				entities.append(chunk)
	return entities
Author: jholoc, Project: proyectoScrapy, Lines: 26, Source: Tokenizacion.py
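A side note on example 1 (and several of the examples below): hasattr(chunk, 'node') relies on the NLTK 2 tree API; in NLTK 3 the .node attribute was removed in favour of Tree.label(). A minimal Python 3 sketch of the same filtering idea, with an illustrative function name:

from nltk import ne_chunk, pos_tag, word_tokenize
from nltk.tokenize import sent_tokenize
from nltk.tree import Tree

def extract_entities_nltk3(text):
    """Return the named-entity subtrees found in text (NLTK 3 idiom)."""
    entities = []
    for sentence in sent_tokenize(text):
        chunks = ne_chunk(pos_tag(word_tokenize(sentence)))
        # Entity chunks are Tree objects; ordinary tokens are (word, tag) tuples.
        entities.extend(chunk for chunk in chunks if isinstance(chunk, Tree))
    return entities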


Example 2: test_nltkNERParsing

    def test_nltkNERParsing(self):
        testString = 'Natural Sciences and Engineering Research Council of Canada'
        unigrams = TokenizeOnWhitespacePunctuation(testString, keepCaps=True).getUnigrams()
        posTagged = nltk.pos_tag(unigrams)
        chunked = nltk.ne_chunk(posTagged)
        getGPEs = []

        for treeBranch in chunked:
            if hasattr(treeBranch, 'label') and treeBranch.label() == 'GPE':
                getGPEs.append(str(treeBranch))

        self.assertEqual(1, len(getGPEs))

        testString = 'Milwaukee Foundation'
        unigrams = TokenizeOnWhitespacePunctuation(testString, keepCaps=True).getUnigrams()
        posTagged = nltk.pos_tag(unigrams)
        chunked = nltk.ne_chunk(posTagged)
        # returns (S (PERSON Milwaukee/NNP) (ORGANIZATION Foundation/NNP))

        testString = 'New England Board of Higher Education'
        unigrams = TokenizeOnWhitespacePunctuation(testString, keepCaps=True).getUnigrams()
        posTagged = nltk.pos_tag(unigrams)
        chunked = nltk.ne_chunk(posTagged)
        # returns (S (GPE New/NNP)(ORGANIZATION England/NNP Board/NNP) of/IN (PERSON Higher/NNP Education/NNP))

        testString = 'New England Board of Higher Education'
        unigrams = TokenizeOnWhitespacePunctuation(testString).getUnigrams()
        posTagged = nltk.pos_tag(unigrams)
        chunked = nltk.ne_chunk(posTagged)
Author: kyajmiller, Project: Cerebro, Lines: 29, Source: TestClassifyBadScholarships.py


Example 3: extractNE

def extractNE(sentence, withClass):
    words = nltk.word_tokenize(sentence)  # split the sentence into word tokens (stopwords and punctuation are not removed here)
    if withClass:
        tree = nltk.ne_chunk(nltk.pos_tag(words), binary=False)
        return extractNEwithClass(tree)
    else:
        tree = nltk.ne_chunk(nltk.pos_tag(words), binary=True)
        return extractNEwithoutClass(tree)
Author: nytlabs, Project: linguo, Lines: 8, Source: lookup.py
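The binary flag used in example 3 is what switches between typed and untyped entities. A small sketch of the difference, assuming Python 3; the sentence and the labels shown in the comments are illustrative only, since the actual labels depend on the pretrained classifier:

import nltk

tagged = nltk.pos_tag(nltk.word_tokenize("Barack Obama visited the White House in Washington."))

# binary=False (the default): each entity subtree carries a type label,
# e.g. (PERSON Barack/NNP Obama/NNP) ... (GPE Washington/NNP)
print(nltk.ne_chunk(tagged, binary=False))

# binary=True: every entity is collapsed to the single label NE,
# e.g. (NE Barack/NNP Obama/NNP) ... (NE Washington/NNP)
print(nltk.ne_chunk(tagged, binary=True))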


Example 4: nameEntityExtract

def nameEntityExtract(document):
	sentences = nltk.sent_tokenize(document)
	sentences = [nltk.word_tokenize(sent) for sent in sentences]
	sentences = [nltk.pos_tag(sent) for sent in sentences]
	print sentences[0]
	print "the length of sentences is: " + str(len(sentences))
	sent = sentences[0]
	print nltk.ne_chunk(sent,binary=True)
Author: yuqiaoyan, Project: Python, Lines: 8, Source: nltkExtract.py


Example 5: English_NER

def English_NER(sentence):
    # Named entities are all tagged simply as NE
    print 'Named entities tagged only as NE:'
    print nltk.ne_chunk(sentence, binary=True)

    # Named entities get type labels such as PERSON, ORGANIZATION, GPE
    print 'Named entities with type labels such as PERSON, ORGANIZATION, GPE:'
    print nltk.ne_chunk(sentence)
Author: littlewilliam, Project: Natural-Language-process, Lines: 8, Source: 1_Named_Entity_Recognition.py


Example 6: main

def main():
    sent = nltk.corpus.treebank.tagged_sents()[22]
    print "sent (nltk):", sent
    #print nltk.ne_chunk(sent, binary=True)
    #print nltk.ne_chunk(sent)

    sent = ie_preprocess("""Injured personnel consisting of six Schlum employees were immediately transported
                        to nearby hospitals and most of them (were)
                        discharged after having received treatment""")
    print sent
    print nltk.ne_chunk(sent[0])
Author: attibalazs, Project: nltk-examples, Lines: 11, Source: 7.5_Named_Entity_Recognition.py


Example 7: process_contents

def process_contents():
    try:
        for i in tokenized[5:]:
            words = nltk.word_tokenize(i)
            tagged = nltk.pos_tag(words)
            namedEnt = nltk.ne_chunk(tagged)               # typed chunks, e.g. "White", "House" split
            namedEnt = nltk.ne_chunk(tagged, binary=True)  # binary NE chunks, e.g. "White House" kept together
            namedEnt.draw()

    except Exception as e:
        print(str(e))
Author: matt-ice, Project: python_nltk_tutorial, Lines: 11, Source: Unit+07+-+Named+Entry+Recognition.py


Example 8: process_content

def process_content():
    try:
        for i in tokenized:
            words = nltk.word_tokenize(i)
            tagged = nltk.pos_tag(words)

            namedEnt1 = nltk.ne_chunk(tagged) #Give all named entities with category
            namedEnt2 = nltk.ne_chunk(tagged, binary=True) #This gives named entity without category

            namedEnt2.draw()

    except Exception as e:
        print(str(e))
Author: MaryamZi, Project: WSO2_PYTHON_NLTK, Lines: 13, Source: NamedEntityEcognition.py


Example 9: process_content

def process_content():
    for i in custom_tokenized[5:]:
        words = word_tokenize(i)
        tagged = nltk.pos_tag(words)
        namedEnt = nltk.ne_chunk(tagged)

        print(namedEnt)
Author: jmarthernandez, Project: py-nltk, Lines: 7, Source: mlk.py


Example 10: get_entities

	def get_entities(self,sentences):
		""" The function returns the dictionary containing the results for
		the Name Entity Recognition analyze.

		Args:
		   sentences: the sentences list.

		Returns:
			dictionary:
		"""
		entities = dict([])

		# Tokenization
		tokens = [nltk.tokenize.word_tokenize(s) for s in sentences]

		# Part-Of-Speech tagging
		pos_tagged_tokens = [nltk.pos_tag(t) for t in tokens]

		# Chunking
		chunked_nes = [nltk.ne_chunk(c) for c in pos_tagged_tokens]

		for tree in chunked_nes:
			for s in tree.subtrees(lambda t: (t.height()==2)):
				if s.label()!='S':
					entity = ' '.join(i[0] for i in s.leaves())
					if s.label() in entities.keys():
						if entity not in entities[s.label()]:
							entities[s.label()].append(entity)
							entities[s.label()].sort()
					else:	
						entities[s.label()] = [entity]

		return entities
Author: gdamdam, Project: sumo, Lines: 33, Source: analyzer.py
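A standalone sketch of the same grouping idea as example 10, without the surrounding class, assuming Python 3 / NLTK 3; the sample sentence and the expected output shown in the comment are illustrative only:

import nltk
from collections import defaultdict

def entities_by_label(sentences):
    """Group named entities by NE label, mirroring example 10."""
    grouped = defaultdict(set)
    for sent in sentences:
        tree = nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(sent)))
        # height()==2 subtrees sit directly above the leaves, i.e. the entity chunks
        for subtree in tree.subtrees(lambda t: t.height() == 2 and t.label() != 'S'):
            grouped[subtree.label()].add(' '.join(tok for tok, pos in subtree.leaves()))
    return {label: sorted(ents) for label, ents in grouped.items()}

print(entities_by_label(["Barack Obama visited Paris with Angela Merkel."]))
# e.g. {'PERSON': ['Angela Merkel', 'Barack Obama'], 'GPE': ['Paris']}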


Example 11: parse_questions

def parse_questions():
  print "Parsing Questions..."
  parsed_questions = {}
  with open(DIR+'/questions.txt', 'r') as f:
    data = f.read()
    questions = re.split('[\s]*</top>[\s]*', data)
    if len(questions[-1].strip()) == 0: questions.pop()
    qc = QuestionClassifier.QuestionClassifier()
    for question in questions:
      question_number = int(re.search(r"<num>[\s]*Number:[\s]*([0-9]+)", question).group(1))
      question = re.search(r"<desc>[\s]*Description:[\s]*([a-zA-Z0-9\-\?\'\. ]+)", question).group(1)
      question_words = nltk.word_tokenize(question)
      question_pos = nltk.pos_tag(question_words)
      question_nes = nltk.ne_chunk(question_pos)
      question_tree = Chunker.chunker.parse(question_pos)
      question_classification = qc.classify(question)
      qwords, nouns, nes = [], [], []
      for part in question_nes:
        try:
          nes.append((part.node, part.leaves()[0][0]))
        except:
          if part[1] == 'WP' or part[1] == 'WRB':
            qwords.append(part[0])
          elif part[1] == 'NN' or part[1] == 'NNP':
            nouns.append(part[0])
      # print qwords, nouns, nes
      # print question_pos
      parsed_questions[question_number] = { "question": question, "pos": question_pos, "ne": question_nes, "parse_tree": question_tree, "question_classification": question_classification, "question_words": qwords, "nouns": nouns, "ne_words": nes }
  with open(DIR+'/parsed_questions.txt', 'wb') as f:
    pickle.dump(parsed_questions, f)
Author: jcccf, Project: cs4740, Lines: 30, Source: Parser.py


Example 12: extract_normal_ne

 def extract_normal_ne(self, text):
     result = []
     for sent in sent_tokenize(text) if text else []:
         for chunk in ne_chunk(pos_tag(word_tokenize(sent))):
             if hasattr(chunk, "node"):
                 result.append(" ".join([c[0] for c in chunk.leaves()]))
     return result
Author: rchiba, Project: HipTrip, Lines: 7, Source: linkage.py


Example 13: extract_concepts

def extract_concepts(text):
    """
    Uses the NLTK natural language processing library to 
    extract from a text the essential terms that appeared in it.
    """
    try:
        ignored_words = corpus.stopwords.words('english')
        ignored_words.append("n't")
        appeared = {}
        concepts = []
        tokenized = nltk.word_tokenize(text)
        tagged = nltk.pos_tag(tokenized)
        named_entities = nltk.ne_chunk(tagged)
        
        for ne in named_entities.leaves():
            #if ne[1] in ('NNS', 'NNP', 'NN'):
            if len(ne[0]) > 2 and ne[0].lower() not in ignored_words and not (ne[0].startswith("http") or ne[0].startswith("//")):
                name = ne[0]
                if name in appeared:
                    continue
                concepts.append(name)
                appeared[name] = True
    except:
        print "extract concepts failed:", sys.exc_info()
    return concepts
Author: dibaunaumh, Project: ikana1010, Lines: 25, Source: views.py


Example 14: ne_tag

def ne_tag(sentences):
    tagged = raw_trigram_tag(sentences, tagger_file="tagger.pkl")[1]
    fin = []
    for tagged_sent in tagged:
        # print tagged_sent
        fin.append(nltk.ne_chunk(tagged_sent))
    return fin
Author: atokop, Project: compling, Lines: 7, Source: named_entity_exec.py


Example 15: processor

def processor(data):
    try:
        tokenized = nltk.word_tokenize(data)
        tagged = nltk.pos_tag(tokenized)
        namedEnt = nltk.ne_chunk(tagged, binary=True)

        entities = re.findall(r'NE\s(.*?)/', str(namedEnt))
        #     ('
        descriptives = re.findall(r'\(\'(\w*)\'.\s\'JJ\w?\'',str(tagged))
        if len(entities) > 1:
            pass
        elif len(entities) == 0:
            pass
        elif str(entities) == '_blank':
            pass
        else:
            print 'Named: ', entities[0]
            print 'Description: '
            for eachDesc in descriptives:
                print eachDesc
                currentTime = time.time()
                dateStamp = datetime.datetime.fromtimestamp(currentTime).strftime('%Y-%m-%d %H:%M:%S')
                namedEntity = entities[0]
                relatedWord = eachDesc
                c.execute("INSERT INTO knowledgeBase (unix, dateStamp, namedEntity, relatedWord) VALUES (?,?,?,?)",
                          (currentTime, dateStamp, namedEntity, relatedWord))

                conn.commit()


                

    except Exception, e:
        print 'failed in the first try of processor'
        print str(e)
Author: gavve, Project: twitter-sentiment-analysis, Lines: 35, Source: KnowledgeBase.py


Example 16: recuperarEntidadesEn

	def recuperarEntidadesEn(texto):
		ObjTag = Tokenizar()
		ObjDes = Desambiguar()
		Lista = []
		Lista2= []
		for sentence in sent_tokenize(texto):
			#print sentence  
			tags=ObjTag.tagear(sentence)
			#tags=tagear(traducir(word_tokenize(sentence)))
			print tags
			parsed = ne_chunk(tags)
			print parsed
			for chunk in parsed:
				#print chunk
				#if hasattr(chunk, 'node'):
				#	print chunk.node
				if hasattr(chunk, 'node'):
					#print chunk	
					#print chunk.leaves()
					Lista2.append(chunk.leaves()[0])
					#print ' '.join(c[0] for c in chunk.leaves())
					Lista.append (' '.join(c[0] for c in chunk.leaves()))
			print Lista2
			print ObjDes.DesambiguarTexto(Lista2, sentence)
			Lista2=[]
		return Lista
Author: jholoc, Project: proyectoScrapy, Lines: 26, Source: ExtEntidadesEstable.py


Example 17: get_NERs

def get_NERs(path_to_seg):
    NER_dict = {} # map entities to counts (i.e., # of occurences in this seg)
    NERs_to_types = {} # map the NERs to the kinds of things they are

    seg_text = open(path_to_seg).read()
    
    # strip *all* tags 
    seg_text = strip_tags(seg_text, get_tags_in_text(seg_text))

    # tokenize, then POS text
    pos_tagged_seg = nltk.pos_tag(nltk.word_tokenize(seg_text))

    # and now the NER
    NERd_seg = nltk.ne_chunk(pos_tagged_seg)

    # kind of hacky, but this is how I'm parsing
    # the induced tree structure
    for subtree in NERd_seg:
        # then this is an NER
        if type(subtree) == nltk.tree.Tree:
            # ignoring the *type* of NER for now -- i can't think of a
            # case in which we'd care (typically, entities with the same
            # name *ought* to be of the same type, I think...)
            entity = subtree[0][0] # this parses out the token (entity) itself
            entity_type = subtree.node
            # if we've already encountered it, just bump the count
            if entity in NER_dict:
                NER_dict[entity] += 1
            else:
                NER_dict[entity] = 1
                NERs_to_types[entity] = subtree.node ### going to assume we always get this correct, I guess
    
    return NER_dict, NERs_to_types
Author: julietteseive, Project: InfiniteJest2.0, Lines: 33, Source: jest.py


Example 18: get_entities

    def get_entities(self, document):
        """
        Extract entities from a single document using the
        nltk.tree.ne_chunk method

        This method is called multiple times by the tranform method

        :param document: a list of lists of tuples
        :return entities: a list of comma-separated strings
        """
        entities = []
        for paragraph in document:
            for sentence in paragraph:
                # classifier chunk the sentences, adds category labels, e.g. PERSON
                trees = ne_chunk(sentence)
                # select only trees with the kinds of entities we want
                for tree in trees:
                    if hasattr(tree, 'label'):
                        if tree.label() in self.labels:
                            # entities is a list, each entry is a list of entities
                            # for a document
                            entities.append(
                                ' '.join([child[0].lower() for child in tree])
                                )
        return entities
Author: yokeyong, Project: atap, Lines: 25, Source: ner.py


Example 19: question_processing

def question_processing(ques):
    global corpus, name, list_query
    list_query = []
    # corpus=[]
    speak(random.choice(choices) + ' ' + name, False)
    # Step1: Generate all tokens
    tokens = nltk.word_tokenize(ques)
    # Step2: Part of Speech tagging of the question
    pos_tags = nltk.pos_tag(tokens)
    # Step3: Named Entity Recoginition of the POS Tags
    pos_tree = nltk.ne_chunk(pos_tags)

    # filter all query words
    for i in pos_tags:
        if i[1] in ('NNP', 'NN', 'JJ', 'JJS', 'NNS', 'VBZ', 'RBS'):
            list_query.append(i[0])
    # list_query)

    collection_name = []

    # Get the Matching List of Collection(DBs) where the answer could be.
    for i in list_query:
        if dict_collections.get(i.lower()):
            collection_name.append(dict_collections[i.lower()])

    # print(collection_name)

    # Aggerate all the Documents from the list of Collections
    db.cursor = db.questions.find()
    corpus = []
    for i in db.cursor:
        for t in collection_name:
            if t in i:
                corpus.append(i[t])
Author: shubhamgupta28, Project: Lexis, Lines: 35, Source: LexisBro.py


Example 20: named_entities

def named_entities(text, types=None):
    """This functions returns named entities from a text.
    Adapted from emh's code (http://stackoverflow.com/users/2673189/emh)

    Parameters
    ----------
    text: str
        UTF-8 string
    types: list of strings
        Currently the list can include only "PERSON" and "ORGANIZATION"

    Returns
    -------
    dict
        Dictionary with one entry for each type of entity. For each of these 
        entries, contains a list of strings with found entities
    """
    if not types:
        types = ["PERSON", "ORGANIZATION"]
    named_entities = {"PERSON": [], "ORGANIZATION": []}
    tokens = nltk.tokenize.word_tokenize(text)
    pos = nltk.pos_tag(tokens)
    sentt = nltk.ne_chunk(pos, binary=False)
    for type_ in types:
        for subtree in sentt.subtrees(filter=lambda t: t.label() == type_):
            entity = ""
            for leaf in subtree.leaves():
                entity = entity + " " + leaf[0]
            named_entities[type_].append(entity.strip())
    return named_entities
Author: cgoldammer, Project: simple-text-webtool, Lines: 30, Source: text_model.py
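A possible call to example 20's named_entities function, assuming the function above is defined and nltk imported; the text is made up, and the exact entities returned depend on the pretrained chunker:

text = "Tim Cook runs Apple, and Sundar Pichai runs Google."
print(named_entities(text))
# e.g. {'PERSON': ['Tim Cook', 'Sundar Pichai'], 'ORGANIZATION': ['Apple', 'Google']}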



Note: The nltk.ne_chunk examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets come from open-source projects contributed by their authors; copyright remains with the original authors, and any use or redistribution should follow each project's license. Do not republish without permission.

