Python tagger.Tagger Class Code Examples


This article collects typical usage examples of the tagger.Tagger class in Python. If you are wondering what exactly the Tagger class does, how to use it, or what Tagger usage looks like in practice, the curated class code examples below may help.



The following presents 20 code examples of the Tagger class, sorted by popularity by default.

Example 1: main_tag

def main_tag(featureSet, options):
    labelCounter, featCounter = BookKeeper(), BookKeeper()
    labelCounter.readFromFile('{0}.labelNumbers'.format(options.modelName))
    featCounter.readFromFile('{0}.featureNumbers'.format(options.modelName))
    optionsDict = vars(options)
    optionsDict['labelCounter'] = labelCounter
    optionsDict['featCounter'] = featCounter
    optionsDict['modelFile'] = '{0}.model'.format(options.modelName)
    tagger = Tagger(featureSet, optionsDict)
    if options.inFeatFile:
        tagger_func = lambda: tagger.tag_features(options.inFeatFile)
        writer_func = lambda s, c: writeSentence(s, comment=c)
    elif options.input_dir:
        assert isdir(options.input_dir), "--input-dir must be a directory"
        out_dir = "{}_out".format(options.input_dir)
        os.mkdir(out_dir)
        tagger_func = lambda: tagger.tag_dir(options.input_dir)
        writer_func = lambda s, c: writeSentence(
            s, out=open(join(out_dir, '{}.tagged'.format(c)), 'a'))
    else:
        tagger_func = lambda: tagger.tag_corp(sys.stdin)
        writer_func = lambda s, c: writeSentence(s, comment=c)

    for sen, other in tagger_func():
        writer_func(sen, other)
Developer: gabor-recski, Project: HunTag, Lines: 25, Source: huntag.py


Example 2: genKeyWords

 def genKeyWords(self, question):
   questionToken = self.preProcess(question)
   tagger = Tagger('portugues')
   token = tagger.classify(questionToken)
   keyList = []
   for tok in token:
     if tok[1] == 'N' or re.match('ADJ', tok[1]) or re.match('V', tok[1]):
       keyList.append(tok)
   print keyList
   print len(keyList)
   return keyList
Developer: DarkCoder1, Project: FriggTest--v0.9.1, Lines: 11, Source: question.py
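
The filter in genKeyWords keeps a token only when its tag is a noun or starts with ADJ or V. A quick standalone illustration of that filtering step (the (word, tag) pairs below are made up for the sketch, not output of the real Tagger):

import re

tokens = [('casa', 'N'), ('bonita', 'ADJ'), ('e', 'KC'), ('correr', 'V')]
kept = [t for t in tokens if t[1] == 'N' or re.match('ADJ', t[1]) or re.match('V', t[1])]
print(kept)  # [('casa', 'N'), ('bonita', 'ADJ'), ('correr', 'V')]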


Example 3: mainTag

def mainTag(featureSet, options):
    transModel = None
    if not (options['printWeights'] or options['toCRFsuite']):
        print('loading transition model...', end='', file=sys.stderr, flush=True)
        transModel = TransModel.getModelFromFile(options['transModelFileName'])
        print('done', file=sys.stderr, flush=True)

    tagger = Tagger(featureSet, transModel, options)
    if 'inFeatFile' in options and options['inFeatFile']:
        # Tag a featurized file to outputStream
        for sen, comment in tagger.tagFeatures(options['inFeatFile']):
            writeSentence(sen, options['outputStream'], comment)
    elif 'ioDirs' in options and options['ioDirs']:
        # Tag all files in a directory to fileName.tagged
        for sen, fileName in tagger.tagDir(options['ioDirs'][0]):
            writeSentence(sen, open(join(options['ioDirs'][1], '{0}.tagged'.format(fileName)), 'a', encoding='UTF-8'))
    elif 'toCRFsuite' in options and options['toCRFsuite']:
        # Make CRFsuite format to outputStream for tagging
        tagger.toCRFsuite(options['inputStream'], options['outputStream'])
    elif 'printWeights' in options and options['printWeights']:
        # Print MaxEnt weights to STDOUT
        tagger.printWeights(options['printWeights'], options['outputStream'])
    else:
        # Tag inputStream to outputStream
        for sen, comment in tagger.tagCorp(options['inputStream']):
            writeSentence(sen, options['outputStream'], comment)
Developer: dlt-rilmta, Project: hunlp-GATE, Lines: 26, Source: huntag.py


Example 4: gen_opt

 def gen_opt(self, file_text):
     '''
     Method that generates the new text.
     Each word is classified and concatenated with its type via the '/' character,
     then concatenated with the result of the synset lookup.
     '''
     tagger = Tagger('portugues')
     tok = word_tokenize(file_text.read().decode('utf-8'))
     clas = tagger.classify(tok)
     p_text = []
     for c in clas:
         if c[1] == 'N' or re.match('ADJ',c[1]) or re.match('V',c[1]) or c[1] == '.':
             gen_set = self.gen_synset(c)
             p_text.append(gen_set)
     optimized_text = ' '.join(p_text)
     return optimized_text
Developer: DarkCoder1, Project: FriggTest--v0.9.1, Lines: 16, Source: backend.py


Example 5: __init__

class SearchEngine:
    """
    The class I consider the core of this module. It is the data structure that
    holds the files used by the engine for processing.
    """

    def __init__(self):
        self.tagger = Tagger('portugues')

    def insert(self, files):
        """
        This method takes an array of files as input and returns a reverse-index list.
        """
        dataset = []
        for f in files:
            paragraph = sent_tokenize(f[1].lower())
            for index, p in enumerate(paragraph):
                words = word_tokenize(p)
                classes = self.tagger.classify(words)
                for c in classes:
                    if re.match('N', c[1]):
                        keysId = [item['_id'] for item in dataset]
                        print 'qtd chaves: ' + str(len(keysId))
                        if c[0] in keysId:
                            ind = keysId.index(c[0])
                            files = dataset[ind]
                            if os.path.basename(f[0]) in files.keys():
                                if not index in dataset[ind][os.path.basename(f[0])]:
                                    dataset[ind][os.path.basename(f[0])].append(index)
                            else:
                                dataset[ind][os.path.basename(f[0])] = [index]
                        else:
                            dataset.append({'_id':c[0], os.path.basename(f[0]):[index]})
        return dataset

    def extract(self, data):
        """
        Simple search algorithm that returns a tuple with the files.
        """
        print 'vim pelo método extract'
        for d in data:
            paragraphs = []
            try:
                d.pop('_id')
            except KeyError:
                'ok'
            for k in d.keys(): # the [1:] was to drop the first key!
                #path_name = os.path.abspath('backend')
                path_name = os.path.abspath('backend') + '/texts/'
                #text = open(path_name+'\\texts\\'+ k + '.txt').read().decode('utf-8')
               # print path_name+'\\'+ k + '.txt'
                text = open(path_name+ k + '.txt').read().decode('utf-8')
                text_sent = sent_tokenize(text)
                for index in d[k]:
                    paragraphs.append(text_sent[index])
        print paragraphs
        return set(paragraphs)
Developer: DarkCoder1, Project: frigg-demo-1, Lines: 57, Source: TextDAO.py
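
The insert method above builds a reverse index: each noun maps to the files it occurs in and, per file, to the paragraph indices where it appears. A minimal sketch of the same data structure without the POS tagger or NLTK (hypothetical helper, standard library only, splitting on '.' instead of sent_tokenize):

from collections import defaultdict

def build_index(files):
    # files: list of (filename, text) pairs
    # index maps word -> {filename: [sentence indices]}
    index = defaultdict(lambda: defaultdict(list))
    for name, text in files:
        for i, sentence in enumerate(text.lower().split('.')):
            for word in sentence.split():
                if i not in index[word][name]:
                    index[word][name].append(i)
    return index

print(dict(build_index([('a.txt', 'Cats purr. Dogs bark. Cats nap.')])['cats']))
# {'a.txt': [0, 2]}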


Example 6: pos_tag

def pos_tag(tweets):
    """
    Uses the POS tagger interface to tag parts of speech in all tweet texts and stores the tags in the tweet objects.
    """
    print "Tagging..."
    untagged_texts = []
    for tweet in tweets:
        tagger = Tagger()
        textbody = tweet.text
        for phrase in re.split("\.|!|\?", textbody):
            if len(phrase)<2: continue
            phrase = string.replace(phrase, "?", "")
            phrase = string.replace(phrase, "!", "")
            phrase = string.replace(phrase, ".", "")
            tags = tagger.tag_text(phrase)
            if tags!=None:
                tweet.tagged_words.append(tags)
    print "Untagged texts: "
    for text in untagged_texts:
        print text
    print "Tagging done."
    return tweets
Developer: JohnArneOye, Project: twitter-sentiment, Lines: 22, Source: preprocessing.py
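
pos_tag splits each tweet into phrases on sentence-ending punctuation and skips phrases shorter than two characters before tagging. The splitting step in isolation (standard library only, sample text made up):

import re

text = "Great day! Is it raining? Yes."
phrases = [p for p in re.split(r"\.|!|\?", text) if len(p) >= 2]
print(phrases)  # ['Great day', ' Is it raining', ' Yes']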


Example 7: __init__

    def __init__(self, p):
        self.save_dir, _ = generate_directory(p.save_to)
        self.p = p

        print_p(self.p)

        self.tagger = Tagger.create_tagger(self.p)

        if 'load_from' in self.p and (self.p.load_from is not None):
            self.load_model(self.p.load_from)

        logger.info('Setting up data...')
        self.streams = setup_data(self.p, use_unlabeled=True, use_labeled=True)
Developer: CuriousAI, Project: tagger, Lines: 13, Source: tagger_exp.py


Example 8: main

def main(training_file, training_dir, load_model, skip_train):
    logging.debug('Initializing random seed to 0.')
    random.seed(0)
    np.random.seed(0)

    if load_model:
        tagger = Tagger.load(load_model)
        data = TaggingDataset.load_from_file(training_file, vocab=tagger.vocab, tags=tagger.tags)
    else:
        assert not skip_train, 'Cannot --skip_train without a saved model.'
        logging.debug('Loading dataset from: %s' % training_file)
        data = TaggingDataset.load_from_file(training_file)
        logging.debug('Initializing model.')
        tagger = Tagger(data.vocab, data.tags)

    if not skip_train:
        train_data, dev_data = data.split(0.7)

        batches_train = train_data.prepare_batches(n_seqs_per_batch=10)
        batches_dev = dev_data.prepare_batches(n_seqs_per_batch=100)

        train_mgr = TrainingManager(
            avg_n_losses=len(batches_train),
            training_dir=training_dir,
            tagger_taste_fn=lambda: taste_tagger(tagger, batches_train),
            tagger_dev_eval_fn=lambda: eval_tagger(tagger, batches_dev),
            tagger_save_fn=lambda fname: tagger.save(fname)
        )

        logging.debug('Starting training.')
        while train_mgr.should_continue():
            mb_x, mb_y = random.choice(batches_train)
            mb_loss = tagger.learn(mb_x, mb_y)

            train_mgr.tick(mb_loss=mb_loss)

    evaluate_tagger_and_writeout(tagger)
Developer: oplatek, Project: rh_nntagging, Lines: 37, Source: run.py
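
The training loop in main repeatedly samples one random minibatch, lets the tagger learn from it, and reports the loss to the training manager until it signals to stop. A minimal stand-in for that loop structure (all names and data below are placeholders, not the real Tagger or TrainingManager):

import random

random.seed(0)
batches_train = [([1.0, 2.0], [0, 1]), ([3.0, 4.0], [1, 0])]  # (mb_x, mb_y) pairs
for step in range(5):                        # stand-in for train_mgr.should_continue()
    mb_x, mb_y = random.choice(batches_train)
    mb_loss = sum(mb_x)                      # stand-in for tagger.learn(mb_x, mb_y)
    print('step', step, 'loss', mb_loss)     # stand-in for train_mgr.tick(mb_loss=mb_loss)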


Example 9: generation

 def generation(self):
     self.tokenized = [nltk.word_tokenize(self.sentences[i]) for i in range(len(self.sentences))]
     self.generate_average_position()
     self.types = {}
     tagger = Tagger(False)
     for i in range(len(self.tokenized)):
         typess = tagger.tag_sent(self.tokenized[i])
         for j in range(len(typess)):
             word,val = typess[j]
             if(not word in self.types):
                 self.types[word] = []
                 self.types[word].append(val)
             else:
                 self.types[word].append(val)
     for element in self.types:
         most_common,num_most_common = Counter(self.types[element]).most_common(1)[0] 
         self.types[element] =most_common
     num_sent  = 1 
     for sent in self.tokenized:
         actual = sent
         num_word = 1
         last = None
         for mot in actual:
             actual = None
             if(not self.isWordIn(mot)):
                 tmp = Etiquette(mot,num_sent,num_word)
                 self.nodes.append(tmp)
                 actual = tmp
             else:
                  actual = self.get_node_with_value(mot)
                  actual.add_sid_pid(num_sent,num_word)
             if(num_word>1):
                 last.add_next(actual.get_id())
             last = actual
             num_word +=1
         num_sent +=1
Developer: Nicolas99-9, Project: TERApprentissage, Lines: 36, Source: summurization3.py
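
generation resolves each word to its single most frequent tag via Counter.most_common(1). That selection step in isolation (sample tag list made up):

from collections import Counter

tags = ['N', 'V', 'N', 'ADJ', 'N']
most_common, num_most_common = Counter(tags).most_common(1)[0]
print(most_common, num_most_common)  # N 3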


Example 10: testExpressionValidatorPlusAndAsterisk

    def testExpressionValidatorPlusAndAsterisk(self):
        with self.assertRaises(SystemExit) as exit_val:
            Tagger.validateExpressions(['a+*b'])

        self.assertEqual(exit_val.exception.code, 4)
Developer: jvitasek, Project: VUT-FIT-IPP-Proj3, Lines: 5, Source: test.py


Example 11: testExpressionValidatorNegationAndNegation

    def testExpressionValidatorNegationAndNegation(self):
        with self.assertRaises(SystemExit) as exit_val:
            Tagger.validateExpressions(['a!!b'])

        self.assertEqual(exit_val.exception.code, 4)
Developer: jvitasek, Project: VUT-FIT-IPP-Proj3, Lines: 5, Source: test.py


Example 12: testExpressionValidatorTwoDots

    def testExpressionValidatorTwoDots(self):
        with self.assertRaises(SystemExit) as exit_val:
            Tagger.validateExpressions(['a..b'])

        self.assertEqual(exit_val.exception.code, 4)
Developer: jvitasek, Project: VUT-FIT-IPP-Proj3, Lines: 5, Source: test.py
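
Examples 10 through 12 all check the same contract: validateExpressions is expected to terminate the program with exit code 4 on a malformed expression, which the test captures with assertRaises(SystemExit). A self-contained sketch of that test pattern using a stand-in validator (the real Tagger.validateExpressions is not reproduced here):

import sys
import unittest

def validate_expressions(expressions):
    # stand-in validator: exit with code 4 on an obviously malformed pattern
    for expr in expressions:
        if '+*' in expr or '!!' in expr or '..' in expr:
            sys.exit(4)

class ValidationExitCodeTest(unittest.TestCase):
    def test_invalid_expression_exits_with_4(self):
        with self.assertRaises(SystemExit) as exit_val:
            validate_expressions(['a+*b'])
        self.assertEqual(exit_val.exception.code, 4)

if __name__ == '__main__':
    unittest.main()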


Example 13: testReformatMultipleQuantifiers

 def testReformatMultipleQuantifiers(self):
     self.assertEqual(Tagger.bonusReformatMutlipleQuantifiers('sub+++'), 'sub+')
Developer: jvitasek, Project: VUT-FIT-IPP-Proj3, Lines: 2, Source: test.py


Example 14: testQuantifierPriorityExclamationAndDot

 def testQuantifierPriorityExclamationAndDot(self):
     self.assertEqual(Tagger.bonusMultipleQuantifiersPriority('..!!..'), '!')
Developer: jvitasek, Project: VUT-FIT-IPP-Proj3, Lines: 2, Source: test.py


Example 15: testQuantifierPriorityExclamationAndPlus

 def testQuantifierPriorityExclamationAndPlus(self):
     self.assertEqual(Tagger.bonusMultipleQuantifiersPriority('+++!++'), '!')
Developer: jvitasek, Project: VUT-FIT-IPP-Proj3, Lines: 2, Source: test.py


Example 16: testQuantifierPriorityPipe

 def testQuantifierPriorityPipe(self):
     self.assertEqual(Tagger.bonusMultipleQuantifiersPriority('||||'), '|')
Developer: jvitasek, Project: VUT-FIT-IPP-Proj3, Lines: 2, Source: test.py


Example 17: testCommandValidatorMaxSize

 def testCommandValidatorMaxSize(self):
     Tagger.validateCommands([['size:7']])
     self.assertTrue(self)
Developer: jvitasek, Project: VUT-FIT-IPP-Proj3, Lines: 3, Source: test.py


Example 18: testQuantifierPriorityExclamationAndPipe

 def testQuantifierPriorityExclamationAndPipe(self):
     self.assertEqual(Tagger.bonusMultipleQuantifiersPriority('||!!|'), '!')
Developer: jvitasek, Project: VUT-FIT-IPP-Proj3, Lines: 2, Source: test.py


Example 19: testCommandValidatorInvalidColor

    def testCommandValidatorInvalidColor(self):
        with self.assertRaises(SystemExit) as exit_val:
            Tagger.validateCommands([['color:[email protected]*fefeee']])

        self.assertEqual(exit_val.exception.code, 4)
Developer: jvitasek, Project: VUT-FIT-IPP-Proj3, Lines: 5, Source: test.py


Example 20: testQuantifierPriorityAsteriskAndPlus

 def testQuantifierPriorityAsteriskAndPlus(self):
     self.assertEqual(Tagger.bonusMultipleQuantifiersPriority('+**+*'), '*')
Developer: jvitasek, Project: VUT-FIT-IPP-Proj3, Lines: 2, Source: test.py



Note: The tagger.Tagger class examples in this article were compiled by 纯净天空 from source code and documentation hosted on platforms such as GitHub and MSDocs. The code snippets are taken from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and any distribution or use should follow the corresponding project's license. Please do not reproduce without permission.

