Python pymorphy.get_morph function code examples


This article collects typical usage examples of the pymorphy.get_morph function in Python. If you are wondering what get_morph does, how to call it, or what real-world usage looks like, the curated examples below should help.



Twenty code examples of the get_morph function are shown below, ordered by popularity by default.
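
All of the examples share the same basic pattern: load the pymorphy dictionaries once with get_morph, then call methods such as normalize, get_graminfo, or inflect_ru on the returned morph object. Here is a minimal sketch of that pattern (Python 2, as in the examples below); the dictionary path is a placeholder for wherever your unpacked ru.sqlite-json dictionaries live.

# -*- coding: utf-8 -*-
# Minimal usage sketch (Python 2, pymorphy 0.5.x); '/path/to/ru.sqlite-json' is a placeholder path.
from pymorphy import get_morph

morph = get_morph('/path/to/ru.sqlite-json')  # loading the dictionaries is slow, so do it once

word = u'ЛЮДЕЙ'                               # pymorphy 0.5 expects upper-case unicode input
print morph.normalize(word)                   # set of normal forms, e.g. set([u'ЧЕЛОВЕК'])
print morph.get_graminfo(word)[0]['class']    # part of speech of the first parse
print morph.inflect_ru(u'ГОРОД', u'пр')       # inflect a noun into the prepositional case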

Example 1: formalize

def formalize(filename, morph_dict=DICTS_DIR, stop_dict=stop_dict_path):
    morph = get_morph(morph_dict)
    stop_words = get_stop_words()
    dict = {}
    words = 0.0
    try:
        f = open(filename)
    except IOError:
        raise
    for line in f.readlines():
        for word in line.split():
            word = word.decode("utf-8", 'ignore')
            word = word.strip(u'[,.:;\"\')$«»(?<>!-_—//=]\n\t')
            word = word.replace('.', '_')
            word = morph.normalize(word.upper())
            if isinstance(word, set):
                word = word.pop()
            else:
                continue
            word = word.lower()
            words += 1
            if word in stop_words or not word:
                continue
            if not word in dict:
                dict[word] = 1.0
            else:
                dict[word] += 1.0
    for key in dict:
        dict[key] /= words
    return dict
Author: xamvz, Project: deep_vlom, Lines: 30, Source: preprocessor.py


Example 2: post

    def post(self, request):
        morph = get_morph(join(settings.PROJECT_DIR, 'morph'))
        last_name = self.DATA['last_name']
        first_name = self.DATA['first_name']
        patronymic = self.DATA['patronymic']

        # to decline the surname, we first need to determine the person's gender
        try:
            sex = morph.get_graminfo(first_name.upper())[0]['info'].split(',', 1)[0]
        except IndexError:
            # get_graminfo() returned []
            # print 'get_graminfo failed on ', first_name
            sex = u'жр'
        # surname
        last_name_inflated = firstcaps(lastnames_ru.inflect(morph,
            last_name.upper(), sex + u',дт'))
        # first name
        first_name_inflated = firstcaps(morph.inflect_ru(first_name.upper(), u'дт'))
        # patronymic
        patronymic_inflated = firstcaps(morph.inflect_ru(patronymic.upper(), sex + u',дт'))
        return {
            'last_name': last_name_inflated,
            'first_name': first_name_inflated,
            'patronymic': patronymic_inflated,
            'user': self.get_user(),
        }
Author: summerisgone, Project: umc-kurator, Lines: 26, Source: resources.py


Example 3: __init__

 def __init__(self, corpus=None):
     """Initialize your data structures in the constructor."""
     self.unigramCounts = collections.defaultdict(lambda: 0)
     self.bigramCounts = collections.defaultdict(lambda: 0)
     self.V = 0
     self.morph = get_morph(DICTS_DIR)
     if corpus:
         self.train(corpus)
Author: xamvz, Project: deep_vlom, Lines: 8, Source: LaplaceBigramLanguageModel.py


Example 4: __init__

 def __init__(self, page, working_list, mutex, dbname = 'crawler_db'):
     Db_manager.__init__(self, dbname)
     threading.Thread.__init__(self)
     self.working_list = working_list
     self.page = page
     self.mutex = mutex
     self.morph = get_morph('dicts')
     with self.mutex:
         working_list.append(page)
Author: Qolt, Project: search_engine, Lines: 9, Source: crawler.py


Example 5: download_morph

def download_morph():
    # download and use the dictionary to obtain grammatical information about a word (its part of speech)
    path_to_dictionary = os.path.realpath(os.path.curdir)
    morph_path = join(path_to_dictionary, 'morph_dicts')
    if not os.path.exists(morph_path):
        subprocess.call(['wget', 'https://bitbucket.org/kmike/pymorphy/downloads/ru.sqlite-json.zip'])
        subprocess.call(['unzip', 'ru.sqlite-json.zip', '-d', 'morph_dicts'])
    morph = get_morph(morph_path)
    return morph
Author: kewtree1408, Project: search, Lines: 9, Source: 3.py


Example 6: get

    def get(self, request, *args, **kwargs):
        params = request.GET
        COUNT_ELEMENTS = 5
        errors = []

        limit = COUNT_ELEMENTS
        offset = 0

        form = forms.SearchForm(params)
        if form.is_valid():
            #pointsreq = MainModels.Person.objects;

            name = form.cleaned_data.get("s")
            users_list = []
            morph = get_morph('/home/tenoclock/yasenput/dicts')
            if name:
                #pointsreq = MainModels.Person.search.query(params.get("s"))
                #search = SphinxSearch()
                search = SphinxQuerySet(index="auth_user")
                name_morph = morph.normalize(name.upper())
                file1 = open('file1.txt','w')
                file1.write(str(list(name_morph)))
                file1.close()
                phrase_list = name.split(' ')
                for phrase in phrase_list:
                    if phrase != '':
                        name_morph = morph.normalize(phrase.upper())
                        for name_m in name_morph:
                            search_query = search.query(name_m)
                            for splited_item in search_query:
                                if not MainModels.Person.objects.get(id = splited_item['id']) in users_list:
                                   users_list.append(MainModels.Person.objects.get(id = splited_item['id']))





            content = form.cleaned_data.get("content")
            if content == 'new':
                pointsreq  = pointsreq.order_by('-id')
            elif content == "popular":
                pointsreq  = pointsreq.annotate(usfiliwers=Count('followers__id')).order_by('-usfiliwers', '-id')
            else:
                pointsreq  = users_list


            points = users_list[offset:limit]

            YpJson = YpSerialiser()
            return HttpResponse(YpJson.serialize(points, fields=("username", "first_name", "last_name")),
                                mimetype="application/json")
        else:
            e = form.errors
            for er in e:
                errors.append(er +':'+e[er][0])
            return JsonHTTPResponse({"status": 0, "txt": ", ".join(errors)});
Author: zaebee, Project: yasenput, Lines: 56, Source: views.py


Example 7: __init__

 def __init__(self):
     super(NumWordRU,self).__init__()
     # initializing morphology module for inflecting
     from pymorphy import get_morph
     import ConfigParser
     config = ConfigParser.RawConfigParser()
     config.read('/home/soshial/text-normalization/normalization.cfg')
     dicts_folder = config.get('lms','dicts')
     import os
     if not os.path.exists(dicts_folder): quit('Please put existing dictionaries into "'+dicts_folder+'" folder!')
     self.morph = get_morph(dicts_folder)
     self.inflection_case = u"им" # todo add gender for the ending of numeral ('жр')
Author: hexflashor, Project: text-normalization, Lines: 12, Source: numword_ru.py


Example 8: main

def main():
	(options, args) = parser.parse_args()

	if not options.word or not options.dict:
		print 'inflect -h for help.'
		return
		
	morph = get_morph(options.dict)

	word 	= options.word.decode(chardet.detect(options.word)['encoding']).upper()
	word 	= unicode(word)

	a 		= morph.inflect_ru(word, u'пр', u'С')
	print a.encode('utf8')
Author: aderyabin, Project: GoFree, Lines: 14, Source: inflect.py


Example 9: misc_utilites

def misc_utilites():
	morpher = get_morph('static/res/pymorphy/')

	def pluralize(number, word):
		return morpher.pluralize_inflected_ru(word.upper(), number).lower()

	def is_logged_in():
		try:
			int(session['logged'])
			return True
		except (ValueError, KeyError):
			return False

	return {'pluralize': pluralize, 'is_logged_in': is_logged_in }
Author: colorfulfool, Project: storegallery, Lines: 14, Source: flask_app.py


Example 10: handle

    def handle(self, *args, **options):
        morph = get_morph(join(settings.PROJECT_DIR, 'morph'))
        self.dialog = Dialog()
        listeners = Listener.objects.filter(first_name__exact=u'')

        total = listeners.count()
        index = 0
        self.dialog.gauge_start()

        for listener in listeners:
            listener.normalize_name(morph)
            text = u'Склонение: %s %s %s' % (listener.last_name, listener.first_name, listener.patronymic)
            self.dialog.gauge_update(int(float(index)/total*100),
                text=text.encode('utf-8'),
                update_text=True)
            index += 1

        self.dialog.gauge_stop()
Author: summerisgone, Project: umc-kurator, Lines: 18, Source: normalize_names.py


Example 11: initializeResources

    def initializeResources(self):
        
        """Pre-initialization"""
        self.animationTimer = ()
        self.progressTimer = ()
        self.grid_layout =()
                       
        """Initialize Options"""
        self.options = Options()
        
        """Initialize Statistics"""
        self.stats = Stats()
        
        """Config Here"""
        self.initializeComposition()
        self.initializeComponents()
        self.setMenus()
        self.trayIcon.show()
        #self.startTrayLoading()
        
        """"Initialize Dictionaries    (will take a some time!)"""
        time_start = datetime.now()
        self.dict = EdictParser()
        self.dict.loadDict()
        
        self.morphy = get_morph(PATH_TO_RES + DICT_EN)
        
        self.trayIcon.showMessage('Loading...', 'Initializing dictionaries', QSystemTrayIcon.MessageIcon.Information, 20000 )     #TODO: change into loading dialog... or not
        
        """Initializing srs system"""
        self.trayIcon.showMessage('Loading...', 'Initializing databases', QSystemTrayIcon.MessageIcon.Information, 20000 )
        self.srs = srsScheduler()
        self.srs.initializeAll()
        self.srs.initializeCurrentSession(self.options.getSessionSize())
        
        """Global hotkeys hook"""
        #TODO: add multiple hotkeys and fix stop()
        #self.hooker = GlobalHotkeyManager(toggleQDictFlag, 'Q')
#        self.hooker = GlobalHotkeyManager(toggleWidgetFlag(self.qdict), 'Q')
#        self.hooker.setDaemon(True) #temporarily, should work using stop()
#        self.hooker.start()
        
        time_end = datetime.now()
        self.loadingTime =  time_end - time_start
Author: Xifax, Project: suzu_ei, Lines: 44, Source: guiMain.py


Example 12: create_triads

def create_triads(path_item, path_rel, path_attr):
    dicts = "c:\\Python27\\Lib\\site-packages\\pymorphy\\ru.sqlite-json\\"
    morph = get_morph(dicts)

    # read items
    with open(path_item) as f:
        items = f.readlines()
    # read relations
    with open(path_rel) as f:
        relations = f.readlines()
    # read attributes
    with open(path_attr) as f:
        attributes = f.readlines()

    # split attributes according to different parts of speech
    attrsN, attrsV, attrsAdj, attrsIs = [[],[],[],[]]
    for at in attributes:
        if 'N' in at: attrsN.append(re.split(',', at)[0].decode('cp1251').lower())
        if 'V' in at: attrsV.append(re.split(',', at)[0].decode('cp1251').lower())
        if 'Adj' in at: attrsAdj.append(re.split(',', at)[0].decode('cp1251').lower())
        if 'Is' in at: attrsIs.append(re.split(',', at)[0].decode('cp1251').lower())

    # assemble triads
    triads = []
    for it in items:
        it = it.replace('\n', '').decode('cp1251')
        for rel in relations:
            rel = rel.replace('\n', '').decode('cp1251')
            if rel == u'может':
                for attr in attrsV: triads.append([it, rel, attr])
            if rel == u'имеет':
                for attr in attrsN: triads.append([it, rel, attr])
            if rel == u'является':
                for attr in attrsIs: triads.append([it, rel, attr])
            if u'как' in rel:
                for attr in attrsAdj: triads.append([it, '', attr])

    # test
    for triad in triads:
        print triad[0] + ', ' + triad[1] + ', ' + triad[2]

    return triads
Author: tsundokum, Project: snn, Lines: 42, Source: create_triads.py


Example 13: get_words

def get_words(file_name, index):
    
    morph = get_morph('')
    print "Getting words from " + file_name + "..."

    words = []
    pattern = re.compile("(([\w]+[-'])*[\w']+'?)", re.U)

    # try:
    f = open(file_name, 'r')
    file_text = f.read()
    f.close()
    file_text = unicode(file_text, 'utf8').upper()
    file_text = file_text.replace('--', ' -- ')
    tokens = file_text.split()
    previous_percentage = -1
    for idx, token in enumerate(tokens):
        m = pattern.match(token)
        if m:
            word = m.group()
            info = morph.get_graminfo(word)
            if len(info) < 2:
                continue
            if not info[0]['class'] in [u"П", u"С", u"Г"]:
                continue
            norm = info[0]['norm']
            words.append(norm)
            if norm in index:
                index[norm] += 1
            else:
                index[norm] = 1
        percentage = 100 * idx / len(tokens)
        if percentage != previous_percentage and percentage % 5 == 0:
            print "Getting words: " + str(percentage) + "% done"
            previous_percentage = percentage
    # except:
    #     print "error occured"

    return words
Author: pombredanne, Project: search, Lines: 39, Source: script.py


Example 14: get_morph

# coding=utf-8

from pymorphy import get_morph

morph = get_morph('/home/ilya/github/ru.sqlite-json') #dict path

ins = open("adjective_opinion_words.txt", "r")

array = []
for line in ins:
    ind = line.index(' ')
    if ind!=-1:
        line = line[0:ind]
        array.append(line)

ins.close()

file = open("pyDict", "w")

for i in range(len(array)):
    word = array[i]
    word = word.decode("utf-8").upper()

    info1= morph.inflect_ru(unicode(word), u'мр')
    info2 = morph.inflect_ru(unicode(word), u'жр')
    info3 = morph.inflect_ru(unicode(word), u'ср')

    res = word.lower().encode("utf-8")+" "+info1.lower().encode("utf-8")+" "+info2.lower().encode("utf-8")+" "+info3.lower().encode("utf-8")
#    print res
    file.write(res+"\n")
Author: NlogN, Project: goods-review-ng, Lines: 30, Source: makePymorphyDict.py


Example 15: get_morph

    (u'SOCIAL', u'ACTIVITY'),
    (u'CURRENT', u'FRESHMAN'),
    (u'CURRENT', u'SOPHOMORE'),
    (u'FOUR-YEAR', u'UNIVERSITY'),
    (u'ACADEMIC', u'RECORD'),
    (u'DEMONSTRATE', u'PASSION'),
    (u'HIGH', u'STUDENT'),
    (u'POTENTIAL', u'STUDENT'),
    (u'EXCITING', u'PROGRAM'),
    (u'FAST-PACED', u'PROGRAM'),
    (u'INTERACTIVE', u'COURCE'),
    (u'FORMER', u'CAMPER'),
    (u'MANY', u'INFORMATION')
]

morph = get_morph("../dicts/en")


def get_grammars_precision_and_recall(grammar, dir_path):
    retrieved = 0.0
    relevant = 0.0
    for root, dirs, files in os.walk(dir_path):
        for file_name in files:
            path = os.path.join(root, file_name)
            for result in data_gathering_iterator(path, morph, grammar):
                for subresult in result:
                    if subresult in normilized_right_result_list:
                        relevant += 1.0
                    retrieved += 1.0

    return relevant / retrieved, relevant / len(normilized_right_result_list)
Author: madcat1991, Project: AutoOntology, Lines: 31, Source: check_grammar.py


Example 16: get_data_and_statistic

def get_data_and_statistic(file_path, morph, rd):
    for adj_noun_list in data_gathering_iterator(file_path, morph):
        for adj_noun in adj_noun_list:
            adj_noun_str = u" ".join(adj_noun)
            rd.incr(adj_noun_str, 1)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument("-f", "--file", dest="file_path", type=str, required=True,
                        help="Файл из которого будет браться текст")
    parser.add_argument("-m", "--morph_dir", dest="morph_dir", type=str, required=True,
                        help="Директория в которой лежат словари pymorphy")
    parser.add_argument("-s", "--host", dest="host", default="localhost", type=str,
                        help="Хост на котором находится Redis. По умолчанию 'localhost'")
    parser.add_argument("-p", "--port", dest="port", default=6379, type=int,
                        help="Порт на котором находится Redis. По умолчанию 6379")
    parser.add_argument("-d", "--db", dest="db", default=0, type=int,
                        help="БД в редисе. По умолчанию - 0")

    args = parser.parse_args()
    morph = get_morph(args.morph_dir)
    rd = Redis(host=args.host, port=args.port, db=args.db)
    rd.flushdb()

    get_data_and_statistic(args.file_path, morph, rd)

    rd.save()
Author: madcat1991, Project: AutoOntology, Lines: 29, Source: text_worker.py


Example 17: get_morph

# -*- coding: utf-8 -*-

import random, re
import logging
from pymorphy import get_morph

morph = get_morph("dict")


def rev(x):
    revmap = {
        u"Я": u"ТЫ",
        u"МЕНЯ": u"ТЕБЯ",
        u"МНЕ": u"ТЕБЕ",
        u"МНОЙ": u"ТОБОЙ",
        u"МЫ": u"ВЫ",
        u"НАС": u"ВАС",
        u"НАМ": u"ВАМ",
        u"НАС": u"ВАС",
        u"НАМИ": u"ВАМИ",
    }
    for k, v in revmap.items():
        revmap[v] = k

    xstr = x.replace(",", "").replace(".", "")
    if xstr in revmap:
        return x.replace(xstr, revmap[xstr]).lower()

    global morph
    info = morph.get_graminfo(x)
    if len(info):
Author: rkfg, Project: Talho, Lines: 31, Source: orchoice.py


Example 18: get_morph

# -*- coding: utf-8 -*-
from pymorphy import get_morph
morph = get_morph('dicts/ru')
print morph.normalize(u"ЛЮДЕЙ")
Author: kilirobbs, Project: python-fiddle, Lines: 4, Source: test.py


Example 19: tokenizeMe

import nltk
import string
import pymorphy
#from pymorphy.contrib import tokenizers
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
import os
from collections import Counter

import numpy as np
import cv2

corpus_dir = 'corpus_1836'
#corpus_dir = 'poems'
CLUSTERS_COUNT = 2
morph = pymorphy.get_morph('C:/DB/ru.sqlite-json')
stop_words = stopwords.words('russian')
stop_words.extend(['что', 'это', 'так', 'вот', 'быть', 'как'])
corpus = {}
res_txt = []

def tokenizeMe(sentence):
    all_words = []
    #sentence = "меня! это,. что? похоже; на - пирамиду"
    #sentence = sentence.encode('utf-8').upper()
    #sentence = sentence.upper()
    #print(sentence)
    #
    #words = tokenizers.extract_words(sentence)
    #print(words)
Author: zab88, Project: bar_poem, Lines: 30, Source: main.py


Example 20: __init__

 def __init__(self, dbname = 'crawler_db'):
     self.conn = MySQLdb.connect(user = 'crawler', db = dbname, passwd = 'crawler', unix_socket = '/var/run/mysqld/mysqld.sock')
     self.cursor = self.conn.cursor()
     self.morph = get_morph('dicts')
Author: Qolt, Project: search_engine, Lines: 4, Source: searcher.py



Note: the pymorphy.get_morph examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by various developers, and copyright remains with the original authors; consult the corresponding project's License before distributing or reusing the code, and do not reproduce without permission.

