This article collects typical usage examples of the pymorphy.get_morph function in Python. If you are wondering how get_morph is used in practice, the curated code examples below may help.
20 code examples of get_morph are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
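Before the individual examples, here is a minimal usage sketch of the call pattern they all share, assuming the Python 2 era pymorphy 0.5.x API and a placeholder dictionary path ('dicts/ru'); the method names (normalize, inflect_ru, get_graminfo) are the ones that appear in the examples below, and the commented outputs are only illustrative.

# -*- coding: utf-8 -*-
# Minimal sketch (Python 2, pymorphy 0.5.x). 'dicts/ru' is a placeholder path
# to a directory containing unpacked pymorphy dictionaries.
from pymorphy import get_morph

morph = get_morph('dicts/ru')                     # load the dictionaries once
print morph.normalize(u"ЛЮДЕЙ")                   # set of normal forms, e.g. set([u'ЧЕЛОВЕК'])
print morph.inflect_ru(u"ЧЕЛОВЕК", u'дт')         # inflect into the dative case (u'дт'), as Example 2 does
print morph.get_graminfo(u"ЛЮДЕЙ")[0]['class']    # part of speech of the first parse, as Example 13 does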
Example 1: formalize

def formalize(filename, morph_dict=DICTS_DIR, stop_dict=stop_dict_path):
    morph = get_morph(morph_dict)
    stop_words = get_stop_words()
    dict = {}
    words = 0.0
    try:
        f = open(filename)
    except IOError:
        raise
    for line in f.readlines():
        for word in line.split():
            word = word.decode("utf-8", 'ignore')
            word = word.strip(u'[,.:;\"\')$«»(?<>!-_—//=]\n\t')
            word = word.replace('.', '_')
            word = morph.normalize(word.upper())
            if isinstance(word, set):
                word = word.pop()
            else:
                continue
            word = word.lower()
            words += 1
            if word in stop_words or not word:
                continue
            if not word in dict:
                dict[word] = 1.0
            else:
                dict[word] += 1.0
    for key in dict:
        dict[key] /= words
    return dict

Author: xamvz, Project: deep_vlom, Lines: 30, Source: preprocessor.py
Example 2: post

def post(self, request):
    morph = get_morph(join(settings.PROJECT_DIR, 'morph'))
    last_name = self.DATA['last_name']
    first_name = self.DATA['first_name']
    patronymic = self.DATA['patronymic']
    # to decline the last name, the gender must be determined first
    try:
        sex = morph.get_graminfo(first_name.upper())[0]['info'].split(',', 1)[0]
    except IndexError:
        # get_graminfo() returned []
        # print 'get_graminfo failed on ', first_name
        sex = u'жр'
    # last name
    last_name_inflated = firstcaps(lastnames_ru.inflect(morph,
                                                        last_name.upper(), sex + u',дт'))
    # first name
    first_name_inflated = firstcaps(morph.inflect_ru(first_name.upper(), u'дт'))
    # patronymic
    patronymic_inflated = firstcaps(morph.inflect_ru(patronymic.upper(), sex + u',дт'))
    return {
        'last_name': last_name_inflated,
        'first_name': first_name_inflated,
        'patronymic': patronymic_inflated,
        'user': self.get_user(),
    }

Author: summerisgone, Project: umc-kurator, Lines: 26, Source: resources.py
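A note on Example 2: get_graminfo() returns a list of parses, each of which is a dict; the examples in this article read its 'info' (comma-separated grammemes), 'class' (part of speech) and 'norm' (normal form) keys. Below is a small sketch of the gender-detection step, assuming a morph object loaded as in Example 2 and an illustrative input word; the exact grammeme string is only an example.

# assumes `morph` was created with get_morph(...) as in Example 2
parses = morph.get_graminfo(u"ИВАН")    # list of possible parses; may be empty
if parses:
    info = parses[0]['info']            # e.g. u'мр,имя,...' -- gender appears first
    sex = info.split(',', 1)[0]         # u'мр' (masculine) or u'жр' (feminine)
else:
    sex = u'жр'                         # same fallback as Example 2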
Example 3: __init__

def __init__(self, corpus=None):
    """Initialize your data structures in the constructor."""
    self.unigramCounts = collections.defaultdict(lambda: 0)
    self.bigramCounts = collections.defaultdict(lambda: 0)
    self.V = 0
    self.morph = get_morph(DICTS_DIR)
    if corpus:
        self.train(corpus)

Author: xamvz, Project: deep_vlom, Lines: 8, Source: LaplaceBigramLanguageModel.py
Example 4: __init__

def __init__(self, page, working_list, mutex, dbname='crawler_db'):
    Db_manager.__init__(self, dbname)
    threading.Thread.__init__(self)
    self.working_list = working_list
    self.page = page
    self.mutex = mutex
    self.morph = get_morph('dicts')
    with self.mutex:
        working_list.append(page)

Author: Qolt, Project: search_engine, Lines: 9, Source: crawler.py
Example 5: download_morph

def download_morph():
    # download and use the dictionary that provides grammatical information about a word (part of speech)
    path_to_dictionary = os.path.realpath(os.path.curdir)
    morph_path = join(path_to_dictionary, 'morph_dicts')
    if not os.path.exists(morph_path):
        subprocess.call(['wget', 'https://bitbucket.org/kmike/pymorphy/downloads/ru.sqlite-json.zip'])
        subprocess.call(['unzip', 'ru.sqlite-json.zip', '-d', 'morph_dicts'])
    morph = get_morph(morph_path)
    return morph

Author: kewtree1408, Project: search, Lines: 9, Source: 3.py
Example 6: get

def get(self, request, *args, **kwargs):
    params = request.GET
    COUNT_ELEMENTS = 5
    errors = []
    limit = COUNT_ELEMENTS
    offset = 0
    form = forms.SearchForm(params)
    if form.is_valid():
        #pointsreq = MainModels.Person.objects;
        name = form.cleaned_data.get("s")
        users_list = []
        morph = get_morph('/home/tenoclock/yasenput/dicts')
        if name:
            #pointsreq = MainModels.Person.search.query(params.get("s"))
            #search = SphinxSearch()
            search = SphinxQuerySet(index="auth_user")
            name_morph = morph.normalize(name.upper())
            file1 = open('file1.txt', 'w')
            file1.write(str(list(name_morph)))
            file1.close()
            phrase_list = name.split(' ')
            for phrase in phrase_list:
                if phrase != '':
                    name_morph = morph.normalize(phrase.upper())
                    for name_m in name_morph:
                        search_query = search.query(name_m)
                        for splited_item in search_query:
                            if not MainModels.Person.objects.get(id=splited_item['id']) in users_list:
                                users_list.append(MainModels.Person.objects.get(id=splited_item['id']))
        content = form.cleaned_data.get("content")
        if content == 'new':
            pointsreq = pointsreq.order_by('-id')
        elif content == "popular":
            pointsreq = pointsreq.annotate(usfiliwers=Count('followers__id')).order_by('-usfiliwers', '-id')
        else:
            pointsreq = users_list
        points = users_list[offset:limit]
        YpJson = YpSerialiser()
        return HttpResponse(YpJson.serialize(points, fields=("username", "first_name", "last_name")),
                            mimetype="application/json")
    else:
        e = form.errors
        for er in e:
            errors.append(er + ':' + e[er][0])
        return JsonHTTPResponse({"status": 0, "txt": ", ".join(errors)})

Author: zaebee, Project: yasenput, Lines: 56, Source: views.py
Example 7: __init__

def __init__(self):
    super(NumWordRU, self).__init__()
    # initializing morphology module for inflecting
    from pymorphy import get_morph
    import ConfigParser
    config = ConfigParser.RawConfigParser()
    config.read('/home/soshial/text-normalization/normalization.cfg')
    dicts_folder = config.get('lms', 'dicts')
    import os
    if not os.path.exists(dicts_folder): quit('Please put existing dictionaries into "' + dicts_folder + '" folder!')
    self.morph = get_morph(dicts_folder)
    self.inflection_case = u"им"  # todo add gender for the ending of numeral ('жр')

Author: hexflashor, Project: text-normalization, Lines: 12, Source: numword_ru.py
Example 8: main

def main():
    (options, args) = parser.parse_args()
    if not options.word or not options.dict:
        print 'inflect -h for help.'
        return
    morph = get_morph(options.dict)
    word = options.word.decode(chardet.detect(options.word)['encoding']).upper()
    word = unicode(word)
    a = morph.inflect_ru(word, u'пр', u'С')
    print a.encode('utf8')

Author: aderyabin, Project: GoFree, Lines: 14, Source: inflect.py
Example 9: misc_utilites

def misc_utilites():
    morpher = get_morph('static/res/pymorphy/')

    def pluralize(number, word):
        return morpher.pluralize_inflected_ru(word.upper(), number).lower()

    def is_logged_in():
        try:
            int(session['logged'])
            return True
        except (ValueError, KeyError):
            return False

    return {'pluralize': pluralize, 'is_logged_in': is_logged_in}

Author: colorfulfool, Project: storegallery, Lines: 14, Source: flask_app.py
Example 10: handle

def handle(self, *args, **options):
    morph = get_morph(join(settings.PROJECT_DIR, 'morph'))
    self.dialog = Dialog()
    listeners = Listener.objects.filter(first_name__exact=u'')
    total = listeners.count()
    index = 0
    self.dialog.gauge_start()
    for listener in listeners:
        listener.normalize_name(morph)
        text = u'Склонение: %s %s %s' % (listener.last_name, listener.first_name, listener.patronymic)
        self.dialog.gauge_update(int(float(index)/total*100),
                                 text=text.encode('utf-8'),
                                 update_text=True)
        index += 1
    self.dialog.gauge_stop()

Author: summerisgone, Project: umc-kurator, Lines: 18, Source: normalize_names.py
Example 11: initializeResources

def initializeResources(self):
    """Pre-initialization"""
    self.animationTimer = ()
    self.progressTimer = ()
    self.grid_layout = ()
    """Initialize Options"""
    self.options = Options()
    """Initialize Statistics"""
    self.stats = Stats()
    """Config Here"""
    self.initializeComposition()
    self.initializeComponents()
    self.setMenus()
    self.trayIcon.show()
    #self.startTrayLoading()
    """Initialize Dictionaries (will take some time!)"""
    time_start = datetime.now()
    self.dict = EdictParser()
    self.dict.loadDict()
    self.morphy = get_morph(PATH_TO_RES + DICT_EN)
    self.trayIcon.showMessage('Loading...', 'Initializing dictionaries', QSystemTrayIcon.MessageIcon.Information, 20000)  #TODO: change into loading dialog... or not
    """Initializing srs system"""
    self.trayIcon.showMessage('Loading...', 'Initializing databases', QSystemTrayIcon.MessageIcon.Information, 20000)
    self.srs = srsScheduler()
    self.srs.initializeAll()
    self.srs.initializeCurrentSession(self.options.getSessionSize())
    """Global hotkeys hook"""
    #TODO: add multiple hotkeys and fix stop()
    #self.hooker = GlobalHotkeyManager(toggleQDictFlag, 'Q')
    # self.hooker = GlobalHotkeyManager(toggleWidgetFlag(self.qdict), 'Q')
    # self.hooker.setDaemon(True) #temporarily, should work using stop()
    # self.hooker.start()
    time_end = datetime.now()
    self.loadingTime = time_end - time_start

Author: Xifax, Project: suzu_ei, Lines: 44, Source: guiMain.py
Example 12: create_triads

def create_triads(path_item, path_rel, path_attr):
    dicts = "c:\\Python27\\Lib\\site-packages\\pymorphy\\ru.sqlite-json\\"
    morph = get_morph(dicts)
    # read items
    with open(path_item) as f:
        items = f.readlines()
    # read relations
    with open(path_rel) as f:
        relations = f.readlines()
    # read attributes
    with open(path_attr) as f:
        attributes = f.readlines()
    # split attributes according to different parts of speech
    attrsN, attrsV, attrsAdj, attrsIs = [[], [], [], []]
    for at in attributes:
        if 'N' in at: attrsN.append(re.split(',', at)[0].decode('cp1251').lower())
        if 'V' in at: attrsV.append(re.split(',', at)[0].decode('cp1251').lower())
        if 'Adj' in at: attrsAdj.append(re.split(',', at)[0].decode('cp1251').lower())
        if 'Is' in at: attrsIs.append(re.split(',', at)[0].decode('cp1251').lower())
    # assemble triads
    triads = []
    for it in items:
        it = it.replace('\n', '').decode('cp1251')
        for rel in relations:
            rel = rel.replace('\n', '').decode('cp1251')
            if rel == u'может':
                for attr in attrsV: triads.append([it, rel, attr])
            if rel == u'имеет':
                for attr in attrsN: triads.append([it, rel, attr])
            if rel == u'является':
                for attr in attrsIs: triads.append([it, rel, attr])
            if u'как' in rel:
                for attr in attrsAdj: triads.append([it, '', attr])
    # test
    for triad in triads:
        print triad[0] + ', ' + triad[1] + ', ' + triad[2]
    return triads

Author: tsundokum, Project: snn, Lines: 42, Source: create_triads.py
Example 13: get_words

def get_words(file_name, index):
    morph = get_morph('')
    print "Getting words from " + file_name + "..."
    words = []
    pattern = re.compile("(([\w]+[-'])*[\w']+'?)", re.U)
    # try:
    f = open(file_name, 'r')
    file_text = f.read()
    f.close()
    file_text = unicode(file_text, 'utf8').upper()
    file_text = file_text.replace('--', ' -- ')
    tokens = file_text.split()
    previous_percentage = -1
    for idx, token in enumerate(tokens):
        m = pattern.match(token)
        if m:
            word = m.group()
            info = morph.get_graminfo(word)
            if len(info) < 2:
                continue
            if not info[0]['class'] in [u"П", u"С", u"Г"]:
                continue
            norm = info[0]['norm']
            words.append(norm)
            if norm in index:
                index[norm] += 1
            else:
                index[norm] = 1
        percentage = 100 * idx / len(tokens)
        if percentage != previous_percentage and percentage % 5 == 0:
            print "Getting words: " + str(percentage) + "% done"
            previous_percentage = percentage
    # except:
    #     print "error occured"
    return words

Author: pombredanne, Project: search, Lines: 39, Source: script.py
示例14: get_morph
# coding=utf-8
from pymorphy import get_morph
morph = get_morph('/home/ilya/github/ru.sqlite-json') #dict path
ins = open("adjective_opinion_words.txt", "r")
array = []
for line in ins:
ind = line.index(' ')
if ind!=-1:
line = line[0:ind]
array.append(line)
ins.close()
file = open("pyDict", "w")
for i in range(len(array)):
word = array[i]
word = word.decode("utf-8").upper()
info1= morph.inflect_ru(unicode(word), u'мр')
info2 = morph.inflect_ru(unicode(word), u'жр')
info3 = morph.inflect_ru(unicode(word), u'ср')
res = word.lower().encode("utf-8")+" "+info1.lower().encode("utf-8")+" "+info2.lower().encode("utf-8")+" "+info3.lower().encode("utf-8")
# print res
file.write(res+"\n")
开发者ID:NlogN,项目名称:goods-review-ng,代码行数:30,代码来源:makePymorphyDict.py
示例15: get_morph
(u'SOCIAL', u'ACTIVITY'),
(u'CURRENT', u'FRESHMAN'),
(u'CURRENT', u'SOPHOMORE'),
(u'FOUR-YEAR', u'UNIVERSITY'),
(u'ACADEMIC', u'RECORD'),
(u'DEMONSTRATE', u'PASSION'),
(u'HIGH', u'STUDENT'),
(u'POTENTIAL', u'STUDENT'),
(u'EXCITING', u'PROGRAM'),
(u'FAST-PACED', u'PROGRAM'),
(u'INTERACTIVE', u'COURCE'),
(u'FORMER', u'CAMPER'),
(u'MANY', u'INFORMATION')
]
morph = get_morph("../dicts/en")
def get_grammars_precision_and_recall(grammar, dir_path):
retrieved = 0.0
relevant = 0.0
for root, dirs, files in os.walk(dir_path):
for file_name in files:
path = os.path.join(root, file_name)
for result in data_gathering_iterator(path, morph, grammar):
for subresult in result:
if subresult in normilized_right_result_list:
relevant += 1.0
retrieved += 1.0
return relevant / retrieved, relevant / len(normilized_right_result_list)
开发者ID:madcat1991,项目名称:AutoOntology,代码行数:31,代码来源:check_grammar.py
Example 16: get_data_and_statistic

def get_data_and_statistic(file_path, morph, rd):
    for adj_noun_list in data_gathering_iterator(file_path, morph):
        for adj_noun in adj_noun_list:
            adj_noun_str = u" ".join(adj_noun)
            rd.incr(adj_noun_str, 1)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--file", dest="file_path", type=str, required=True,
                        help="File from which the text is taken")
    parser.add_argument("-m", "--morph_dir", dest="morph_dir", type=str, required=True,
                        help="Directory containing the pymorphy dictionaries")
    parser.add_argument("-s", "--host", dest="host", default="localhost", type=str,
                        help="Host where Redis is running. Default: 'localhost'")
    parser.add_argument("-p", "--port", dest="port", default=6379, type=int,
                        help="Port where Redis is running. Default: 6379")
    parser.add_argument("-d", "--db", dest="db", default=0, type=int,
                        help="Redis database number. Default: 0")
    args = parser.parse_args()

    morph = get_morph(args.morph_dir)
    rd = Redis(host=args.host, port=args.port, db=args.db)
    rd.flushdb()
    get_data_and_statistic(args.file_path, morph, rd)
    rd.save()

Author: madcat1991, Project: AutoOntology, Lines: 29, Source: text_worker.py
Example 17: get_morph

# -*- coding: utf-8 -*-
import random, re
import logging
from pymorphy import get_morph

morph = get_morph("dict")

def rev(x):
    revmap = {
        u"Я": u"ТЫ",
        u"МЕНЯ": u"ТЕБЯ",
        u"МНЕ": u"ТЕБЕ",
        u"МНОЙ": u"ТОБОЙ",
        u"МЫ": u"ВЫ",
        u"НАС": u"ВАС",
        u"НАМ": u"ВАМ",
        u"НАС": u"ВАС",
        u"НАМИ": u"ВАМИ",
    }
    for k, v in revmap.items():
        revmap[v] = k
    xstr = x.replace(",", "").replace(".", "")
    if xstr in revmap:
        return x.replace(xstr, revmap[xstr]).lower()
    global morph
    info = morph.get_graminfo(x)
    if len(info):

Author: rkfg, Project: Talho, Lines: 31, Source: orchoice.py
Example 18: get_morph

# -*- coding: utf-8 -*-
from pymorphy import get_morph
morph = get_morph('dicts/ru')
print morph.normalize(u"ЛЮДЕЙ")

Author: kilirobbs, Project: python-fiddle, Lines: 4, Source: test.py
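A follow-up note on Example 18: normalize() appears to return a set of candidate normal forms, which is why Example 1 guards the result with an isinstance(word, set) check before calling pop(). A small sketch, assuming the same morph object and dictionary path as in Example 18:

forms = morph.normalize(u"ЛЮДЕЙ")   # typically a set of normal forms
if isinstance(forms, set):          # Example 1 uses the same guard
    lemma = forms.pop()             # take one candidate
    print lemma.lower()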
Example 19: tokenizeMe

import nltk
import string
import pymorphy
#from pymorphy.contrib import tokenizers
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
import os
from collections import Counter
import numpy as np
import cv2

corpus_dir = 'corpus_1836'
#corpus_dir = 'poems'
CLUSTERS_COUNT = 2

morph = pymorphy.get_morph('C:/DB/ru.sqlite-json')
stop_words = stopwords.words('russian')
stop_words.extend(['что', 'это', 'так', 'вот', 'быть', 'как'])

corpus = {}
res_txt = []

def tokenizeMe(sentence):
    all_words = []
    #sentence = "меня! это,. что? похоже; на - пирамиду"
    #sentence = sentence.encode('utf-8').upper()
    #sentence = sentence.upper()
    #print(sentence)
    #
    #words = tokenizers.extract_words(sentence)
    #print(words)

Author: zab88, Project: bar_poem, Lines: 30, Source: main.py
Example 20: __init__

def __init__(self, dbname='crawler_db'):
    self.conn = MySQLdb.connect(user='crawler', db=dbname, passwd='crawler', unix_socket='/var/run/mysqld/mysqld.sock')
    self.cursor = self.conn.cursor()
    self.morph = get_morph('dicts')

Author: Qolt, Project: search_engine, Lines: 4, Source: searcher.py
Note: the pymorphy.get_morph examples in this article were compiled by 纯净天空 from source code and documentation hosted on platforms such as GitHub/MSDocs. The snippets were selected from open-source projects contributed by their respective authors, and copyright remains with the original authors; consult the corresponding project's license before redistributing or reusing the code, and do not reproduce this compilation without permission.