本文整理汇总了C++中Wtroka类的典型用法代码示例。如果您正苦于以下问题:C++ Wtroka类的具体用法?C++ Wtroka怎么用?C++ Wtroka使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Wtroka类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: AddQuoteMultiWord
// Attaches `article` to the word that represents the multi-word sequence `ws`,
// records the found article for the resulting word/homonym reference and
// remembers `ws` in m_wordSequences.
//
// The word, its text and the word/homonym reference come from
// GetWordForMultiWord(). Either the word's first homonym is reused (single
// source word with only unknown part of speech) or a new homonym is created.
void CMultiWordCreator::AddQuoteMultiWord(CWordSequence& ws, const TArticleRef& article)
{
    SWordHomonymNum homonymRef;
    Wtroka lemmaText;
    CWord* pWord = GetWordForMultiWord(ws, lemmaText, homonymRef);

    const bool reuseFirstHomonym = pWord->m_SourceWords.Size() == 1 && pWord->HasOnlyUnknownPOS();
    if (!reuseFirstHomonym) {
        // Build a fresh homonym spanning the whole sequence.
        pWord->m_SourceWords.SetPair(ws.FirstWord(), ws.LastWord());
        if (lemmaText.size() == 0)
            lemmaText = pWord->m_txt;
        TMorph::ToLower(lemmaText);

        // NOTE(review): ownership is presumably taken over by AddRusHomonym - confirm.
        CHomonym* pHomonym = new CHomonym(TMorph::GetMainLanguage(), lemmaText);
        pHomonym->SetSourceWordSequence(&ws);
        pHomonym->PutArticle(article);
        homonymRef.m_HomNum = pWord->AddRusHomonym(pHomonym);
    } else {
        // Reuse the first homonym of the word.
        size_t firstHomonymId = pWord->IterHomonyms().GetID();
        CHomonym& existing = pWord->GetRusHomonym(firstHomonymId);
        existing.SetSourceWordSequence(&ws);
        existing.PutArticle(article);
        homonymRef.m_HomNum = firstHomonymId;
    }

    // Register the article either from the gazetteer or from the auxiliary dictionary.
    if (!article.AuxDic().IsValid()) {
        YASSERT(!article.Gzt().Empty());
        AddFoundArticle(article.Gzt().GetType(), article.Gzt().GetTitle(), homonymRef);
    } else {
        const article_t* pAuxArticle = GlobalDictsHolder->GetAuxArticle(article.AuxDic());
        YASSERT(pAuxArticle != NULL);
        AddFoundArticle(pAuxArticle->get_kw_type(), pAuxArticle->get_title(), homonymRef);
    }

    m_wordSequences.push_back(&ws);
}
开发者ID:dubrousky,项目名称:tomita-parser,代码行数:33,代码来源:multiwordcreator.cpp
示例2: HasMorphNounWithGrammems_i
// Checks whether the part of the word after the hyphen is a surname.
//
// @param iH        receives the result of HasMorphNounWithGrammems_i for gSurname
//                  (-1 when no such homonym exists).
// @param grammems  receives the grammems of the surname (dictionary or predicted).
// @param strLemma  receives the lemma of the right part.
// @return true if a surname was found in morphology or successfully predicted.
bool CWord::RightPartIsSurname(int& iH, THomonymGrammems& grammems, Wtroka& strLemma)
{
    iH = HasMorphNounWithGrammems_i(TGramBitSet(gSurname));
    if (iH != -1) {
        CHomonym& h = GetRusHomonym(iH);
        grammems = h.Grammems;
        strLemma = h.GetLemma();
        size_t ii = strLemma.find('-');
        YASSERT(ii != Wtroka::npos);
        // Keep only the part after the hyphen. When assertions are compiled out
        // and the lemma has no hyphen, the whole lemma is kept (the same result
        // the unguarded substr(npos + 1) produced through wrap-around).
        if (ii != Wtroka::npos)
            strLemma = strLemma.substr(ii + 1);
        return true;
    }
    //if this word is in morphology - do not try to predict
    if (IsDictionary())
        return false;
    size_t ii = m_txt.find('-');
    if (ii == Wtroka::npos)
        return false;
    Wtroka strRightPart = m_txt.substr(ii + 1);
    TMorph::ToLower(strRightPart);
    yvector<TSurnamePredictor::TPredictedSurname> out;
    // Guard against a "successful" prediction that produced no candidates:
    // out[0] is read below.
    if (!TMorph::PredictSurname(strRightPart, out) || out.empty())
        return false;
    TGrammarBunch newForms;
    NSpike::ToGrammarBunch(out[0].StemGrammar, out[0].FlexGrammars, newForms);
    grammems.Reset(newForms);
    strLemma = out[0].Lemma;
    return true;
}
开发者ID:Frankie-666,项目名称:tomita-parser,代码行数:33,代码来源:word.cpp
示例3: SetText
// Builds a word from a primitive group and its text.
// Text of MAXWORD_LEN characters or more is cut to MAXWORD_LEN - 2 characters
// before being stored through SetText(). All quote/punctuation flags start
// cleared, then InitPunc() is run.
CWordBase::CWordBase(docLanguage lang,const CPrimGroup &prim,const Wtroka &strWord)
    : m_lang(lang)
{
    m_bHasAltWordPart = false;
    m_typ = prim.m_gtyp;
    m_num = prim.m_prim.size();

    // Store the (possibly truncated) text.
    if (strWord.size() < MAXWORD_LEN)
        SetText(strWord);
    else
        SetText(strWord.substr(0, MAXWORD_LEN - 2));

    // Position of the word as given by the primitive group.
    m_pos = prim.m_pos;
    m_len = prim.m_len;

    // Flags: nothing is known about the word yet.
    m_bUp = false;
    m_bHasUnusefulPostfix = false;

    m_bHasOpenQuote = false;
    m_bHasCloseQuote = false;
    m_bSingleOpenQuote = false;
    m_bSingleCloseQuote = false;

    m_bIsPunct = false;

    InitPunc();
}
开发者ID:Frankie-666,项目名称:tomita-parser,代码行数:25,代码来源:wordbase.cpp
示例4: ToFirstUpper
// Upper-cases the first code unit of `str` in place when it is lower-case.
// Empty strings and strings that do not start with a lower-case character
// are left untouched.
void ToFirstUpper(Wtroka& str)
{
    if (str.empty())
        return;
    if (!::IsLower(str[0]))
        return;

    // TODO: this does not work properly for surrogate pairs (UTF-16)
    const wchar16 upperFirst = static_cast<wchar16>(::ToUpper(str[0]));
    *(str.begin()) = upperFirst;
}
开发者ID:Frankie-666,项目名称:tomita-parser,代码行数:7,代码来源:utilit.cpp
示例5: EqualCiRus
bool EqualCiRus(const Wtroka& s1, const char* s2) {
static const CodePage& cp = *CodePageByCharset(CODES_WIN);
const ui16* w = s1.begin();
for (; w != s1.end() && *s2 != 0; ++w, ++s2)
if (::ToLower(*w) != ::ToLower(cp.unicode[static_cast<ui8>(*s2)]))
return false;
return w == s1.end() && *s2 == 0;
}
开发者ID:Frankie-666,项目名称:tomita-parser,代码行数:8,代码来源:utilit.cpp
示例6: GetLemma
// Returns the lemma of the homonym.
// When the homonym is backed by a source word sequence of more than one
// word and that sequence has a non-empty lemma, the sequence lemma wins;
// otherwise the base-class lemma is returned.
Wtroka CHomonym::GetLemma() const
{
    if (!m_pSourceWordSequence || !(m_pSourceWordSequence->Size() > 1))
        return CHomonymBase::GetLemma();

    Wtroka sequenceLemma = m_pSourceWordSequence->GetLemma();
    if (sequenceLemma.empty())
        return CHomonymBase::GetLemma();
    return sequenceLemma;
}
开发者ID:dubrousky,项目名称:tomita-parser,代码行数:9,代码来源:homonym.cpp
示例7: AddHyphenSurnameLemma
//Try to predict double-word surname if the last part is found in dictionary.
//Then check if the first part is found in dictionary as surname.
//If it is not then try predicting it.
bool CWord::PredictHyphenSurname()
{
if (!((m_typ == Hyphen || m_typ == HypDiv || m_typ == DivWord) && m_variant.size() > 0))
return false;
size_t ii = m_txt.find('-');
if (ii == Wtroka::npos)
return false;
//only one hyphen is allowed
if (ii != m_txt.rfind('-'))
return false;
Wtroka strRightPart = m_txt.substr(ii + 1);
if (strRightPart.size() < 1 || !::IsUpper(strRightPart[0]))
return false;
int iH = -1;
THomonymGrammems rightPartGrammems;
Wtroka strRightPartLemma;
if (!RightPartIsSurname(iH, rightPartGrammems, strRightPartLemma))
return false;
ii = m_txt.find('-'); //unnecessary call?
if (ii == Wtroka::npos)
return false;
Wtroka strFirstPart = m_txt.substr(0, ii);
TMorph::ToLower(strFirstPart);
//look in morphology
THomonymVector res;
TMorph::GetDictHomonyms(strFirstPart, res);
bool found = false;
for (size_t i = 0; i < res.size(); ++i) {
if (!found && res[i]->HasGrammem(gSurname) &&
NGleiche::Gleiche(res[i]->Grammems, rightPartGrammems, NGleiche::GenderNumberCaseCheck)) {
found = true;
Wtroka joined_lemma = res[i]->GetLemma() + '-' + strRightPartLemma;
AddHyphenSurnameLemma(iH, rightPartGrammems, joined_lemma);
}
}
if (found)
return true;
//if the word was in morphology then do not do any further predictions
if (res.size() > 0)
return false;
yvector<TSurnamePredictor::TPredictedSurname> out;
TMorph::PredictSurname(strFirstPart, out);
if (out.size() > 0 && NGleiche::Gleiche(out[0].FlexGrammars, rightPartGrammems.Forms(), NGleiche::GenderNumberCaseCheck)) {
Wtroka joined_lemma = out[0].Lemma + '-' + strRightPartLemma;
AddHyphenSurnameLemma(iH, rightPartGrammems, joined_lemma);
return true;
}
return false;
}
开发者ID:Frankie-666,项目名称:tomita-parser,代码行数:59,代码来源:word.cpp
示例8: Strip
// Removes leading and trailing whitespace (as defined by IsWhitespace) from
// `w` in place. A string consisting only of whitespace becomes empty.
//
// Implemented with indices rather than raw pointers: the previous version
// formed the pointer `w.c_str() - 1`, which lies before the start of the
// buffer and is therefore undefined pointer arithmetic.
void Strip(Wtroka& w) {
    const size_t len = w.size();

    // Index of the first non-whitespace character.
    size_t first = 0;
    while (first < len && IsWhitespace(w[first]))
        ++first;

    if (first == len) {
        // empty string or all characters are spaces
        w.clear();
        return;
    }

    // One past the index of the last non-whitespace character.
    // w[first] is not whitespace, so the loop stops at last == first + 1 at the latest.
    size_t last = len;
    while (last > first && IsWhitespace(w[last - 1]))
        --last;

    // Cut the tail first so that `first` stays a valid offset, then the head.
    w.remove(last); // does not change the string when last == size
    if (first != 0)
        w.erase(0, first);
}
开发者ID:Frankie-666,项目名称:tomita-parser,代码行数:27,代码来源:wide.cpp
示例9: ReplaceSubstr
// Replaces every non-overlapping occurrence of `from` in `str` with `to`,
// scanning left to right and continuing after each inserted replacement.
//
// @return the number of replacements made. An empty `from` is treated as
//         "nothing to replace" and yields 0.
size_t ReplaceSubstr(Wtroka& str, const TWtringBuf& from, const TWtringBuf& to)
{
    // An empty pattern would presumably match at every position, so the loop
    // below would never terminate; bail out early.
    if (from.empty())
        return 0;

    size_t count = 0;
    // NOTE(review): `~x` / `+x` look like the data-pointer / length shorthands
    // of these string types; `off` converts the found pointer back to an offset.
    size_t pos = str.off(TCharTraits<wchar16>::Find(~str, +str, ~from, +from));
    while (pos != Wtroka::npos) {
        str.replace(pos, +from, ~to, 0, Wtroka::npos, +to);
        ++count;
        // Resume right after the inserted text so the replacement itself is never rescanned.
        size_t next = pos + to.size();
        pos = str.off(TCharTraits<wchar16>::Find(~str + next, +str - next, ~from, +from));
    }
    return count;
}
开发者ID:Frankie-666,项目名称:tomita-parser,代码行数:13,代码来源:utilit.cpp
示例10: CreateTextField
void CQuotesFinder::CreateTextField(const Stroka& TextFieldName, CSentence* pSent, const SWordHomonymNum& WH,
CFactFields& factFields, const Wtroka& Postfix)
{
CNormalization InterpNorm(pSent->m_Words);
const CHomonym& h = pSent->m_Words[WH];
const CFioWordSequence* pFioWS = dynamic_cast<const CFioWordSequence*>(h.GetSourceWordSequence());
if (pFioWS != NULL) {
CFioWS newFioWS(*pFioWS);
newFioWS.SetMainWord(WH);
newFioWS.AddLemma(SWordSequenceLemma (InterpNorm.GetArtificialLemma(WH)));
CTextWS newTextWS;
newTextWS.SetPair(*pFioWS);
newTextWS.SetMainWord(WH);
newTextWS.ResetLemmas(newFioWS.GetLemmas(), true);
if (!Postfix.empty())
newTextWS.AddLemma(SWordSequenceLemma(Postfix));
factFields.AddValue(TextFieldName, newTextWS);
} else {
const CFactsWS* pFactWS = dynamic_cast<const CFactsWS*>(h.GetSourceWordSequence());
if (pFactWS) {
CTextWS newTextWS;
newTextWS.SetPair(*pFactWS);
newTextWS.SetMainWord(WH);
newTextWS.ResetLemmas(pFactWS->GetLemmas(), true);
if (!Postfix.empty())
newTextWS.AddLemma(SWordSequenceLemma(Postfix));
factFields.AddValue(TextFieldName, newTextWS);
} else {
const CTextWS* pTextWS = dynamic_cast<const CTextWS*>(h.GetSourceWordSequence());
if (pTextWS) {
CTextWS newTextWS = *pTextWS;
if (!Postfix.empty())
newTextWS.AddLemma(SWordSequenceLemma(Postfix));
factFields.AddValue(TextFieldName, newTextWS);
} else {
if (h.GetSourceWordSequence() == 0 || h.GetSourceWordSequence()->Size() == 1) {
CTextWS newTextWS;
newTextWS.SetPair(pSent->m_Words.GetWord(WH).GetSourcePair());
newTextWS.SetMainWord(WH);
newTextWS.AddLemma(SWordSequenceLemma(h.GetLemma()));
if (!Postfix.empty())
newTextWS.AddLemma(SWordSequenceLemma(Postfix));
factFields.AddValue(TextFieldName, newTextWS);
}
}
}
}
}
开发者ID:Frankie-666,项目名称:tomita-parser,代码行数:50,代码来源:quotesfinder.cpp
示例11: PutStrType
// Sets the article to the textual name `s` and clears the keyword type.
// An empty `s` leaves the object unchanged.
void SArtPointer::PutStrType(const Wtroka& s)
{
    if (!s.empty()) {
        m_strArt = s;
        m_KWType = NULL;
    }
}
开发者ID:Frankie-666,项目名称:tomita-parser,代码行数:7,代码来源:toma_constants.cpp
示例12: getWord
// Produces a printable string for the word sequence `ws` in `sLemmas`.
//
// With bLem == true only the capitalized lemma of the sequence is used and
// the return value tells whether it is non-empty.
// With bLem == false an empty lemma is replaced by the original texts of the
// words joined with single spaces; the result is then trimmed of spaces,
// double quotes and single quotes on both ends, and true is returned.
bool CSentence::GetWSLemmaString(Wtroka& sLemmas, const CWordSequence& ws, bool bLem) const
{
    sLemmas = ws.GetCapitalizedLemma();
    if (bLem)
        return !sLemmas.empty();

    if (sLemmas.empty()) {
        // Fall back to the original text of every word of the sequence.
        for (int wordNo = ws.FirstWord(); wordNo <= ws.LastWord(); ++wordNo) {
            if (!sLemmas.empty())
                sLemmas += ' ';
            sLemmas += getWord(wordNo)->GetOriginalText();
        }
    }

    static const Wtroka trim_chars = CharToWide(" \"\'");
    TWtringBuf trimmed = sLemmas;
    // Drop trimmable characters from the front...
    while (!trimmed.empty()) {
        if (trim_chars.find(trimmed[0]) == TWtringBuf::npos)
            break;
        trimmed.Skip(1);
    }
    // ...and from the back.
    while (!trimmed.empty()) {
        if (trim_chars.find(trimmed.back()) == TWtringBuf::npos)
            break;
        trimmed.Chop(1);
    }

    // Re-assign only when something was actually trimmed.
    if (sLemmas.size() != trimmed.size())
        sLemmas = ::ToWtring(trimmed);
    return true;
}
开发者ID:dubrousky,项目名称:tomita-parser,代码行数:26,代码来源:sentence.cpp
示例13: GetSentPrc
// Builds the text of the words in `PeriodToPrint` of sentence `SentNo`,
// skipping the words of `GroupToExclude` and a comma directly adjacent to
// that group. Each word is followed by a space.
// If the resulting text contains exactly one '"' character, the function
// looks further in the sentence for a word with a closing quote and, when
// found, appends all the words up to and including it.
// `FioInQuotes` is cleared and then filled by AddFios for the collected words.
Wtroka CQuotesFinder::FindRightQuoteIfHas(const CWordsPair& PeriodToPrint, int SentNo,
                                          const CWordsPair& GroupToExclude, yvector<SFactAddress>& FioInQuotes)
{
    FioInQuotes.clear();
    CSentenceRusProcessor* pSentPrc = GetSentPrc(SentNo);
    yset<int> QuoteWords;
    Wtroka ClauseStr;
    static const Wtroka COMMA = Wtroka::FromAscii(",");

    for (int i = PeriodToPrint.FirstWord(); i <= PeriodToPrint.LastWord(); i++) {
        const CWord& w = pSentPrc->m_Words.GetOriginalWord(i);
        if (GroupToExclude.Contains(i))
            continue;
        // a comma right after or right before the excluded group is dropped too
        if (GroupToExclude.Contains(i - 1) && w.GetText() == COMMA)
            continue;
        if (GroupToExclude.Contains(i + 1) && w.GetText() == COMMA)
            continue;
        ClauseStr += w.GetOriginalText() + ' ';
        QuoteWords.insert(i);
    }

    const size_t quotePos = ClauseStr.find('"');
    const bool hasSingleQuoteMark = quotePos != Wtroka::npos && ClauseStr.rfind('"') == quotePos;
    if (hasSingleQuoteMark) {
        // only one quote mark so far: go and look for the second one
        yset<int> AddQuoteWords;
        Wtroka Add;
        int i;
        for (i = PeriodToPrint.LastWord()+1; i < static_cast<int>(pSentPrc->getWordsCount()); i++) {
            const CWord& w = pSentPrc->m_Words.GetOriginalWord(i);
            if (!GroupToExclude.Contains(i)) {
                Add += w.GetOriginalText() + ' ';
                AddQuoteWords.insert(i);
            }
            if (w.HasCloseQuote())
                break;
        }
        // the tail is appended only when the scan did not stop at the word count
        if (i != static_cast<int>(pSentPrc->getWordsCount())) {
            ClauseStr += ' ';
            ClauseStr += Add;
            QuoteWords.insert(AddQuoteWords.begin(), AddQuoteWords.end());
        }
    }

    AddFios(SentNo, QuoteWords, FioInQuotes);
    return ClauseStr;
}
开发者ID:Frankie-666,项目名称:tomita-parser,代码行数:48,代码来源:quotesfinder.cpp
示例14: Collapse
// Collapses every run of whitespace characters in `w` into a single ' '.
// A lone whitespace character that is not ' ' is also rewritten to ' '.
void Collapse(Wtroka& w) {
    size_t total = w.size();
    size_t pos = 0;
    while (pos < total) {
        // Length of the whitespace run starting at pos (may be zero).
        size_t run = 0;
        while (pos + run < total && IsWhitespace(w[pos + run]))
            ++run;

        const bool needsRewrite = run > 1 || (run == 1 && w[pos] != ' ');
        if (needsRewrite) {
            w.replace(pos, run, 1, ' ');
            total = w.size();
        }
        ++pos;
    }
}
开发者ID:Frankie-666,项目名称:tomita-parser,代码行数:18,代码来源:wide.cpp
示例15: IsDoubleQuote
// Tells whether `str` is exactly one quotation character other than the
// single-quote character.
// An earlier version (kept in the source as commented-out code) listed the
// accepted characters explicitly: DOUBLE_QUOTE_CHAR, 127, U+00BB, U+00AB,
// U+201C and U+201D.
bool CWordBase::IsDoubleQuote(const Wtroka& str)
{
    if (str.size() != 1)
        return false;
    if (!::IsQuotation(str[0]))
        return false;
    return str[0] != SINGLE_QUOTE_CHAR;
}
开发者ID:Frankie-666,项目名称:tomita-parser,代码行数:11,代码来源:wordbase.cpp
示例16: RequiresSpace
// Decides whether a space is needed between the adjacent tokens w1 and w2.
// No space is needed when w1 is a single character of one of the "start"
// punctuation/quote types, or when w2 is a single character of one of the
// "end"/terminal punctuation/quote types (types as named by the constants
// passed to NUnicode::CharHasType). In every other case a space is required.
bool RequiresSpace(const Wtroka& w1, const Wtroka& w2)
{
// Turns a character-type index into its bit in the type mask.
#define SHIFT(i) (ULL(1)<<(i))
    const bool leftOpens = w1.length() == 1 &&
        NUnicode::CharHasType(w1[0],
            SHIFT(Ps_START) | SHIFT(Ps_SINGLE_QUOTE) | SHIFT(Pi_SINGLE_QUOTE) |
            SHIFT(Ps_QUOTE) | SHIFT(Pi_QUOTE));
    if (leftOpens)
        return false;

    const bool rightCloses = w2.length() == 1 &&
        NUnicode::CharHasType(w2[0],
            SHIFT(Pe_END) | SHIFT(Po_TERMINAL) | SHIFT(Pe_SINGLE_QUOTE) | SHIFT(Pf_SINGLE_QUOTE) |
            SHIFT(Pe_QUOTE) | SHIFT(Pf_QUOTE));
    if (rightCloses)
        return false;
#undef SHIFT
    return true;
}
开发者ID:Frankie-666,项目名称:tomita-parser,代码行数:21,代码来源:utilit.cpp
示例17: Wtroka
// Returns the text of the word with its opening/closing quote characters
// put back around it (single or double, according to the quote flags).
Wtroka CWordBase::GetOriginalText() const
{
    // workaround for the odd symbols generated for <BR> after the end of a sentence
    if (m_typ == Punct && IsChar(m_txt, 'W'))
        return Wtroka();

    // optimize most frequent case: no quotes at all
    if (!m_bHasOpenQuote && !m_bHasCloseQuote)
        return m_txt;

    size_t quoteCount = 0;
    if (m_bHasOpenQuote)
        ++quoteCount;
    if (m_bHasCloseQuote)
        ++quoteCount;

    Wtroka quoted;
    quoted.reserve(m_txt.size() + quoteCount);
    if (m_bHasOpenQuote)
        quoted += m_bSingleOpenQuote ? SINGLE_QUOTE_CHAR : DOUBLE_QUOTE_CHAR;
    quoted += m_txt;
    if (m_bHasCloseQuote)
        quoted += m_bSingleCloseQuote ? SINGLE_QUOTE_CHAR : DOUBLE_QUOTE_CHAR;
    return quoted;
}
开发者ID:Frankie-666,项目名称:tomita-parser,代码行数:21,代码来源:wordbase.cpp
示例18: StripString
bool CQuotesFinder::AddQuoteFact(const SValenciesValues& VerbCommunic, CSentenceRusProcessor* pSent,
const Wtroka& QuoteStr, const yvector<SFactAddress>& FioInQuotes, SLeadInfo LeadInfo)
{
Wtroka qstr = StripString(QuoteStr);
if (qstr.size() < 3)
return false;
if (!(LeadInfo.m_iLastSent == -1 && LeadInfo.m_iFirstSent == -1) &&
!(LeadInfo.m_iFirstSent >= 0 && LeadInfo.m_iLastSent >= 0))
ythrow yexception() << "CQuotesFinder::AddQuoteFact : bad lead info";
if (m_bCreateDBFact) {
DECLARE_STATIC_RUS_WORD(kSUB, "СУБ");
if (!AddQuoteDBFact(VerbCommunic.GetValue(kSUB), pSent, qstr, LeadInfo, true))
return false;
for (size_t i=0; i < FioInQuotes.size(); i++)
if (!AddQuoteDBFact(FioInQuotes[i], GetSentPrc(FioInQuotes[i].m_iSentNum), qstr, LeadInfo, false))
return false;
} else
AddSimpleQuoteFact(VerbCommunic, pSent, qstr, LeadInfo);
return true;
}
开发者ID:Frankie-666,项目名称:tomita-parser,代码行数:23,代码来源:quotesfinder.cpp
示例19: ResolveArticleByTitle
bool CParserOptions::ResolveArticleByTitle(const Wtroka& title, yset<SArtPointer>& resolved) const {
typedef NGzt::TArticlePool::TTitleMap TTitleMap;
const wchar16 STAR = '*';
size_t maxSize = resolved.size() + 100;
bool found = false;
TWtringBuf suffix = title;
TWtringBuf preffix = suffix.NextTok(STAR);
TTitleMap subtrie = Singleton<CDictsHolder>()->GetGztArticleIndex()->FindTails(~preffix, +preffix);
for (TTitleMap::TConstIterator it = subtrie.Begin(); it != subtrie.End(); ++it) {
if (!it.IsEmpty()) {
Wtroka key = it.GetKey();
key.prepend(preffix);
if (CDictsHolder::IsTitleMatch(key, title)) {
if (title != key)
Cerr << "XML parameters: " << NStr::DebugEncode(title) << " -> " << NStr::DebugEncode(key) << Endl;
resolved.insert(SArtPointer(key));
found = true;
}
}
if (resolved.size() >= maxSize) {
Cerr << "XML parameters: too many gzt-articles correspond to \"" << NStr::DebugEncode(title)
<< "\". Please use more specific name." << Endl;
ythrow yexception() << "Too many gzt-articles resolved.";
}
}
// it also could be a situation article name from aux_dic_kw.cxx
if (Singleton<CDictsHolder>()->GetDict(KW_DICT).has_article(title)) {
resolved.insert(SArtPointer(title));
found = true;
}
return found;
}
开发者ID:Frankie-666,项目名称:tomita-parser,代码行数:37,代码来源:parseroptions.cpp
示例20: DECLARE_STATIC_RUS_WORD
// Builds the quote value field from `quoteStr` and adds it to `factFields`
// under CFactFields::QuoteValue.
//
// A leading "что " and/or "чтобы " is removed, the text is stripped, one pair
// of surrounding double quotes is removed, and the text is stripped again.
// @return false when nothing is left after removing the leading conjunctions;
//         true otherwise (the value has been added).
bool CQuotesFinder::CreateQuoteValue(const Wtroka& quoteStr, CFactFields& factFields) const
{
    DECLARE_STATIC_RUS_WORD(kChto, "что ");
    DECLARE_STATIC_RUS_WORD(kChtoby, "чтобы ");
    Wtroka qstr = quoteStr;
    CTextWS QuoteWS;
    // Erase exactly as many characters as the matched prefix has, instead of
    // hard-coded lengths that must be kept in sync with the literals above.
    if (qstr.has_prefix(kChto))
        qstr.erase(0, kChto.size());
    if (qstr.has_prefix(kChtoby))
        qstr.erase(0, kChtoby.size());
    if (qstr.empty())
        return false;
    qstr = StripString(qstr);
    // Remove one pair of enclosing double quotes, if present.
    if (qstr.size() > 1 && qstr[0] =='"' && qstr.back() =='"')
        qstr = qstr.substr(1, qstr.size() - 2);
    qstr = StripString(qstr);
    QuoteWS.AddLemma(SWordSequenceLemma(qstr));
    QuoteWS.SetArtificialPair(CWordsPair(0,0));
    factFields.AddValue(CFactFields::QuoteValue, QuoteWS);
    return true;
}
开发者ID:Frankie-666,项目名称:tomita-parser,代码行数:24,代码来源:quotesfinder.cpp
注:本文中的Wtroka类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论