本文整理汇总了C++中Unicode类的典型用法代码示例。如果您正苦于以下问题：C++ Unicode类的具体用法？C++ Unicode怎么用？C++ Unicode使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Unicode类的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: tag
bool tag(const string& src, vector<pair<string, string> >& res) const {
vector<string> cutRes;
if (!segment_.cut(src, cutRes)) {
LogError("mixSegment_ cut failed");
return false;
}
const DictUnit *tmp = NULL;
Unicode unico;
const DictTrie * dict = segment_.getDictTrie();
assert(dict != NULL);
for (vector<string>::iterator itr = cutRes.begin(); itr != cutRes.end(); ++itr) {
if (!TransCode::decode(*itr, unico)) {
LogError("decode failed.");
return false;
}
tmp = dict->find(unico.begin(), unico.end());
if(tmp == NULL || tmp->tag.empty()) {
res.push_back(make_pair(*itr, specialRule_(unico)));
} else {
res.push_back(make_pair(*itr, tmp->tag));
}
}
return !res.empty();
}
开发者ID:AllanXiang,项目名称:cppjieba,代码行数:25,代码来源:PosTagger.hpp
示例2: IsSingleWord
// Returns true when `str` decodes to exactly one Unicode element.
// A string that fails to decode is never reported as a single word, so
// whatever the decoder left in the buffer on failure is not inspected.
bool IsSingleWord(const string& str) const {
  Unicode unicode;
  if (!TransCode::Decode(str, unicode)) {
    return false;
  }
  return unicode.size() == 1;
}
开发者ID:amikey,项目名称:cppjieba,代码行数:7,代码来源:KeywordExtractor.hpp
示例3: _loadEmitProb
bool _loadEmitProb(const string &line, EmitProbMap &mp)
{
if (line.empty())
{
return false;
}
vector<string> tmp, tmp2;
Unicode unicode;
split(line, tmp, ",");
for (size_t i = 0; i < tmp.size(); i++)
{
split(tmp[i], tmp2, ":");
if (2 != tmp2.size())
{
Rcout << "_emitProb illegal." << std::endl;
return false;
}
if (!TransCode::decode(tmp2[0], unicode) || unicode.size() != 1)
{
Rcout << "TransCode failed." << std::endl;
return false;
}
mp[unicode[0]] = atof(tmp2[1].c_str());
}
return true;
}
开发者ID:hoyoung2015,项目名称:jiebaR,代码行数:26,代码来源:HMMSegment.hpp
示例4: tag
bool tag(const string& src, vector<pair<string, string> >& res) const
{
vector<string> cutRes;
if (!_segment.cut(src, cutRes))
{
Rcout<<"_mixSegment cut failed"<<std::endl;
return false;
}
const DictUnit *tmp = NULL;
Unicode unico;
for (vector<string>::iterator itr = cutRes.begin(); itr != cutRes.end(); ++itr)
{
if (!TransCode::decode(*itr, unico))
{
Rcout<<"decode failed."<<std::endl;
return false;
}
tmp = _dictTrie->find(unico.begin(), unico.end());
if(tmp == NULL || tmp->tag.empty())
{
res.push_back(make_pair(*itr, _specialRule(unico)));
}
else
{
res.push_back(make_pair(*itr, tmp->tag));
} }
return !res.empty();
}
开发者ID:hoyoung2015,项目名称:jiebaR,代码行数:31,代码来源:PosTagger.hpp
示例5: tag
bool tag(const string& src, vector<pair<string, string> >& res)
{
assert(_getInitFlag());
vector<string> cutRes;
if (!_segment.cut(src, cutRes))
{
LogError("_mixSegment cut failed");
return false;
}
const DictUnit *tmp = NULL;
Unicode unico;
for (vector<string>::iterator itr = cutRes.begin(); itr != cutRes.end(); ++itr)
{
if (!TransCode::decode(*itr, unico))
{
LogError("decode failed.");
return false;
}
tmp = _dictTrie.find(unico.begin(), unico.end());
res.push_back(make_pair(*itr, tmp == NULL ? "x" : tmp->tag));
}
tmp = NULL;
return !res.empty();
}
开发者ID:Axure,项目名称:nodejieba,代码行数:25,代码来源:PosTagger.hpp
示例6: cut
virtual bool cut(const string& str, vector<vector<string> >& vres) const {
vres.clear();
Unicode unicode;
unicode.reserve(str.size());
TransCode::decode(str, unicode);
Unicode::const_iterator left = unicode.begin();
Unicode::const_iterator right;
for(right = unicode.begin(); right != unicode.end(); right++) {
if(isIn(specialSymbols_, *right)) {
if(left != right) {
cut(left, right, vres);
}
for (vector<vector<string> >::iterator itr = vres.begin(); itr != vres.end(); ++itr) {
itr->resize(itr->size() + 1);
TransCode::encode(right, right + 1, itr->back());
}
left = right + 1;
}
}
if(left != right) {
cut(left, right, vres);
}
return true;
}
开发者ID:FireBrother,项目名称:CPPJiebaSE,代码行数:29,代码来源:MultiSegmentBase.hpp
示例7: TEST
// Builds a trie from the main and user dictionaries, then runs the
// multi-result find() over a short ASCII word. Only the decode step is
// asserted; the find() output is not inspected.
TEST(DictTrieTest, automation) {
  DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8");
  // Alternative input kept from the original: "yasherhs"
  const string word("abcderf");
  Unicode runes;
  ASSERT_TRUE(TransCode::decode(word, runes));
  vector<struct SegmentChar> matches;
  trie.find(runes.begin(), runes.end(), matches);
}
开发者ID:songcheng,项目名称:cppjieba,代码行数:9,代码来源:TTrie.cpp
示例8: TEST
// Exercises DictTrie built from DICT_FILE: checks the minimum weight, the
// string form of a single-word lookup, and that the matches reported for the
// first position of a longer word agree with individual prefix lookups.
TEST(DictTrieTest, Test1) {
string s1, s2;
DictTrie trie(DICT_FILE);
// One-sided check: GetMinWeight() + 15.6479 must be less than 0.001.
ASSERT_LT(trie.GetMinWeight() + 15.6479, 0.001);
string word("来到");
Unicode uni;
ASSERT_TRUE(TransCode::Decode(word, uni));
// Hand-built entry for the same word.
// NOTE(review): s1 is not compared after `s1 << nodeInfo`; it is written again
// by `s1 << res` further down. Whether that later write replaces or appends
// depends on the project's operator<< for strings -- confirm.
DictUnit nodeInfo;
nodeInfo.word = uni;
nodeInfo.tag = "v";
nodeInfo.weight = -8.87033;
s1 << nodeInfo;
// String form of the entry actually stored in the trie for the word.
s2 << (*trie.Find(uni.begin(), uni.end()));
EXPECT_EQ("[\"26469\", \"21040\"] v -8.870", s2);
word = "清华大学";
LocalVector<pair<size_t, const DictUnit*> > res;
// Look up each prefix on its own and record (index of its last element, entry).
const char * words[] = {"清", "清华", "清华大学"};
for (size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++) {
ASSERT_TRUE(TransCode::Decode(words[i], uni));
res.push_back(make_pair(uni.size() - 1, trie.Find(uni.begin(), uni.end())));
//resMap[uni.size() - 1] = trie.Find(uni.begin(), uni.end());
}
// NOTE(review): `vec` is declared but never used in this test.
vector<pair<size_t, const DictUnit*> > vec;
vector<struct Dag> dags;
ASSERT_TRUE(TransCode::Decode(word, uni));
// Multi-result lookup over the full word; one Dag per input element is expected.
trie.Find(uni.begin(), uni.end(), dags);
ASSERT_EQ(dags.size(), uni.size());
ASSERT_NE(dags.size(), 0u);
// The matches starting at position 0 must have the same string form as the
// per-prefix lookups collected in `res`.
s1 << res;
s2 << dags[0].nexts;
ASSERT_EQ(s1, s2);
}
开发者ID:arrack,项目名称:cppjieba,代码行数:34,代码来源:trie_test.cpp
示例9: DecodeRunesInString
inline bool DecodeRunesInString(const char* s, size_t len, Unicode& unicode) {
unicode.clear();
RuneStrArray runes;
if (!DecodeRunesInString(s, len, runes)) {
return false;
}
unicode.reserve(runes.size());
for (size_t i = 0; i < runes.size(); i++) {
unicode.push_back(runes[i].rune);
}
return true;
}
开发者ID:yanyiwu,项目名称:cppjieba,代码行数:12,代码来源:Unicode.hpp
示例10: cut
// Two-stage segmentation of [begin, end).
// _mpSeg cuts first. Multi-element words, and single-element words that
// _mpSeg reports as user-dictionary words, are appended to `res` as they are.
// Each maximal run of the remaining single-element words is concatenated and
// re-cut with _hmmSeg, and that output is appended instead.
// Returns false if either segmenter fails (results appended so far stay in
// `res`).
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
{
  assert(_getInitFlag());

  vector<Unicode> mpWords;
  if (!_mpSeg.cut(begin, end, mpWords))
  {
    LogError("mpSeg cutDAG failed.");
    return false;
  }

  vector<Unicode> hmmWords;
  Unicode run;
  size_t idx = 0;
  while (idx < mpWords.size())
  {
    const Unicode& word = mpWords[idx];
    const bool needsHmm = (1 == word.size()) && !_mpSeg.isUserDictSingleChineseWord(word[0]);
    if (!needsHmm)
    {
      // _mpSeg already produced a proper word; keep it as is.
      res.push_back(word);
      ++idx;
      continue;
    }

    // Gather the run of consecutive loose single elements starting here.
    run.clear();
    hmmWords.clear();
    size_t stop = idx;
    while (stop < mpWords.size() && 1 == mpWords[stop].size() && !_mpSeg.isUserDictSingleChineseWord(mpWords[stop][0]))
    {
      run.push_back(mpWords[stop][0]);
      ++stop;
    }

    // Re-cut the run with the HMM segmenter and emit its words.
    if (!_hmmSeg.cut(run.begin(), run.end(), hmmWords))
    {
      LogError("_hmmSeg cut failed.");
      return false;
    }
    res.insert(res.end(), hmmWords.begin(), hmmWords.end());

    // Resume right after the run.
    idx = stop;
  }
  return true;
}
开发者ID:sdu2011,项目名称:cppjieba,代码行数:52,代码来源:MixSegment.hpp
示例11: LogFatal
bool Trie::find(const Unicode& unico, vector<pair<uint, const TrieNodeInfo*> >& res)const
{
if(!_getInitFlag())
{
LogFatal("trie not initted!");
return false;
}
TrieNode* p = _root;
//for(Unicode::const_iterator it = begin; it != end; it++)
for(uint i = 0; i < unico.size(); i++)
{
if(p->hmap.find(unico[i]) == p-> hmap.end())
{
break;
}
p = p->hmap[unico[i]];
if(p->isLeaf)
{
uint pos = p->nodeInfoVecPos;
if(pos < _nodeInfoVec.size())
{
res.push_back(make_pair(i, &_nodeInfoVec[pos]));
}
else
{
LogFatal("node's nodeInfoVecPos is out of _nodeInfoVec's range");
return false;
}
}
}
return !res.empty();
}
开发者ID:Alienfeel,项目名称:cppjieba,代码行数:32,代码来源:Trie.cpp
示例12: cut
bool cut(const string& sentence,
vector<string>& words,
size_t max_word_len) const {
Unicode unicode;
if (!TransCode::decode(sentence, unicode)) {
return false;
}
vector<Unicode> unicodeWords;
cut(unicode.begin(), unicode.end(),
unicodeWords, max_word_len);
words.resize(unicodeWords.size());
for (size_t i = 0; i < words.size(); i++) {
TransCode::encode(unicodeWords[i], words[i]);
}
return true;
}
开发者ID:Sandy4321,项目名称:chinese_nlp,代码行数:16,代码来源:MPSegment.hpp
示例13: IsAllAscii
// Returns true when every element of `s` is below 0x80 (true for empty `s`).
bool IsAllAscii(const Unicode& s) const {
  const size_t count = s.size();
  size_t pos = 0;
  // Advance past the leading run of elements below 0x80.
  while (pos < count && s[pos] < 0x80) {
    ++pos;
  }
  return pos == count;
}
开发者ID:dreamsxin,项目名称:pg_jieba,代码行数:8,代码来源:QuerySegment.hpp
示例14: cut
bool cut(const string& str, vector<TrieNodeInfo>& segWordInfos)const
{
if(!_getInitFlag())
{
LogError("not inited.");
return false;
}
if(str.empty())
{
return false;
}
Unicode sentence;
if(!TransCode::decode(str, sentence))
{
LogError("TransCode::decode failed.");
return false;
}
return cut(sentence.begin(), sentence.end(), segWordInfos);
}
开发者ID:fc13240,项目名称:cppjieba,代码行数:21,代码来源:MPSegment.hpp
示例15: LoadEmitProb
bool LoadEmitProb(const std::string& line, EmitProbMap& mp) {
if (line.empty()) {
return false;
}
std::vector<std::string> tmp, tmp2;
Unicode unicode;
limonp::Split(line, tmp, ",");
for (size_t i = 0; i < tmp.size(); i++) {
limonp::Split(tmp[i], tmp2, ":");
if (2 != tmp2.size()) {
// limonp::LOG(ERROR) << "emitProb illegal.";
return false;
}
if (!TransCode::Decode(tmp2[0], unicode) || unicode.size() != 1) {
// limonp::LOG(ERROR) << "TransCode failed.";
return false;
}
mp[unicode[0]] = atof(tmp2[1].c_str());
}
return true;
}
开发者ID:samevers,项目名称:posTrunk,代码行数:21,代码来源:HMMModel.hpp
示例16: Tag
bool Tag(const string& src, vector<pair<string, string> >& res) const {
vector<string> CutRes;
segment_.Cut(src, CutRes);
const DictUnit *tmp = NULL;
Unicode unico;
const DictTrie * dict = segment_.GetDictTrie();
assert(dict != NULL);
for (vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) {
if (!TransCode::Decode(*itr, unico)) {
LOG(ERROR) << "Decode failed.";
return false;
}
tmp = dict->Find(unico.begin(), unico.end());
if (tmp == NULL || tmp->tag.empty()) {
res.push_back(make_pair(*itr, SpecialRule(unico)));
} else {
res.push_back(make_pair(*itr, tmp->tag));
}
}
return !res.empty();
}
开发者ID:1271281914,项目名称:simhash,代码行数:22,代码来源:PosTagger.hpp
示例17: encode
// Encodes `object` with u8_encode() and wraps the produced buffer in Bytes.
// Throws EncodeError (carrying the state, the offset reported by
// u8_encode(), and "UTF-8") when the encoder does not report
// UNICODE_STATE_SUCCESS.
inline Bytes
encode(const Unicode& object) const
{
  size_t errorPos = 0;
  size_t encodedLen = 0;
  bytechar* encoded = NULL;
  size_t sourceLen = object.length();
  const unicode* source = static_cast<const unicode*>(object);

  int status = u8_encode(source, sourceLen, encoded, encodedLen, errorPos);
  if (status != UNICODE_STATE_SUCCESS)
  {
    throw EncodeError(status, errorPos, "UTF-8");
  }
  return Bytes(encoded, encodedLen);
}
开发者ID:pombredanne,项目名称:quirinus,代码行数:14,代码来源:UTF8Codec.hpp
示例18: cut
virtual bool cut(const string& str, vector<string>& res)const
{
assert(_getInitFlag());
Unicode unicode;
TransCode::decode(str, unicode);
res.clear();
Unicode::const_iterator left = unicode.begin();
Unicode::const_iterator right = unicode.begin();
string oneword;
while(right != unicode.end())
{
if(isIn(_specialSymbols, *right))
{
if(left != right)
{
cut(left, right, res);
}
TransCode::encode(right, right + 1, oneword);
res.push_back(oneword);
right ++;
left = right;
}
else
{
right ++;
}
}
if(left != right)
{
cut(left, right, res);
}
return true;
}
开发者ID:anboqing,项目名称:MiniSearchEngine,代码行数:37,代码来源:SegmentBase.hpp
示例19: InsertNode
void InsertNode(const Unicode& key, const DictUnit* ptValue) {
if (key.begin() == key.end()) {
return;
}
TrieNode::NextMap::const_iterator kmIter;
Unicode::const_iterator citer= key.begin();
TrieNode *ptNode = _base + (*(citer++));
for (; citer != key.end(); citer++) {
if (NULL == ptNode->next) {
ptNode->next = new TrieNode::NextMap;
}
kmIter = ptNode->next->find(*citer);
if (ptNode->next->end() == kmIter) {
TrieNode *nextNode = new TrieNode;
(*(ptNode->next))[*citer] = nextNode;
ptNode = nextNode;
} else {
ptNode = kmIter->second;
}
}
ptNode->ptValue = ptValue;
}
开发者ID:samevers,项目名称:posTrunk,代码行数:24,代码来源:Trie.hpp
示例20: encode
// Encodes `object` one element at a time with uctobyte() into a newly
// allocated byte buffer of the same length and wraps that buffer in Bytes.
// Throws EncodeError(UNICODE_STATE_ILLEGAL, offset, *this) when uctobyte()
// returns 0x110000 for an element; `offset` is the index of that element.
inline Bytes
encode(const Unicode& object) const
{
  unicode code = 0;
  size_t offset = 0;
  bytechar* encptr = NULL;
  size_t len = object.length();
  const unicode* decptr = object;
  encptr = new bytechar[len];
  for (size_t i = 0; i < len; ++i)
  {
    code = this->uctobyte(decptr[i]);
    if (code == 0x110000)
    {
      // Release the output buffer allocated above. The input buffer
      // (decptr) belongs to `object` and must not be freed here.
      delete[] encptr;
      throw EncodeError(UNICODE_STATE_ILLEGAL, offset, *this);
    }
    encptr[i] = static_cast<bytechar>(code);
    ++offset;
  }
  return Bytes(encptr, len);
}
开发者ID:pombredanne,项目名称:quirinus,代码行数:22,代码来源:SBCodec.hpp
注：本文中的Unicode类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论