本文整理汇总了C++中tokenizer类的典型用法代码示例。如果您正苦于以下问题:C++ tokenizer类的具体用法?C++ tokenizer怎么用?C++ tokenizer使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了tokenizer类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: read_scene
// Parses an MQO "Scene" chunk: consumes the surrounding braces and stores
// the ambient colour into doc; every other attribute in the chunk is
// skipped.  Emits debug-trace markers while parsing.
void read_scene(tokenizer& t,document_type& doc)
{
    //std::cerr << "unsupported data: Scene" << std::endl;
    OutputDebugStringA("Scene>>>>");
    t.expect_literal( "{" );
    for( ;; ) {
        substr tok = t();
        OutputDebugStringA((tok.str() + "\n").c_str());
        if( tok == "}" ) {
            break;                              // end of the Scene chunk
        }
        if( tok == "amb" ) {
            // Ambient light colour: three floats in [0,1]; alpha is implied.
            doc.scene.ambient.red   = t.expect_float(0,1);
            doc.scene.ambient.green = t.expect_float(0,1);
            doc.scene.ambient.blue  = t.expect_float(0,1);
            doc.scene.ambient.alpha = 1;
            t.expect_linefeed();
        } else if( tok == "dirlights" ) {
            // Directional lights are unsupported: skip the nested chunk.
            t();
            skip_chunk(t);
            skip_to_linefeed(t);
        } else {
            // Unknown attribute: ignore the rest of the line.
            skip_to_linefeed(t);
        }
    }
    OutputDebugStringA("Scene<<<<");
}
开发者ID:jonigata,项目名称:partix,代码行数:25,代码来源:mqoreader.cpp
示例2: recognize_vertical
// Reads paragraphs from is, tokenizes them, runs named-entity recognition,
// and writes one line per entity in vertical format:
//   <comma-separated 1-based token ids> TAB <entity type> TAB <entity text>
// Token ids are global across the whole input; one extra id is reserved
// between sentences (the "+ 1" at the bottom) as a sentence separator.
void recognize_vertical(istream& is, ostream& os, const ner& recognizer, tokenizer& tokenizer) {
string para;
vector<string_piece> forms;
vector<named_entity> entities;
unsigned total_tokens = 0;
string entity_ids, entity_text;
while (getpara(is, para)) {
// Tokenize and tag
tokenizer.set_text(para);
while (tokenizer.next_sentence(&forms, nullptr)) {
recognizer.recognize(forms, entities);
// NOTE(review): sort_entities presumably orders entities for stable
// output -- each entity is emitted independently below; confirm.
sort_entities(entities);
for (auto&& entity : entities) {
entity_ids.clear();
entity_text.clear();
// Collect the ids and surface text of every token of this entity.
for (auto i = entity.start; i < entity.start + entity.length; i++) {
if (i > entity.start) {
entity_ids += ',';
entity_text += ' ';
}
// +1 converts the 0-based in-sentence index into a 1-based global id.
entity_ids += to_string(total_tokens + i + 1);
entity_text.append(forms[i].str, forms[i].len);
}
os << entity_ids << '\t' << entity.type << '\t' << entity_text << '\n';
}
os << flush;
// Advance past this sentence's tokens plus one separator position.
total_tokens += forms.size() + 1;
}
}
}
开发者ID:ufal,项目名称:nametag,代码行数:32,代码来源:run_ner.cpp
示例3: extract_identifier
template <typename R> bool extract_identifier(R& result,tokenizer& tokenizer)
{
result.clear();
//buffer
array<ascii> buffer;
array<ascii> current;
//identifier
if(!tokenizer.identifier(current))
return false;
buffer.append(current);
//word
if(tokenizer.word(current))
buffer.append(current);
//delimited
if(!tokenizer.is_delimited())
return false;
//commit
result=buffer;
update(symbols()._identifier,buffer);
return true;
}
开发者ID:vmorgulys,项目名称:sandbox,代码行数:34,代码来源:class.token.h.extract.cpp
示例4: extract_word
template <typename R> bool extract_word(R& result,tokenizer& tokenizer)
{
result.clear();
//buffer
array<ascii> buffer;
//word
if(!tokenizer.word(buffer))
return false;
//delimited
if(!tokenizer.is_delimited())
return false;
//commit
result=buffer;
update(symbols()._word,buffer);
return true;
}
开发者ID:vmorgulys,项目名称:sandbox,代码行数:26,代码来源:class.token.h.extract.cpp
示例5: tag_xml
// Reads paragraphs from is, tokenizes and tags them, and writes XML output:
// the original text with <sentence> and <token lemma="..." tag="..."> markup
// around each sentence / token.  Text between tokens is copied through
// verbatim (XML-escaped) via the `unprinted` cursor.
void tag_xml(istream& is, ostream& os, const tagger& tagger, tokenizer& tokenizer, const tagset_converter& tagset_converter, const derivation_formatter& derivation, morpho::guesser_mode guesser) {
string para;
vector<string_piece> forms;
vector<tagged_lemma> tags;
while (getpara(is, para)) {
// Tokenize and tag
tokenizer.set_text(para);
// `unprinted` tracks the first character of para not yet echoed to os.
const char* unprinted = para.c_str();
while (tokenizer.next_sentence(&forms, nullptr)) {
tagger.tag(forms, tags, guesser);
for (unsigned i = 0; i < forms.size(); i++) {
// Convert the tag into the requested tagset and reformat the lemma.
tagset_converter.convert(tags[i]);
derivation.format_derivation(tags[i].lemma);
// Echo any raw text between the previous token and this one.
os << xml_encoded(string_piece(unprinted, forms[i].str - unprinted));
if (!i) os << "<sentence>";
os << "<token lemma=\"" << xml_encoded(tags[i].lemma, true) << "\" tag=\"" << xml_encoded(tags[i].tag, true) << "\">"
<< xml_encoded(forms[i]) << "</token>";
if (i + 1 == forms.size()) os << "</sentence>";
unprinted = forms[i].str + forms[i].len;
}
}
// Echo trailing text after the last token, then flush the paragraph.
os << xml_encoded(string_piece(unprinted, para.c_str() + para.size() - unprinted)) << flush;
}
}
开发者ID:ufal,项目名称:morphodita,代码行数:27,代码来源:run_tagger.cpp
示例6: wowEvent
// Builds a spell event from a combat-log line.  After the fields consumed
// by the wowEvent base, columns 9-11 hold the spell id, name, and school.
spellEvent::spellEvent(tokenizer& t) : wowEvent(t)
{
spellID = asInt(t.token(9));
// Spell names are quoted in the log; strip the quotes in place.
string spellName = t.token(10); trimQuotes(spellName);
// Record the id -> name mapping in the shared `spells` table.
spells[spellID] = spellName;
// Column 11 is the spell school encoded as a hexadecimal value.
spellSchool = asuIntFromHexa(t.token(11));
}
开发者ID:alhunor,项目名称:projects,代码行数:7,代码来源:events.cpp
示例7: is_next
// True when, after skipping any run of '-', '+' or '.' characters starting
// at offset i, the next character is a decimal digit.
bool number::is_next(tokenizer &tokens, int i, void *data)
{
	char c = tokens.peek_char(i);
	while (c == '-' || c == '+' || c == '.')
		c = tokens.peek_char(++i);
	return c >= '0' && c <= '9';
}
开发者ID:yuchien302,项目名称:skeleton,代码行数:7,代码来源:number.cpp
示例8: is_next
// A statement can start here if the next tokens begin a node id or are one
// of the subgraph/graph/node/edge keywords.
bool statement::is_next(tokenizer &tokens, int i, void *data)
{
	if (node_id::is_next(tokens, i, data))
		return true;
	return tokens.is_next("subgraph")
	    || tokens.is_next("graph")
	    || tokens.is_next("node")
	    || tokens.is_next("edge");
}
开发者ID:nbingham1,项目名称:parse_dot,代码行数:8,代码来源:statement.cpp
示例9: tokenize_vertical
// Reads paragraphs from is, tokenizes each one, and writes the tokens in
// vertical format: one token per line, sentences separated by a blank line.
void tokenize_vertical(istream& is, ostream& os, tokenizer& tokenizer) {
  string paragraph;
  vector<string_piece> sentence;
  while (getpara(is, paragraph)) {
    // Tokenize
    tokenizer.set_text(paragraph);
    while (tokenizer.next_sentence(&sentence, nullptr)) {
      for (size_t i = 0; i < sentence.size(); i++)
        os << sentence[i] << '\n';
      os << '\n' << flush;   // blank line ends the sentence
    }
  }
}
开发者ID:ufal,项目名称:nametag,代码行数:14,代码来源:run_tokenizer.cpp
示例10: stmt_def_field
// Parses a field definition: a field name followed by '{' <tokens> '}'
// giving the initial value.  Throws compiler_error on malformed input.
inline stmt_def_field(const statement&parent,const token&tk,tokenizer&t):
	statement{parent,tk},
	ident_{t.next_token()}
{
	if(ident_.is_name(""))
		throw compiler_error(ident_,"expected field name");
	if(!t.is_next_char('{'))
		throw compiler_error(ident_,"expected '{' initial value then '}' ",ident_.name());
	// collect the initial-value tokens up to the closing brace
	while(!t.is_next_char('}'))
		tokens_.push_back(t.next_token());
}
开发者ID:calint,项目名称:compiler-2,代码行数:15,代码来源:stmt_def_field.hpp
示例11: extract_control
// Reads one control token (CR or LF) into result.  Returns false -- leaving
// result cleared -- when the next input is not one of the control strings.
template <typename R> bool extract_control(R& result,tokenizer& tokenizer)
{
result.clear();
//controls: maps each recognized control string to its symbol id
dictionary<string,id<string>> controls=
{
"\r",symbols()._cr,
"\n",symbols()._lf
};
//buffer
array<ascii> buffer;
//any: match whichever of the control strings comes next, if any
if(!tokenizer.any(buffer,controls.keys()))
return false;
//commit: publish the match and record the matching control's symbol
result=buffer;
update(controls[buffer.join("")],buffer);
return true;
}
开发者ID:vmorgulys,项目名称:sandbox,代码行数:29,代码来源:class.token.h.extract.cpp
示例12: stmt_def_func_param
// Parses a function parameter: a name optionally followed by a ':'-separated
// list of keyword tokens.  Throws compiler_error if the stream ends mid-list.
inline stmt_def_func_param(const statement&parent,tokenizer&t):
	statement{parent,t.next_token()}
{
	assert(!tok().is_name(""));
	// no ':' means the parameter carries no keyword list
	if(!t.is_next_char(':'))
		return;
	do{
		if(t.is_eos())throw compiler_error(*this,"unexpected end of stream",tok().name_copy());
		keywords_.push_back(t.next_token());
	}while(t.is_next_char(':'));
}
开发者ID:calint,项目名称:compiler-2,代码行数:16,代码来源:stmt_def_func_param.hpp
示例13: parse
// Parses a sequence of one or more assignment lists into `attributes`.
// The increment/expect/decrement calls drive the tokenizer's expectation
// stack: each iteration announces that another assignment_list may follow
// and consumes it while decrement() reports one is available.
// NOTE(review): increment(false) presumably marks the expectation as
// optional -- confirm against the tokenizer API.
void attribute_list::parse(tokenizer &tokens, void *data)
{
tokens.syntax_start(this);
tokens.increment(false);
tokens.expect<assignment_list>();
while (tokens.decrement(__FILE__, __LINE__, data))
{
attributes.push_back(assignment_list(tokens, data));
// Queue the expectation for an optional next assignment list.
tokens.increment(false);
tokens.expect<assignment_list>();
}
tokens.syntax_end(this);
}
开发者ID:nbingham1,项目名称:parse_dot,代码行数:17,代码来源:attribute_list.cpp
示例14: read_scene
// Parses an MQO "Scene" chunk, keeping only the ambient colour; all other
// attributes are skipped line by line.
void read_scene(tokenizer& t,document_type& doc)
{
    //std::cerr << "unsupported data: Scene" << std::endl;
    t.expect_literal( "{" );
    for( ;; ) {
        substr tok = t();
        if( tok == "}" )
            break;                       // end of the Scene chunk
        if( tok != "amb" ) {
            skip_to_linefeed(t);         // unsupported attribute
            continue;
        }
        // Ambient light colour: three floats in [0,1]; alpha is implied.
        doc.scene.ambient.red   = t.expect_float(0,1);
        doc.scene.ambient.green = t.expect_float(0,1);
        doc.scene.ambient.blue  = t.expect_float(0,1);
        doc.scene.ambient.alpha = 1;
        t.expect_linefeed();
    }
}
开发者ID:jonigata,项目名称:yamadumi,代码行数:18,代码来源:mqoreader.cpp
示例15: tokenize_xml
// Reads paragraphs from is, tokenizes them, and writes the text back as XML:
// <sentence> and <token> elements wrap the detected units while the text
// between tokens is echoed verbatim (XML-escaped).
static void tokenize_xml(istream& is, ostream& os, tokenizer& tokenizer) {
string para;
vector<string_piece> forms;
while (getpara(is, para)) {
// Tokenize
tokenizer.set_text(para);
// `unprinted` tracks the first character of para not yet written to os.
const char* unprinted = para.c_str();
while (tokenizer.next_sentence(&forms, nullptr))
for (unsigned i = 0; i < forms.size(); i++) {
// Echo any raw text between the previous token and this one.
if (unprinted < forms[i].str) os << xml_encoded(string_piece(unprinted, forms[i].str - unprinted));
if (!i) os << "<sentence>";
os << "<token>" << xml_encoded(forms[i]) << "</token>";
if (i + 1 == forms.size()) os << "</sentence>";
unprinted = forms[i].str + forms[i].len;
}
// Echo anything after the last token, then flush the paragraph.
if (unprinted < para.c_str() + para.size()) os << xml_encoded(string_piece(unprinted, para.c_str() + para.size() - unprinted));
os << flush;
}
}
开发者ID:ufal,项目名称:nametag,代码行数:20,代码来源:run_tokenizer.cpp
示例16: tag_vertical
// Reads paragraphs from is, tokenizes and tags them, and writes vertical
// output: one "form TAB lemma TAB tag" line per token, with sentences
// separated by a blank line (endl also flushes after every sentence).
void tag_vertical(istream& is, ostream& os, const tagger& tagger, tokenizer& tokenizer, const tagset_converter& tagset_converter, const derivation_formatter& derivation, morpho::guesser_mode guesser) {
string para;
vector<string_piece> forms;
vector<tagged_lemma> tags;
while (getpara(is, para)) {
// Tokenize and tag
tokenizer.set_text(para);
while (tokenizer.next_sentence(&forms, nullptr)) {
tagger.tag(forms, tags, guesser);
// NOTE(review): the loop bound is tags.size() but forms[i] is indexed
// too -- assumes the tagger yields exactly one tag per form; confirm.
for (unsigned i = 0; i < tags.size(); i++) {
// Convert the tag into the requested tagset and reformat the lemma.
tagset_converter.convert(tags[i]);
derivation.format_derivation(tags[i].lemma);
os << forms[i] << '\t' << tags[i].lemma << '\t' << tags[i].tag << '\n';
}
os << endl;
}
}
}
开发者ID:ufal,项目名称:morphodita,代码行数:20,代码来源:run_tagger.cpp
示例17: asInt
// Builds a damage record from a combat-log line.  The damage fields occupy
// consecutive columns starting at 22; offset shifts the column index for
// log variants with extra leading fields.
damage::damage(tokenizer& t, int offset)
{
	int col = 22 + offset;       // first damage column
	dmgDone     = asInt(t.token(col++));
	overkill    = asInt(t.token(col++));
	magicSchool = asInt(t.token(col++));
	resisted    = asInt(t.token(col++));
	blocked     = asInt(t.token(col++));
	absorbed    = asInt(t.token(col++));
	critical    = asInt(t.token(col++));
	glancing    = asInt(t.token(col++));
	crushing    = asInt(t.token(col++));
	multistrike = asInt(t.token(col++));
}
开发者ID:alhunor,项目名称:projects,代码行数:13,代码来源:events.cpp
示例18: recognize_untokenized
// Reads paragraphs from is, tokenizes them, runs named-entity recognition,
// and echoes the text back as XML: <sentence>/<token> markup plus nested
// <ne type="..."> elements around recognized entities.  Text between tokens
// is copied through verbatim (XML-escaped) via the `unprinted` cursor.
// NOTE(review): entity_ends is used as a stack, which assumes sort_entities
// yields properly nested entities (later-opened close earlier) -- confirm.
void recognize_untokenized(istream& is, ostream& os, const ner& recognizer, tokenizer& tokenizer) {
string para;
vector<string_piece> forms;
vector<named_entity> entities;
// Stack of token indices at which currently-open <ne> elements close.
vector<size_t> entity_ends;
while (getpara(is, para)) {
// Tokenize the text and find named entities
tokenizer.set_text(para);
const char* unprinted = para.c_str();
while (tokenizer.next_sentence(&forms, nullptr)) {
recognizer.recognize(forms, entities);
sort_entities(entities);
for (unsigned i = 0, e = 0; i < forms.size(); i++) {
// Echo any raw text between the previous token and this one.
if (unprinted < forms[i].str) os << xml_encoded(string_piece(unprinted, forms[i].str - unprinted));
if (i == 0) os << "<sentence>";
// Open entities starting at current token
for (; e < entities.size() && entities[e].start == i; e++) {
os << "<ne type=\"" << xml_encoded(entities[e].type, true) << "\">";
entity_ends.push_back(entities[e].start + entities[e].length - 1);
}
// The token itself
os << "<token>" << xml_encoded(forms[i]) << "</token>";
// Close entities ending after current token
while (!entity_ends.empty() && entity_ends.back() == i) {
os << "</ne>";
entity_ends.pop_back();
}
if (i + 1 == forms.size()) os << "</sentence>";
unprinted = forms[i].str + forms[i].len;
}
}
// Write rest of the text (should be just spaces)
if (unprinted < para.c_str() + para.size()) os << xml_encoded(string_piece(unprinted, para.c_str() + para.size() - unprinted));
os << flush;
}
}
开发者ID:ufal,项目名称:nametag,代码行数:41,代码来源:run_ner.cpp
示例19: cur
// Parses the token stream into a list of top-level statements.  Stops when
// parseStatement returns a statement with no sub-tokens (end of input);
// any remaining unparsed text is appended as a Raw statement.
std::list<toSQLParse::statement> toSQLParse::parse(tokenizer &tokens)
{
std::list<toSQLParse::statement> ret;
statement cur(statement::Statement);
// Each iteration consumes one statement; an empty subTokens() list marks
// the end of the stream.
for (cur = parseStatement(tokens, false, false);
cur.subTokens().begin() != cur.subTokens().end();
cur = parseStatement(tokens, false, false))
{
if (cur.Type == statement::List)
{
// A List at top level means a ')' without a matching '('.
QMessageBox::warning(QApplication::activeWindow(), "Sqliteman",
"toSQLparse: Unbalanced parenthesis (Too many ')')");
}
ret.insert(ret.end(), cur);
}
// Preserve whatever text remains after the last complete statement.
QString str = tokens.remaining(false);
if (!str.isEmpty())
ret.insert(ret.end(), statement(statement::Raw,
str, tokens.line()));
return ret;
}
开发者ID:MatiasNAmendola,项目名称:sqliteman,代码行数:21,代码来源:tosqlparse.cpp
示例20: recognize_conll
// Reads paragraphs from is, tokenizes them, runs named-entity recognition,
// and writes CoNLL-style output: one token per line followed by a tab and
// either "B-<type>" / "I-<type>" for the first / subsequent tokens of an
// entity, or "_" outside entities.  Sentences are separated by blank lines.
// Overlapping entities are flattened: while inside an entity, any entity
// starting before it ends is ignored.
void recognize_conll(istream& is, ostream& os, const ner& recognizer, tokenizer& tokenizer) {
string para;
vector<string_piece> forms;
vector<named_entity> entities;
while (getpara(is, para)) {
// Tokenize and tag
tokenizer.set_text(para);
while (tokenizer.next_sentence(&forms, nullptr)) {
recognizer.recognize(forms, entities);
sort_entities(entities);
string entity_type;
unsigned in_entity = 0;      // tokens remaining in the current entity
// Fix: was declared uninitialized; although only read after being set,
// explicit initialization removes the bugprone pattern.
bool entity_start = false;   // true exactly at an entity's first token
for (unsigned i = 0, e = 0; i < forms.size(); i++) {
// Enter the next entity when it begins at this token.
if (!in_entity && e < entities.size() && entities[e].start == i) {
in_entity = entities[e].length;
entity_start = true;
entity_type = entities[e].type;
e++;
}
os << forms[i] << '\t';
if (in_entity) {
os << (entity_start ? "B-" : "I-") << entity_type;
entity_start = false;
in_entity--;
} else {
os << '_';
}
os << '\n';
}
os << '\n' << flush;
}
}
}
开发者ID:ufal,项目名称:nametag,代码行数:38,代码来源:run_ner.cpp
注:本文中的tokenizer类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论