本文整理汇总了C++中WERD_CHOICE类的典型用法代码示例。如果您正苦于以下问题:C++ WERD_CHOICE类的具体用法?C++ WERD_CHOICE怎么用?C++ WERD_CHOICE使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了WERD_CHOICE类的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: TextLength
// Builds a single text string from the recognition results in page_res.
// Every word that has a best choice contributes its text followed by '\n'
// when the word carries the W_EOL flag, or by a single space otherwise.
// A final '\n' and the terminating NUL close the string.
// Returns a buffer allocated with new[] and sized by TextLength(), or NULL
// when page_res is NULL. The input page_res is deleted.
char* TessBaseAPI::TesseractToText(PAGE_RES* page_res) {
  if (page_res == NULL)
    return NULL;

  int buffer_size = TextLength(page_res);
  PAGE_RES_IT it(page_res);
  char* text = new char[buffer_size];
  char* out = text;  // Write cursor into text.

  for (it.restart_page(); it.word() != NULL; it.forward()) {
    WERD_RES* res = it.word();
    WERD_CHOICE* best = res->best_choice;
    if (best == NULL)
      continue;  // Words without a best choice add nothing to the output.
    strcpy(out, best->string().string());
    out += strlen(out);
    // Separator: newline at end of line, space between words.
    *out++ = res->word->flag(W_EOL) ? '\n' : ' ';
  }

  *out++ = '\n';
  *out = '\0';
  delete page_res;
  return text;
}
开发者ID:GaryShearer,项目名称:BasicOCR,代码行数:28,代码来源:baseapi.cpp
示例2: AcceptableChoice
// Returns true if best_choice is good enough to be accepted.
// The choice is accepted only when it has no dangerous ambiguities, its
// certainty exceeds the threshold, xheight_consistency is better than
// XH_INCONSISTENT and UniformCertainties() holds. The threshold starts at
// stopper_nondict_certainty_base and is adjusted per character beyond
// stopper_smallword_size for valid, correctly cased words.
// Emits diagnostics through tprintf when stopper_debug_level >= 1.
bool Dict::AcceptableChoice(const WERD_CHOICE& best_choice,
                            XHeightConsistencyEnum xheight_consistency) {
  if (stopper_no_acceptable_choices) return false;
  if (best_choice.length() == 0) return false;

  const bool no_dang_ambigs = !best_choice.dangerous_ambig_found();
  const bool is_valid_word =
      valid_word_permuter(best_choice.permuter(), false);
  const bool is_case_ok = case_ok(best_choice, getUnicharset());

  if (stopper_debug_level >= 1) {
    // Human-readable name of the x-height consistency for the debug line.
    const char *xht;
    if (xheight_consistency == XH_GOOD) {
      xht = "NORMAL";
    } else if (xheight_consistency == XH_SUBNORMAL) {
      xht = "SUBNORMAL";
    } else if (xheight_consistency == XH_INCONSISTENT) {
      xht = "INCONSISTENT";
    } else {
      xht = "UNKNOWN";
    }
    tprintf("\nStopper: %s (word=%c, case=%c, xht_ok=%s=[%g,%g])\n",
            best_choice.unichar_string().string(),
            (is_valid_word ? 'y' : 'n'),
            (is_case_ok ? 'y' : 'n'),
            xht,
            best_choice.min_x_height(),
            best_choice.max_x_height());
  }

  // Do not accept invalid words in PASS1.
  if (reject_offset_ <= 0.0f && !is_valid_word) return false;

  float threshold = stopper_nondict_certainty_base;
  if (is_valid_word && is_case_ok) {
    // Count the characters of the shortest alpha run that exceed the
    // small-word size (never negative) and scale the threshold by them.
    int extra_chars = LengthOfShortestAlphaRun(best_choice);
    extra_chars -= stopper_smallword_size;
    if (extra_chars < 0)
      extra_chars = 0;
    threshold += extra_chars * stopper_certainty_per_char;
  }

  if (stopper_debug_level >= 1) {
    tprintf("Stopper: Rating = %4.1f, Certainty = %4.1f, Threshold = %4.1f\n",
            best_choice.rating(), best_choice.certainty(), threshold);
  }

  // Short-circuit order matters: UniformCertainties() is only consulted when
  // all cheaper checks have already passed.
  const bool accepted = no_dang_ambigs &&
                        best_choice.certainty() > threshold &&
                        xheight_consistency < XH_INCONSISTENT &&
                        UniformCertainties(best_choice);
  if (accepted) return true;

  if (stopper_debug_level >= 1) {
    tprintf("AcceptableChoice() returned false"
            " (no_dang_ambig:%d cert:%.4g thresh:%g uniform:%d)\n",
            no_dang_ambigs, best_choice.certainty(),
            threshold,
            UniformCertainties(best_choice));
  }
  return false;
}
开发者ID:ming-hai,项目名称:tesseract,代码行数:59,代码来源:stopper.cpp
示例3: count_alphas
// Returns the number of unichars in word that the word's own unicharset
// reports as alphabetic.
inT16 Tesseract::count_alphas(const WERD_CHOICE &word) {
  int alphas = 0;
  for (int pos = 0; pos < word.length(); ++pos) {
    alphas += word.unicharset()->get_isalpha(word.unichar_id(pos)) ? 1 : 0;
  }
  return alphas;
}
开发者ID:mehulsbhatt,项目名称:MyOCRTEST,代码行数:8,代码来源:output.cpp
示例4: absolute_garbage
// Returns true if word is at least kMinAbsoluteGarbageWordLength unichars
// long and the fraction of its unichars that are alphabetic or digits
// (according to unicharset) is below kMinAbsoluteGarbageAlphanumFrac.
// Shorter words are never reported as garbage.
bool Dict::absolute_garbage(const WERD_CHOICE &word,
                            const UNICHARSET &unicharset) {
  if (word.length() < kMinAbsoluteGarbageWordLength) return false;

  int alnum_count = 0;
  for (int pos = 0; pos < word.length(); ++pos) {
    if (unicharset.get_isalpha(word.unichar_id(pos)) ||
        unicharset.get_isdigit(word.unichar_id(pos))) {
      ++alnum_count;
    }
  }

  const float alnum_frac = static_cast<float>(alnum_count) /
                           static_cast<float>(word.length());
  return alnum_frac < kMinAbsoluteGarbageAlphanumFrac;
}
开发者ID:0ximDigital,项目名称:appsScanner,代码行数:11,代码来源:context.cpp
示例5: UniformCertainties
int Dict::UniformCertainties(const WERD_CHOICE& word) {
float Certainty;
float WorstCertainty = MAX_FLOAT32;
float CertaintyThreshold;
FLOAT64 TotalCertainty;
FLOAT64 TotalCertaintySquared;
FLOAT64 Variance;
FLOAT32 Mean, StdDev;
int word_length = word.length();
if (word_length < 3)
return true;
TotalCertainty = TotalCertaintySquared = 0.0;
for (int i = 0; i < word_length; ++i) {
Certainty = word.certainty(i);
TotalCertainty += Certainty;
TotalCertaintySquared += Certainty * Certainty;
if (Certainty < WorstCertainty)
WorstCertainty = Certainty;
}
// Subtract off worst certainty from statistics.
word_length--;
TotalCertainty -= WorstCertainty;
TotalCertaintySquared -= WorstCertainty * WorstCertainty;
Mean = TotalCertainty / word_length;
Variance = ((word_length * TotalCertaintySquared -
TotalCertainty * TotalCertainty) /
(word_length * (word_length - 1)));
if (Variance < 0.0)
Variance = 0.0;
StdDev = sqrt(Variance);
CertaintyThreshold = Mean - stopper_allowable_character_badness * StdDev;
if (CertaintyThreshold > stopper_nondict_certainty_base)
CertaintyThreshold = stopper_nondict_certainty_base;
if (word.certainty() < CertaintyThreshold) {
if (stopper_debug_level >= 1)
tprintf("Stopper: Non-uniform certainty = %4.1f"
" (m=%4.1f, s=%4.1f, t=%4.1f)\n",
word.certainty(), Mean, StdDev, CertaintyThreshold);
return false;
} else {
return true;
}
}
开发者ID:ming-hai,项目名称:tesseract,代码行数:49,代码来源:stopper.cpp
示例6: WERD_CHOICE
// Records word as the current hyphenated word if its rating is lower than
// that of the stored hyphen_word_, and copies active_dawgs into
// hyphen_active_dawgs_ in that case. hyphen_word_ is created on first use
// and initialised with make_bad().
void Dict::set_hyphen_word(const WERD_CHOICE &word,
                           const DawgPositionVector &active_dawgs) {
  if (hyphen_word_ == NULL) {
    hyphen_word_ = new WERD_CHOICE(word.unicharset());
    hyphen_word_->make_bad();
  }

  const bool improves = word.rating() < hyphen_word_->rating();
  if (improves) {
    *hyphen_word_ = word;
    // The final unichar id is the hyphen itself: drop it, along with any
    // unichar_string/lengths that are present.
    hyphen_word_->remove_last_unichar_id();
    hyphen_active_dawgs_ = active_dawgs;
  }

  if (hyphen_debug_level) {
    hyphen_word_->print("set_hyphen_word: ");
  }
}
开发者ID:0ximDigital,项目名称:appsScanner,代码行数:19,代码来源:hyphen.cpp
示例7: case_ok
int Dict::case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset) {
int state = 0;
int x;
for (x = 0; x < word.length(); ++x) {
UNICHAR_ID ch_id = word.unichar_id(x);
if (unicharset.get_isupper(ch_id))
state = case_state_table[state][1];
else if (unicharset.get_islower(ch_id))
state = case_state_table[state][2];
else if (unicharset.get_isdigit(ch_id))
state = case_state_table[state][3];
else
state = case_state_table[state][0];
if (state == -1) return false;
}
return state != 5; // single lower is bad
}
开发者ID:0ximDigital,项目名称:appsScanner,代码行数:17,代码来源:context.cpp
示例8: LengthOfShortestAlphaRun
// Returns the length of the shortest run of consecutive alphabetic unichars
// in WordChoice (as classified by getUnicharset()), or 0 if the word
// contains no alphabetic unichars at all.
int Dict::LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const {
  int shortest = INT32_MAX;
  int run = 0;  // Length of the alpha run currently being scanned.
  for (int pos = 0; pos < WordChoice.length(); ++pos) {
    if (getUnicharset().get_isalpha(WordChoice.unichar_id(pos))) {
      ++run;
      continue;
    }
    // A non-alpha unichar closes the current run, if there was one.
    if (run > 0 && run < shortest) shortest = run;
    run = 0;
  }

  // The word may end while still inside a run.
  if (run > 0 && run < shortest) return run;
  return shortest == INT32_MAX ? 0 : shortest;
}
开发者ID:ming-hai,项目名称:tesseract,代码行数:18,代码来源:stopper.cpp
示例9: page_res_it
// Returns an upper bound on the number of chars needed for the output text.
// The count starts at 2, then for every word with a best choice it adds the
// length of the word's string plus one, plus one more for every rejected
// entry in the word's reject_map.
int TessBaseAPI::TextLength(PAGE_RES* page_res) {
  int total_length = 2;
  PAGE_RES_IT it(page_res);
  // Walk the recognition results word by word.
  for (it.restart_page(); it.word() != NULL; it.forward()) {
    WERD_RES* res = it.word();
    WERD_CHOICE* best = res->best_choice;
    if (best == NULL)
      continue;
    total_length += best->string().length() + 1;
    for (int pos = 0; pos < res->reject_map.length(); ++pos) {
      if (res->reject_map[pos].rejected())
        ++total_length;
    }
  }
  return total_length;
}
开发者ID:GaryShearer,项目名称:BasicOCR,代码行数:19,代码来源:baseapi.cpp
示例10: AcceptableResult
// Returns true if BestChoice should be accepted as the final result.
// Empty words, ambiguous words (CurrentWordAmbig()) and choices with a
// fragment mark are rejected outright. Otherwise the choice is accepted when
// its certainty exceeds the threshold and stopper_no_acceptable_choices is
// not set. The threshold starts at
// stopper_nondict_certainty_base - reject_offset_ and is adjusted per
// character beyond stopper_smallword_size for valid, correctly cased words.
// Emits diagnostics when stopper_debug_level >= 1.
bool Dict::AcceptableResult(const WERD_CHOICE &BestChoice) {
  const bool debug = stopper_debug_level >= 1;

  if (debug) {
    tprintf("\nRejecter: %s (word=%c, case=%c, unambig=%c)\n",
            BestChoice.debug_string(getUnicharset()).string(),
            (valid_word(BestChoice) ? 'y' : 'n'),
            (case_ok(BestChoice, getUnicharset()) ? 'y' : 'n'),
            ((list_rest (best_choices_) != NIL_LIST) ? 'n' : 'y'));
  }

  if (BestChoice.length() == 0 || CurrentWordAmbig())
    return false;

  if (BestChoice.fragment_mark()) {
    if (debug) {
      cprintf("AcceptableResult(): a choice with fragments beats BestChoice\n");
    }
    return false;
  }

  float threshold = stopper_nondict_certainty_base - reject_offset_;
  if (valid_word(BestChoice) && case_ok(BestChoice, getUnicharset())) {
    // Count the characters of the shortest alpha run that exceed the
    // small-word size (never negative) and scale the threshold by them.
    int extra_chars = LengthOfShortestAlphaRun(BestChoice);
    extra_chars -= stopper_smallword_size;
    if (extra_chars < 0)
      extra_chars = 0;
    threshold += extra_chars * stopper_certainty_per_char;
  }

  if (debug) {
    cprintf ("Rejecter: Certainty = %4.1f, Threshold = %4.1f ",
             BestChoice.certainty(), threshold);
  }

  const bool accepted = BestChoice.certainty() > threshold &&
                        !stopper_no_acceptable_choices;
  if (debug) {
    if (accepted)
      cprintf("ACCEPTED\n");
    else
      cprintf("REJECTED\n");
  }
  return accepted;
}
开发者ID:0359xiaodong,项目名称:tess-two,代码行数:44,代码来源:stopper.cpp
示例11: prefix_in_dawg
bool Dawg::prefix_in_dawg(const WERD_CHOICE &word,
bool requires_complete) const {
if (word.length() == 0) return !requires_complete;
NODE_REF node = 0;
int end_index = word.length() - 1;
for (int i = 0; i < end_index; i++) {
EDGE_REF edge = edge_char_of(node, word.unichar_id(i), false);
if (edge == NO_EDGE) {
return false;
}
if ((node = next_node(edge)) == 0) {
// This only happens if all words following this edge terminate --
// there are no larger words. See Trie::add_word_to_dawg()
return false;
}
}
// Now check the last character.
return edge_char_of(node, word.unichar_id(end_index), requires_complete) !=
NO_EDGE;
}
开发者ID:Shreeshrii,项目名称:tesseract,代码行数:20,代码来源:dawg.cpp
示例12: res_it
// Returns an array of all word confidences, terminated by -1.
// The array holds one entry per word in page_res, each clamped to 0..100,
// and is allocated with new[]; the caller releases it with delete [].
// Returns NULL when page_res is NULL. page_res itself is not deleted here.
// A word that has no best choice is reported with confidence 0.
int* TessBaseAPI::AllTextConfidences(PAGE_RES* page_res) {
  if (!page_res) return NULL;

  // First pass: count the words so the array can be sized.
  int n_word = 0;
  PAGE_RES_IT res_it(page_res);
  for (res_it.restart_page(); res_it.word() != NULL; res_it.forward())
    n_word++;

  int* conf = new int[n_word + 1];  // +1 for the -1 terminator.
  n_word = 0;
  // Second pass: convert each word's certainty into a confidence value.
  for (res_it.restart_page(); res_it.word() != NULL; res_it.forward()) {
    WERD_RES *word = res_it.word();
    WERD_CHOICE* choice = word->best_choice;
    // best_choice may be NULL (TesseractToText and TextLength guard against
    // this as well). Keep one entry per word so indices stay aligned with
    // the word order, and report zero confidence rather than dereferencing
    // a NULL pointer.
    int w_conf = 0;
    if (choice != NULL) {
      // This is the eq for converting Tesseract certainty to a confidence,
      // which is then clamped to 0..100.
      w_conf = static_cast<int>(100 + 5 * choice->certainty());
      if (w_conf < 0) w_conf = 0;
      if (w_conf > 100) w_conf = 100;
    }
    conf[n_word++] = w_conf;
  }
  conf[n_word] = -1;
  return conf;
}
开发者ID:GaryShearer,项目名称:BasicOCR,代码行数:22,代码来源:baseapi.cpp
示例13: pr_it
/**
 * Gives every WERD_RES in page_res a fake best choice built from its
 * correct_text. Each correct_text entry supplies one unichar (the token
 * before the first space), appended with the matching best_state entry and
 * zero rating and certainty. The word's existing choices are cleared and the
 * new choice is logged as both the raw and the cooked choice.
 */
void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) {
  PAGE_RES_IT pr_it(page_res);
  WERD_RES *word_res = pr_it.word();
  while (word_res != NULL) {
    const int num_chars = word_res->correct_text.size();
    WERD_CHOICE* choice = new WERD_CHOICE(word_res->uch_set, num_chars);
    for (int pos = 0; pos < num_chars; ++pos) {
      // Only the part before the first space is the real ground truth; the
      // remainder is the bounding box location and page number.
      GenericVector<STRING> tokens;
      word_res->correct_text[pos].split(' ', &tokens);
      UNICHAR_ID char_id = unicharset.unichar_to_id(tokens[0].string());
      choice->append_unichar_id_space_allocated(char_id,
                                                word_res->best_state[pos],
                                                0.0f, 0.0f);
    }
    word_res->ClearWordChoices();
    word_res->LogNewRawChoice(choice);
    word_res->LogNewCookedChoice(1, false, choice);
    word_res = pr_it.forward();
  }
}
开发者ID:0xkasun,项目名称:tesseract,代码行数:22,代码来源:applybox.cpp
示例14: EqualIgnoringCaseAndTerminalPunct
// Compares two words case-insensitively over the spans reported by
// punct_stripped() (presumably the words without their leading and trailing
// punctuation -- confirm against WERD_CHOICE::punct_stripped).
// Returns false if the words use different unicharsets, if the spans differ
// in length, or if any pair of lower-cased unichar ids differs.
bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1,
                                       const WERD_CHOICE &word2) {
  const UNICHARSET *uchset = word1.unicharset();
  if (word2.unicharset() != uchset) return false;

  int start1, end1;
  word1.punct_stripped(&start1, &end1);
  int start2, end2;
  word2.punct_stripped(&start2, &end2);

  const int span = end1 - start1;
  if (span != end2 - start2) return false;

  for (int offset = 0; offset < span; ++offset) {
    const bool same_letter =
        uchset->to_lower(word1.unichar_id(start1 + offset)) ==
        uchset->to_lower(word2.unichar_id(start2 + offset));
    if (!same_letter) return false;
  }
  return true;
}
开发者ID:11110101,项目名称:tess-two,代码行数:17,代码来源:ratngs.cpp
注:本文中的WERD_CHOICE类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论