本文整理汇总了C++中UNICHARSET类的典型用法代码示例。如果您正苦于以下问题：C++ UNICHARSET类的具体用法？C++ UNICHARSET怎么用？C++ UNICHARSET使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了UNICHARSET类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: StrLen
// Builds a zero-terminated copy of str32 in which every character that the
// unicharset behind char_set reports as lower-case is replaced by its
// other-case form; all remaining characters are copied unchanged.
//
// Returns NULL if char_set is NULL, if str32 contains INVALID_UNICHAR_ID, or
// if the other-case form of a lower-case character is missing or is not
// exactly one character long. On success the result is an array obtained
// from new[] that this function does not free.
char_32 *CubeUtils::ToUpper(const char_32 *str32, CharSet *char_set) {
  if (char_set == NULL) {
    return NULL;
  }
  UNICHARSET *unicharset = char_set->InternalUnicharset();
  const int length = StrLen(str32);
  char_32 *result = new char_32[length + 1];
  if (result == NULL) {
    return NULL;
  }
  for (int pos = 0; pos < length; ++pos) {
    const char_32 current = str32[pos];
    if (current == INVALID_UNICHAR_ID) {
      delete[] result;
      return NULL;
    }
    // Anything that is not lower-case is copied through untouched.
    if (!unicharset->get_islower(char_set->ClassID(current))) {
      result[pos] = current;
      continue;
    }
    // Look up the other-case form of the lower-case character.
    const UNICHAR_ID upper_id =
        unicharset->get_other_case(char_set->ClassID(current));
    const char_32 *upper_str = char_set->ClassString(upper_id);
    // The upper-case form must be exactly one character, otherwise give up.
    if (upper_str == NULL || StrLen(upper_str) != 1) {
      delete[] result;
      return NULL;
    }
    result[pos] = upper_str[0];
  }
  result[length] = 0;
  return result;
}
开发者ID:mehulsbhatt,项目名称:MyOCRTEST,代码行数:32,代码来源:cube_utils.cpp
示例2: SetPropertiesForInputFile
// Helper to set the properties for an input unicharset file and write the
// result to the output file.
//
// script_dir is handed to SetScriptProperties and GetXheightString, which
// look up per-script data there.
// input_unicharset_file is the unicharset to load.
// output_unicharset_file receives the updated unicharset.
// output_xheights_file, if non-empty, receives the combined x-height string
// returned by GetXheightString.
//
// If the input unicharset cannot be loaded, an error is logged and nothing is
// written. A failure to save the output unicharset is logged as well.
void SetPropertiesForInputFile(const std::string& script_dir,
                               const std::string& input_unicharset_file,
                               const std::string& output_unicharset_file,
                               const std::string& output_xheights_file) {
  UNICHARSET unicharset;

  // Load the input unicharset. Continuing after a failed load would only
  // produce output files derived from an unloaded set, so stop here.
  if (!unicharset.load_from_file(input_unicharset_file.c_str())) {
    tprintf("Failed to load unicharset from file %s\n",
            input_unicharset_file.c_str());
    return;
  }
  tprintf("Loaded unicharset of size %d from file %s\n", unicharset.size(),
          input_unicharset_file.c_str());

  // Set unichar properties
  tprintf("Setting unichar properties\n");
  SetupBasicProperties(true, false, &unicharset);
  tprintf("Setting script properties\n");
  SetScriptProperties(script_dir, &unicharset);
  if (!output_xheights_file.empty()) {
    std::string xheights_str = GetXheightString(script_dir, unicharset);
    File::WriteStringToFileOrDie(xheights_str, output_xheights_file);
  }

  // Write the output unicharset
  tprintf("Writing unicharset to file %s\n", output_unicharset_file.c_str());
  if (!unicharset.save_to_file(output_unicharset_file.c_str())) {
    tprintf("Failed to write unicharset to file %s\n",
            output_unicharset_file.c_str());
  }
}
开发者ID:jan-ruzicka,项目名称:tesseract,代码行数:30,代码来源:unicharset_training_utils.cpp
示例3: PartialSetPropertiesFromOther
// Sets all the properties for this unicharset given a src unicharset with
// everything set. The unicharsets don't have to be the same, and graphemes
// are correctly accounted for.
void UNICHARSET::PartialSetPropertiesFromOther(int start_index,
const UNICHARSET& src) {
for (int ch = start_index; ch < size_used; ++ch) {
const char* utf8 = id_to_unichar(ch);
UNICHAR_PROPERTIES properties;
if (src.GetStrProperties(utf8, &properties)) {
// Setup the script_id, other_case, and mirror properly.
const char* script = src.get_script_from_script_id(properties.script_id);
properties.script_id = add_script(script);
const char* other_case = src.id_to_unichar(properties.other_case);
if (contains_unichar(other_case)) {
properties.other_case = unichar_to_id(other_case);
} else {
properties.other_case = ch;
}
const char* mirror_str = src.id_to_unichar(properties.mirror);
if (contains_unichar(mirror_str)) {
properties.mirror = unichar_to_id(mirror_str);
} else {
properties.mirror = ch;
}
unichars[ch].properties.CopyFrom(properties);
set_normed_ids(ch);
} else {
tprintf("Failed to get properties for index %d = %s\n", ch, utf8);
}
}
}
开发者ID:0ximDigital,项目名称:appsScanner,代码行数:31,代码来源:unicharset.cpp
示例4: main
// Extracts a unicharset from one or more box files.
//
// Usage: <program> [-D DIRECTORY] FILE...
// Every unichar string read from the given box files is inserted into a
// single UNICHARSET, which is then saved as kUnicharsetFileName inside the
// output directory ("." unless -D is given).
//
// Returns 0 on success and -1 if a box file cannot be opened or the
// unicharset cannot be saved. Exits with status 1 when called without
// arguments.
int main(int argc, char** argv) {
  int option;
  const char* output_directory = ".";
  STRING unicharset_file_name;
  // Special characters are now included by default.
  UNICHARSET unicharset;

  setlocale(LC_ALL, "");

  // Print usage
  if (argc <= 1) {
    printf("Usage: %s [-D DIRECTORY] FILE...\n", argv[0]);
    exit(1);
  }

  // Parse arguments
  while ((option = tessopt(argc, argv, "D" )) != EOF) {
    switch (option) {
      case 'D':
        output_directory = tessoptarg;
        ++tessoptind;
        break;
    }
  }

  // Save file name
  unicharset_file_name = output_directory;
  unicharset_file_name += "/";
  unicharset_file_name += kUnicharsetFileName;

  // Load box files
  for (; tessoptind < argc; ++tessoptind) {
    printf("Extracting unicharset from %s\n", argv[tessoptind]);
    FILE* box_file = fopen(argv[tessoptind], "rb");
    if (box_file == NULL) {
      printf("Cannot open box file %s\n", argv[tessoptind]);
      return -1;
    }
    TBOX box;
    STRING unichar_string;
    int line_number = 0;
    while (ReadNextBox(&line_number, box_file, &unichar_string, &box)) {
      unicharset.unichar_insert(unichar_string.string());
      set_properties(&unicharset, unichar_string.string());
    }
    // Release the handle before moving on; otherwise one FILE is leaked per
    // box file given on the command line.
    fclose(box_file);
  }

  // Write unicharset file
  if (unicharset.save_to_file(unicharset_file_name.string())) {
    printf("Wrote unicharset file %s.\n", unicharset_file_name.string());
  } else {
    printf("Cannot save unicharset file %s.\n", unicharset_file_name.string());
    return -1;
  }
  return 0;
}
开发者ID:ArnoldWu,项目名称:tess-two,代码行数:60,代码来源:unicharset_extractor.cpp
示例5: print_ratings_info
/**
* print_ratings_info
*
* Send all the ratings out to the logfile.
*
* @param fp file to use
* @param ratings list of results
* @param current_unicharset unicharset that can be used
* for id-to-unichar conversion
*/
void print_ratings_info(FILE *fp,
BLOB_CHOICE_LIST *ratings,
const UNICHARSET ¤t_unicharset) {
inT32 index; // to list
inT32 best_index; // to list
FLOAT32 best_rat; // rating
FLOAT32 best_cert; // certainty
const char* first_char = NULL; // character
FLOAT32 first_rat; // rating
FLOAT32 first_cert; // certainty
const char* sec_char = NULL; // character
FLOAT32 sec_rat = 0.0f; // rating
FLOAT32 sec_cert = 0.0f; // certainty
BLOB_CHOICE_IT c_it = ratings; // iterator
index = ratings->length();
if (index > 0) {
first_char = current_unicharset.id_to_unichar(c_it.data()->unichar_id());
first_rat = c_it.data()->rating();
first_cert = -c_it.data()->certainty();
if (index > 1) {
sec_char = current_unicharset.id_to_unichar(
c_it.data_relative(1)->unichar_id());
sec_rat = c_it.data_relative(1)->rating();
sec_cert = -c_it.data_relative(1)->certainty();
} else {
sec_char = NULL;
sec_rat = -1;
sec_cert = -1;
}
} else {
first_char = NULL;
first_rat = -1;
first_cert = -1;
}
best_index = -1;
best_rat = -1;
best_cert = -1;
for (index = 0, c_it.mark_cycle_pt(); !c_it.cycled_list();
c_it.forward(), index++) {
if (strcmp(current_unicharset.id_to_unichar(c_it.data()->unichar_id()),
blob_answer) == 0) {
best_index = index;
best_rat = c_it.data()->rating();
best_cert = -c_it.data()->certainty();
}
}
if (first_char != NULL && (*first_char == '\0' || *first_char == ' '))
first_char = NULL;
if (sec_char != NULL && (*sec_char == '\0' || *sec_char == ' '))
sec_char = NULL;
fprintf(matcher_fp,
" " INT32FORMAT " " INT32FORMAT " %g %g %s %g %g %s %g %g\n",
ratings->length(), best_index, best_rat, best_cert,
first_char != NULL ? first_char : "~",
first_rat, first_cert, sec_char != NULL ? sec_char : "~",
sec_rat, sec_cert);
}
开发者ID:AngusHardie,项目名称:TesseractOCR-For-Mac,代码行数:68,代码来源:ratngs.cpp
示例6: ErrorCounter
// Private constructor; ErrorCounter is only meant to be used through the
// static ComputeErrorRate.
// Sizes unichar_counts_ and multi_unichar_counts_ from unicharset.size() and
// font_counts_ from fontsize.
ErrorCounter::ErrorCounter(const UNICHARSET& unicharset, int fontsize)
    : scaled_error_(0.0),
      rating_epsilon_(kRatingEpsilon),
      unichar_counts_(unicharset.size(), unicharset.size(), 0),
      ok_score_hist_(0, 101),
      bad_score_hist_(0, 101),
      unicharset_(unicharset) {
  multi_unichar_counts_.init_to_size(unicharset.size(), 0);
  // Every font starts from the same default-constructed Counts.
  Counts blank_counts;
  font_counts_.init_to_size(fontsize, blank_counts);
}
开发者ID:xmarston,项目名称:BillRecognizer,代码行数:11,代码来源:errorcounter.cpp
示例7: AddAllScriptsConverted
// Helper: for every script in sid_set except its null script, looks up the
// id that osd_set assigns to the script of the same name and appends that id
// to allowed_ids.
static void AddAllScriptsConverted(const UNICHARSET& sid_set,
                                   const UNICHARSET& osd_set,
                                   GenericVector<int>* allowed_ids) {
  for (int sid = 0; sid < sid_set.get_script_table_size(); ++sid) {
    if (sid == sid_set.null_sid()) {
      continue;  // The null script is never added.
    }
    const char* script_name = sid_set.get_script_from_script_id(sid);
    const int osd_id = osd_set.get_script_id_from_name(script_name);
    allowed_ids->push_back(osd_id);
  }
}
开发者ID:Kailigithub,项目名称:tesseract,代码行数:12,代码来源:pagesegmain.cpp
示例8: absolute_garbage
// Returns true if the fraction of unichars in word that are alphabetic or
// digits is below kMinAbsoluteGarbageAlphanumFrac. Words shorter than
// kMinAbsoluteGarbageWordLength are never reported as garbage.
bool Dict::absolute_garbage(const WERD_CHOICE &word,
                            const UNICHARSET &unicharset) {
  if (word.length() < kMinAbsoluteGarbageWordLength) {
    return false;
  }
  int alphanum_count = 0;
  for (int pos = 0; pos < word.length(); ++pos) {
    if (unicharset.get_isalpha(word.unichar_id(pos)) ||
        unicharset.get_isdigit(word.unichar_id(pos))) {
      ++alphanum_count;
    }
  }
  return static_cast<float>(alphanum_count) /
             static_cast<float>(word.length()) <
         kMinAbsoluteGarbageAlphanumFrac;
}
开发者ID:0ximDigital,项目名称:appsScanner,代码行数:11,代码来源:context.cpp
示例9: GetXheightString
// Helper gets the combined x-heights string: for every script in the
// unicharset's script table, the contents of
// "<script_dir>/<script name>.xheights" are appended when that file can be
// read. Scripts whose file cannot be read contribute nothing.
std::string GetXheightString(const std::string& script_dir,
                             const UNICHARSET& unicharset) {
  std::string combined;
  const int num_scripts = unicharset.get_script_table_size();
  for (int script_id = 0; script_id < num_scripts; ++script_id) {
    // Path of the x-heights file for this script.
    std::string path = script_dir;
    path += "/";
    path += unicharset.get_script_from_script_id(script_id);
    path += ".xheights";
    std::string contents;
    if (!File::ReadFileToString(path, &contents)) {
      continue;
    }
    combined += contents;
  }
  return combined;
}
开发者ID:jan-ruzicka,项目名称:tesseract,代码行数:14,代码来源:unicharset_training_utils.cpp
示例10: print
// Print the best guesses out of the match rating matrix.
void MATRIX::print(const UNICHARSET &unicharset) const {
tprintf("Ratings Matrix (top 3 choices)\n");
int dim = dimension();
int band_width = bandwidth();
int row, col;
for (col = 0; col < dim; ++col) {
for (row = col; row < dim && row < col + band_width; ++row) {
BLOB_CHOICE_LIST *rating = this->get(col, row);
if (rating == NOT_CLASSIFIED) continue;
BLOB_CHOICE_IT b_it(rating);
tprintf("col=%d row=%d ", col, row);
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
tprintf("%s rat=%g cert=%g " ,
unicharset.id_to_unichar(b_it.data()->unichar_id()),
b_it.data()->rating(), b_it.data()->certainty());
}
tprintf("\n");
}
tprintf("\n");
}
tprintf("\n");
for (col = 0; col < dim; ++col) tprintf("\t%d", col);
tprintf("\n");
for (row = 0; row < dim; ++row) {
for (col = 0; col <= row; ++col) {
if (col == 0) tprintf("%d\t", row);
if (row >= col + band_width) {
tprintf(" \t");
continue;
}
BLOB_CHOICE_LIST *rating = this->get(col, row);
if (rating != NOT_CLASSIFIED) {
BLOB_CHOICE_IT b_it(rating);
int counter = 0;
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
tprintf("%s ",
unicharset.id_to_unichar(b_it.data()->unichar_id()));
++counter;
if (counter == 3) break;
}
tprintf("\t");
} else {
tprintf(" \t");
}
}
tprintf("\n");
}
}
开发者ID:Kailigithub,项目名称:tesseract,代码行数:49,代码来源:matrix.cpp
示例11: strlen
/**
 * WERD_CHOICE::WERD_CHOICE
 *
 * Constructor to build a WERD_CHOICE from the given string.
 * The string is walked one unichar at a time using unicharset.step(), and
 * the length of each step is collected in a lengths string. If the whole
 * string is consumed with a non-zero final step, the word is initialised
 * from the string and the collected lengths; otherwise (including for an
 * empty string when step() yields 0) the word is made bad.
 * The function assumes that src_string is not NULL.
 */
WERD_CHOICE::WERD_CHOICE(const char *src_string,
                         const UNICHARSET &unicharset) {
  STRING src_lengths;
  int len = strlen(src_string);
  const char *const end = src_string + len;
  const char *cursor = src_string;
  int step = unicharset.step(cursor);
  // The loop condition tests the step taken last; inside, the step at the
  // current position is computed, recorded and applied.
  while (cursor < end && step > 0) {
    step = unicharset.step(cursor);
    src_lengths += step;
    cursor += step;
  }
  if (step == 0 || cursor != end) {
    // there must have been an invalid unichar in the string
    this->init(8);
    this->make_bad();
  } else {
    this->init(src_string, src_lengths.string(),
               0.0, 0.0, NO_PERM, unicharset);
  }
}
开发者ID:AngusHardie,项目名称:TesseractOCR-For-Mac,代码行数:22,代码来源:ratngs.cpp
示例12: init
/**
 * WERD_CHOICE::init
 *
 * Helper function to build a WERD_CHOICE from the given string,
 * fragment lengths, rating, certainty and permuter.
 *
 * The function assumes that src_string is not NULL.
 * src_lengths argument could be NULL, in which case the unichars
 * in src_string are assumed to all be of length 1.
 *
 * An empty src_string only reserves space via init(8). Otherwise one
 * unichar id is stored per entry of src_lengths (or per char of src_string
 * when src_lengths is NULL) and every fragment length is set to 1.
 * rating_, certainty_ and permuter_ are assigned last in both cases.
 */
void WERD_CHOICE::init(const char *src_string,
                       const char *src_lengths,
                       float src_rating,
                       float src_certainty,
                       uinT8 src_permuter,
                       const UNICHARSET &unicharset) {
  int src_string_len = strlen(src_string);
  if (src_string_len != 0) {
    this->init(src_lengths ? strlen(src_lengths): src_string_len);
    length_ = reserved_;
    // Walk src_string, consuming one unichar of the recorded length per id.
    const char *cursor = src_string;
    for (int idx = 0; idx < length_; ++idx) {
      int unichar_length = 1;
      if (src_lengths) {
        unichar_length = src_lengths[idx];
      }
      unichar_ids_[idx] = unicharset.unichar_to_id(cursor, unichar_length);
      fragment_lengths_[idx] = 1;
      cursor += unichar_length;
    }
  } else {
    this->init(8);
  }
  rating_ = src_rating;
  certainty_ = src_certainty;
  permuter_ = src_permuter;
}
开发者ID:AngusHardie,项目名称:TesseractOCR-For-Mac,代码行数:35,代码来源:ratngs.cpp
示例13: AppendOtherUnicharset
// For each id in src, if it does not occur in this, add it, as in
// SetPropertiesFromOther, otherwise expand the ranges, as in
// ExpandRangesFromOther.
void UNICHARSET::AppendOtherUnicharset(const UNICHARSET& src) {
int initial_used = size_used;
for (int ch = 0; ch < src.size_used; ++ch) {
const UNICHAR_PROPERTIES& src_props = src.unichars[ch].properties;
const char* utf8 = src.id_to_unichar(ch);
if (strcmp(utf8, " ") != 0 && src_props.AnyRangeEmpty()) {
// Only use fully valid entries.
tprintf("Bad properties for index %d, char %s: "
"%d,%d %d,%d %d,%d %d,%d %d,%d\n",
ch, utf8, src_props.min_bottom, src_props.max_bottom,
src_props.min_top, src_props.max_top,
src_props.min_width, src_props.max_width,
src_props.min_bearing, src_props.max_bearing,
src_props.min_advance, src_props.max_advance);
continue;
}
int id = size_used;
if (contains_unichar(utf8)) {
id = unichar_to_id(utf8);
// Just expand current ranges.
unichars[id].properties.ExpandRangesFrom(src_props);
} else {
unichar_insert(utf8);
unichars[id].properties.SetRangesEmpty();
}
}
// Set properties, including mirror and other_case, WITHOUT reordering
// the unicharset.
PartialSetPropertiesFromOther(initial_used, src);
}
开发者ID:0ximDigital,项目名称:appsScanner,代码行数:33,代码来源:unicharset.cpp
示例14: print
// Print the best guesses out of the match rating matrix.
void MATRIX::print(const UNICHARSET &unicharset) {
tprintf("Ratings Matrix (top choices)\n");
int row, col;
for (col = 0; col < this->dimension(); ++col) tprintf("\t%d", col);
tprintf("\n");
for (row = 0; row < this->dimension(); ++row) {
for (col = 0; col <= row; ++col) {
if (col == 0) tprintf("%d\t", row);
BLOB_CHOICE_LIST *rating = this->get(col, row);
if (rating != NOT_CLASSIFIED) {
BLOB_CHOICE_IT b_it(rating);
int counter = 0;
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
tprintf("%s ", unicharset.id_to_unichar(b_it.data()->unichar_id()));
++counter;
if (counter == 3) break;
}
tprintf("\t");
} else {
tprintf(" \t");
}
}
tprintf("\n");
}
}
开发者ID:0359xiaodong,项目名称:tess-two,代码行数:26,代码来源:matrix.cpp
示例15: print_ratings_list
/**********************************************************************
* print_ratings_list
*
* Send all the ratings out to the logfile.
**********************************************************************/
void print_ratings_list(
const char *msg, // intro message
BLOB_CHOICE_LIST *ratings, // list of results
const UNICHARSET ¤t_unicharset // unicharset that can be used
// for id-to-unichar conversion
) {
if (ratings->length() == 0) {
tprintf("%s:<none>\n", msg);
return;
}
if (*msg != '\0') {
tprintf("%s\n", msg);
}
BLOB_CHOICE_IT c_it;
c_it.set_to_list(ratings);
for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
tprintf("r%.2f c%.2f : %d %s",
c_it.data()->rating(), c_it.data()->certainty(),
c_it.data()->unichar_id(),
current_unicharset.debug_str(c_it.data()->unichar_id()).string());
if (!c_it.at_last()) {
tprintf("\n");
}
}
tprintf("\n");
fflush(stdout);
}
开发者ID:mk219533,项目名称:tesseract-ocr,代码行数:32,代码来源:ratngs.cpp
示例16: wc_to_unichar_id
// Returns the id that unicharset assigns to the character code wc.
// wc is converted to a UTF-8 string through UNICHAR; the buffer handed back
// by utf8_str() is released here with delete[].
UNICHAR_ID wc_to_unichar_id(const UNICHARSET &unicharset, int wc) {
  UNICHAR wide_char(wc);
  char *utf8 = wide_char.utf8_str();
  const UNICHAR_ID id = unicharset.unichar_to_id(utf8);
  delete[] utf8;
  return id;
}
开发者ID:ArnoldWu,项目名称:tess-two,代码行数:7,代码来源:unicharset_extractor.cpp
示例17: check_for_words
int Dawg::check_for_words(const char *filename,
const UNICHARSET &unicharset,
bool enable_wildcard) const {
if (filename == nullptr) return 0;
FILE *word_file;
char string [CHARS_PER_LINE];
int misses = 0;
UNICHAR_ID wildcard = unicharset.unichar_to_id(kWildcard);
word_file = fopen(filename, "r");
if (word_file == nullptr) {
tprintf("Error: Could not open file %s\n", filename);
ASSERT_HOST(word_file);
}
while (fgets (string, CHARS_PER_LINE, word_file) != nullptr) {
chomp_string(string); // remove newline
WERD_CHOICE word(string, unicharset);
if (word.length() > 0 &&
!word.contains_unichar_id(INVALID_UNICHAR_ID)) {
if (!match_words(&word, 0, 0,
enable_wildcard ? wildcard : INVALID_UNICHAR_ID)) {
tprintf("Missing word: %s\n", string);
++misses;
}
} else {
tprintf("Failed to create a valid word from %s\n", string);
}
}
fclose (word_file);
// Make sure the user sees this with fprintf instead of tprintf.
if (debug_level_) tprintf("Number of lost words=%d\n", misses);
return misses;
}
开发者ID:Shreeshrii,项目名称:tesseract,代码行数:35,代码来源:dawg.cpp
示例18: case_ok
int Dict::case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset) {
int state = 0;
int x;
for (x = 0; x < word.length(); ++x) {
UNICHAR_ID ch_id = word.unichar_id(x);
if (unicharset.get_isupper(ch_id))
state = case_state_table[state][1];
else if (unicharset.get_islower(ch_id))
state = case_state_table[state][2];
else if (unicharset.get_isdigit(ch_id))
state = case_state_table[state][3];
else
state = case_state_table[state][0];
if (state == -1) return false;
}
return state != 5; // single lower is bad
}
开发者ID:0ximDigital,项目名称:appsScanner,代码行数:17,代码来源:context.cpp
示例19: ExpandRangesFromOther
// Expands the tops and bottoms and widths for this unicharset given a
// src unicharset with ranges in it. The unicharsets don't have to be the
// same, and graphemes are correctly accounted for.
void UNICHARSET::ExpandRangesFromOther(const UNICHARSET& src) {
for (int ch = 0; ch < size_used; ++ch) {
const char* utf8 = id_to_unichar(ch);
UNICHAR_PROPERTIES properties;
if (src.GetStrProperties(utf8, &properties)) {
// Expand just the ranges from properties.
unichars[ch].properties.ExpandRangesFrom(properties);
}
}
}
开发者ID:0ximDigital,项目名称:appsScanner,代码行数:13,代码来源:unicharset.cpp
示例20: print_ratings_info
/**
* print_ratings_info
*
* Send all the ratings out to the logfile.
*
* @param fp file to use
* @param ratings list of results
* @param current_unicharset unicharset that can be used
* for id-to-unichar conversion
*/
void print_ratings_info(FILE *fp,
BLOB_CHOICE_LIST *ratings,
const UNICHARSET ¤t_unicharset) {
inT32 index; // to list
const char* first_char = NULL; // character
FLOAT32 first_rat; // rating
FLOAT32 first_cert; // certainty
const char* sec_char = NULL; // character
FLOAT32 sec_rat = 0.0f; // rating
FLOAT32 sec_cert = 0.0f; // certainty
BLOB_CHOICE_IT c_it = ratings; // iterator
index = ratings->length();
if (index > 0) {
first_char = current_unicharset.id_to_unichar(c_it.data()->unichar_id());
first_rat = c_it.data()->rating();
first_cert = -c_it.data()->certainty();
if (index > 1) {
sec_char = current_unicharset.id_to_unichar(
c_it.data_relative(1)->unichar_id());
sec_rat = c_it.data_relative(1)->rating();
sec_cert = -c_it.data_relative(1)->certainty();
} else {
sec_char = NULL;
sec_rat = -1;
sec_cert = -1;
}
} else {
first_char = NULL;
first_rat = -1;
first_cert = -1;
}
if (first_char != NULL && (*first_char == '\0' || *first_char == ' '))
first_char = NULL;
if (sec_char != NULL && (*sec_char == '\0' || *sec_char == ' '))
sec_char = NULL;
tprintf(" " INT32FORMAT " %s %g %g %s %g %g\n",
ratings->length(),
first_char != NULL ? first_char : "~",
first_rat, first_cert, sec_char != NULL ? sec_char : "~",
sec_rat, sec_cert);
}
开发者ID:coffeesam,项目名称:tesseract-ocr,代码行数:52,代码来源:ratngs.cpp
注:本文中的UNICHARSET类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论