21 #ifndef TESSERACT_WORDREC_LANGUAGE_MODEL_H_
22 #define TESSERACT_WORDREC_LANGUAGE_MODEL_H_
165 static int Compare(
const void *e1,
const void *e2) {
170 return (ve1->
cost < ve2->
cost) ? -1 : 1;
301 bool fixed_pitch,
float best_choice_cert,
302 float max_char_wh_ratio,
float rating_cert_scale,
330 int curr_col,
int curr_row,
331 BLOB_CHOICE_LIST *curr_list,
332 BLOB_CHOICE_LIST *parent_list,
359 int col,
int row,
float best_choice_cert,
371 float best_choice_cert,
396 float priority_adjustment,
397 float worst_piece_cert,
399 float best_choice_cert,
400 float max_char_wh_ratio,
414 float *cert,
bool *fragmented) {
446 return 1.0f / (1.0f + exp(10.0
f * cert));
448 return (-1.0
f / cert);
456 col > 0 && row+1 < dimension);
473 float *cert,
bool *fragmented) {
475 BLOB_CHOICE_IT bit(blist);
476 while (!bit.at_last() &&
IsFragment(bit.data())) {
482 if (bit.data()->certainty() < *cert) *cert = bit.data()->certainty();
486 if (num_problems == 0)
return 0.0f;
487 if (num_problems == 1)
return penalty;
489 static_cast<float>(num_problems-1)));
499 if (dawg_info !=
NULL) {
523 dawg_info, consistency_info)));
530 float ratings_sum,
int length,
float dawg_score,
547 BLOB_CHOICE_LIST *curr_list,
561 int curr_col,
int curr_row,
597 int curr_col,
int curr_row,
610 float certainty,
float denom,
611 int curr_col,
int curr_row,
624 const char *context,
int *unichar_step_len,
625 bool *found_small_prob,
float *ngram_prob);
693 int word_index,
int word_length,
694 int *skip,
int *covered,
696 bool *dawg_score_done);
700 float max_char_wh_ratio,
707 (parent_vse !=
NULL) ? parent_vse->
length : 0,
721 if (top_choice_flags)
return false;
722 if (dawg_info !=
NULL &&
723 (dawg_info->
permuter == SYSTEM_DAWG_PERM ||
724 dawg_info->
permuter == USER_DAWG_PERM ||
725 dawg_info->
permuter == FREQ_DAWG_PERM) &&
740 "Turn on/off the use of character ngram model");
742 "Maximum order of the character ngram model");
744 "Maximum number of prunable (those for which PrunablePath() is true)"
745 "entries in each viterbi list recorded in BLOB_CHOICEs");
747 "Maximum size of viterbi lists recorded in BLOB_CHOICEs");
749 "To avoid overly small denominators use this as the floor"
750 " of the probability returned by the ngram model");
752 "Average classifier score of a non-matching unichar");
754 "Use only the first UTF8 step of the given string"
755 " when computing log probabilities");
757 "Strength of the character ngram model relative to the"
758 " character classifier ");
760 "Words are delimited by space");
763 "Minimum length of compound words");
765 "Depth of blob choice lists to explore"
766 " when fixed length dawgs are on");
769 "Penalty for words not in the frequent word dictionary");
771 "Penalty for non-dictionary words");
773 "Penalty for inconsistent punctuation");
775 "Penalty for inconsistent case");
777 "Penalty for inconsistent script");
779 "Penalty for inconsistent character type");
781 "Penalty for inconsistent font");
783 "Penalty for inconsistent spacing");
786 "Use sigmoidal score for certainty");
852 #endif // TESSERACT_WORDREC_LANGUAGE_MODEL_H_