Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
textord.cpp
Go to the documentation of this file.
1 
2 // File: textord.cpp
3 // Description: The top-level text line and word finding functionality.
4 // Author: Ray Smith
5 // Created: Fri Mar 13 14:43:01 PDT 2009
6 //
7 // (C) Copyright 2009, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #include "drawtord.h"
26 #include "textord.h"
27 #include "makerow.h"
28 #include "pageres.h"
29 #include "tordmain.h"
30 #include "wordseg.h"
31 
32 namespace tesseract {
33 
// Constructor initializer list. Stores the ccstruct pointer in ccstruct_,
// sets use_cjk_fp_model_ to false, and then initializes every textord_* and
// tosp_* tunable through the BOOL_MEMBER / INT_MEMBER / double_MEMBER macros.
// Each macro call is given the member name, its default value, a help string
// and ccstruct_->params() (presumably the parameter collection the member is
// registered in; confirm against the macro definitions).
// NOTE(review): the constructor signature line is not present in this
// listing; the block starts at the initializer list.
35  : ccstruct_(ccstruct), use_cjk_fp_model_(false),
36  // makerow.cpp ///////////////////////////////////////////
37  BOOL_MEMBER(textord_single_height_mode, false,
38  "Script has no xheight, so use a single mode",
39  ccstruct_->params()),
40  // tospace.cpp ///////////////////////////////////////////
  // NOTE(review): the help string of tosp_old_to_method is identical to the
  // one given to tosp_use_pre_chopping further down; confirm it is intended.
41  BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?",
42  ccstruct_->params()),
43  BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false,
44  "Constrain relative values of inter and intra-word gaps for "
45  "old_to_method.",
46  ccstruct_->params()),
47  BOOL_MEMBER(tosp_only_use_prop_rows, true,
48  "Block stats to use fixed pitch rows?",
49  ccstruct_->params()),
50  BOOL_MEMBER(tosp_force_wordbreak_on_punct, false,
51  "Force word breaks on punct to break long lines in non-space "
52  "delimited langs",
53  ccstruct_->params()),
54  BOOL_MEMBER(tosp_use_pre_chopping, false,
55  "Space stats use prechopping?",
56  ccstruct_->params()),
57  BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code",
58  ccstruct_->params()),
59  BOOL_MEMBER(tosp_block_use_cert_spaces, true,
60  "Only stat OBVIOUS spaces",
61  ccstruct_->params()),
62  BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces",
63  ccstruct_->params()),
64  BOOL_MEMBER(tosp_narrow_blobs_not_cert, true,
65  "Only stat OBVIOUS spaces",
66  ccstruct_->params()),
67  BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces",
68  ccstruct_->params()),
69  BOOL_MEMBER(tosp_recovery_isolated_row_stats, true,
70  "Use row alone when inadequate cert spaces",
71  ccstruct_->params()),
72  BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess",
73  ccstruct_->params()),
74  BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?",
75  ccstruct_->params()),
76  BOOL_MEMBER(tosp_fuzzy_limit_all, true,
77  "Dont restrict kn->sp fuzzy limit to tables",
78  ccstruct_->params()),
79  BOOL_MEMBER(tosp_stats_use_xht_gaps, true,
80  "Use within xht gap for wd breaks",
81  ccstruct_->params()),
82  BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks",
83  ccstruct_->params()),
84  BOOL_MEMBER(tosp_only_use_xht_gaps, false,
85  "Only use within xht gap for wd breaks",
86  ccstruct_->params()),
87  BOOL_MEMBER(tosp_rule_9_test_punct, false,
88  "Dont chng kn to space next to punct",
89  ccstruct_->params()),
90  BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip",
91  ccstruct_->params()),
92  BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip",
93  ccstruct_->params()),
94  BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic",
95  ccstruct_->params()),
96  INT_MEMBER(tosp_debug_level, 0, "Debug data",
97  ccstruct_->params()),
98  INT_MEMBER(tosp_enough_space_samples_for_median, 3,
99  "or should we use mean",
100  ccstruct_->params()),
101  INT_MEMBER(tosp_redo_kern_limit, 10,
102  "No.samples reqd to reestimate for row",
103  ccstruct_->params()),
104  INT_MEMBER(tosp_few_samples, 40,
105  "No.gaps reqd with 1 large gap to treat as a table",
106  ccstruct_->params()),
107  INT_MEMBER(tosp_short_row, 20,
108  "No.gaps reqd with few cert spaces to use certs",
109  ccstruct_->params()),
110  INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly",
111  ccstruct_->params()),
112  double_MEMBER(tosp_old_sp_kn_th_factor, 2.0,
113  "Factor for defining space threshold in terms of space and "
114  "kern sizes",
115  ccstruct_->params()),
116  double_MEMBER(tosp_threshold_bias1, 0,
117  "how far between kern and space?",
118  ccstruct_->params()),
119  double_MEMBER(tosp_threshold_bias2, 0,
120  "how far between kern and space?",
121  ccstruct_->params()),
122  double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow",
123  ccstruct_->params()),
124  double_MEMBER(tosp_narrow_aspect_ratio, 0.48,
125  "narrow if w/h less than this",
126  ccstruct_->params()),
127  double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide",
128  ccstruct_->params()),
  // NOTE(review): the help text of tosp_wide_aspect_ratio uses the same
  // less-than wording as tosp_narrow_aspect_ratio; confirm whether the wide
  // test is really a less-than comparison.
129  double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this",
130  ccstruct_->params()),
131  double_MEMBER(tosp_fuzzy_space_factor, 0.6,
132  "Fract of xheight for fuzz sp",
133  ccstruct_->params()),
134  double_MEMBER(tosp_fuzzy_space_factor1, 0.5,
135  "Fract of xheight for fuzz sp",
136  ccstruct_->params()),
137  double_MEMBER(tosp_fuzzy_space_factor2, 0.72,
138  "Fract of xheight for fuzz sp",
139  ccstruct_->params()),
140  double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern",
141  ccstruct_->params()),
142  double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp",
143  ccstruct_->params()),
144  double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp",
145  ccstruct_->params()),
146  double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp",
147  ccstruct_->params()),
148  double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier",
149  ccstruct_->params()),
150  double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier",
151  ccstruct_->params()),
152  double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space",
153  ccstruct_->params()),
154  double_MEMBER(tosp_enough_small_gaps, 0.65,
155  "Fract of kerns reqd for isolated row stats",
156  ccstruct_->params()),
157  double_MEMBER(tosp_table_kn_sp_ratio, 2.25,
158  "Min difference of kn & sp in table",
159  ccstruct_->params()),
160  double_MEMBER(tosp_table_xht_sp_ratio, 0.33,
161  "Expect spaces bigger than this",
162  ccstruct_->params()),
163  double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0,
164  "Fuzzy if less than this",
165  ccstruct_->params()),
166  double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg",
167  ccstruct_->params()),
168  double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg",
169  ccstruct_->params()),
170  double_MEMBER(tosp_min_sane_kn_sp, 1.5,
171  "Dont trust spaces less than this time kn",
172  ccstruct_->params()),
173  double_MEMBER(tosp_init_guess_kn_mult, 2.2,
174  "Thresh guess - mult kn by this",
175  ccstruct_->params()),
176  double_MEMBER(tosp_init_guess_xht_mult, 0.28,
177  "Thresh guess - mult xht by this",
178  ccstruct_->params()),
179  double_MEMBER(tosp_max_sane_kn_thresh, 5.0,
180  "Multiplier on kn to limit thresh",
181  ccstruct_->params()),
182  double_MEMBER(tosp_flip_caution, 0.0,
183  "Dont autoflip kn to sp when large separation",
184  ccstruct_->params()),
185  double_MEMBER(tosp_large_kerning, 0.19,
186  "Limit use of xht gap with large kns",
187  ccstruct_->params()),
188  double_MEMBER(tosp_dont_fool_with_small_kerns, -1,
189  "Limit use of xht gap with odd small kns",
190  ccstruct_->params()),
191  double_MEMBER(tosp_near_lh_edge, 0,
192  "Dont reduce box if the top left is non blank",
193  ccstruct_->params()),
194  double_MEMBER(tosp_silly_kn_sp_gap, 0.2,
195  "Dont let sp minus kn get too small",
196  ccstruct_->params()),
197  double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75,
198  "How wide fuzzies need context",
199  ccstruct_->params()),
200  // tordmain.cpp ///////////////////////////////////////////
201  BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs",
202  ccstruct_->params()),
203  BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs",
204  ccstruct_->params()),
  // NOTE(review): textord_show_boxes carries the same help string as
  // textord_show_blobs just above; confirm whether it should describe boxes.
205  BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs",
206  ccstruct_->params()),
207  INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise",
208  ccstruct_->params()),
209  double_MEMBER(textord_blob_size_bigile, 95, "Percentile for large blobs",
210  ccstruct_->params()),
211  double_MEMBER(textord_noise_area_ratio, 0.7,
212  "Fraction of bounding box for noise",
213  ccstruct_->params()),
214  double_MEMBER(textord_blob_size_smallile, 20,
215  "Percentile for small blobs",
216  ccstruct_->params()),
217  double_MEMBER(textord_initialx_ile, 0.75,
218  "Ile of sizes for xheight guess",
219  ccstruct_->params()),
  // NOTE(review): textord_initialasc_ile reuses the xheight help string of
  // textord_initialx_ile; confirm the intended wording.
220  double_MEMBER(textord_initialasc_ile, 0.90,
221  "Ile of sizes for xheight guess",
222  ccstruct_->params()),
223  INT_MEMBER(textord_noise_sizefraction, 10,
224  "Fraction of size for maxima",
225  ccstruct_->params()),
226  double_MEMBER(textord_noise_sizelimit, 0.5,
227  "Fraction of x for big t count",
228  ccstruct_->params()),
229  INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob",
230  ccstruct_->params()),
231  double_MEMBER(textord_noise_normratio, 2.0,
232  "Dot to norm ratio for deletion",
233  ccstruct_->params()),
234  BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words",
235  ccstruct_->params()),
236  BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows",
237  ccstruct_->params()),
238  double_MEMBER(textord_noise_syfract, 0.2,
239  "xh fract height error for norm blobs",
240  ccstruct_->params()),
241  double_MEMBER(textord_noise_sxfract, 0.4,
242  "xh fract width error for norm blobs",
243  ccstruct_->params()),
244  double_MEMBER(textord_noise_hfract, 1.0/64,
245  "Height fraction to discard outlines as speckle noise",
246  ccstruct_->params()),
247  INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row",
248  ccstruct_->params()),
249  double_MEMBER(textord_noise_rowratio, 6.0,
250  "Dot to norm ratio for deletion",
251  ccstruct_->params()),
252  BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector",
253  ccstruct_->params()),
254  double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift",
255  ccstruct_->params()),
256  double_MEMBER(textord_blshift_xfraction, 9.99,
257  "Min size of baseline shift",
258  ccstruct_->params()) {
  // Empty body: all the work is done in the initializer list above.
259 }
260 
262 }
263 
264 // Make the textlines and words inside each block.
266  int width, int height, Pix* pix,
267  BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) {
  // NOTE(review): the first line of this signature (return type, function
  // name and the pageseg_mode parameter used below) is missing from this
  // listing.
  // Store the page dimensions in page_tr_; it is passed to every row and
  // word making helper called below.
268  page_tr_.set_x(width);
269  page_tr_.set_y(height);
  // An empty to_blocks list is the signal that no earlier stage produced
  // TO_BLOCKs, so the connected components have to be found here.
270  if (to_blocks->empty()) {
271  // AutoPageSeg was not used, so we need to find_components first.
272  find_components(pix, blocks, to_blocks);
273  } else {
274  // AutoPageSeg does not need to find_components as it did that already.
275  // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
276  filter_blobs(page_tr_, to_blocks, true);
277  }
278 
  // Everything below requires at least one TO_BLOCK.
279  ASSERT_HOST(!to_blocks->empty());
  // Vertical text in single-block mode: rotate every block so that the
  // remaining steps can treat it like horizontal text.
280  if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
281  const FCOORD anticlockwise90(0.0f, 1.0f);
282  const FCOORD clockwise90(0.0f, -1.0f);
283  TO_BLOCK_IT it(to_blocks);
284  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
285  TO_BLOCK* to_block = it.data();
286  BLOCK* block = to_block->block;
287  // Create a fake poly_block in block from its bounding box.
  // NOTE(review): embedded listing line 289 is missing, so the POLY_BLOCK
  // constructor call below lacks its remaining argument(s) and closing
  // parentheses in this listing.
288  block->set_poly_block(new POLY_BLOCK(block->bounding_box(),
290  // Rotate the to_block along with its contained block and blobnbox lists.
291  to_block->rotate(anticlockwise90);
292  // Set the block's rotation values to obey the convention followed in
293  // layout analysis for vertical text.
294  block->set_re_rotation(clockwise90);
295  block->set_classify_rotation(clockwise90);
296  }
297  }
298 
  // Iterator over to_blocks; its current element (presumably the first block
  // of the list) is the one handed to the single-row and single-word paths.
299  TO_BLOCK_IT to_block_it(to_blocks);
300  TO_BLOCK* to_block = to_block_it.data();
301  // Make the rows in the block.
  // gradient is assigned by whichever row maker runs and is then passed on
  // to fit_rows and make_words.
302  float gradient;
303  // Do it the old fashioned way.
304  if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
305  gradient = make_rows(page_tr_, to_blocks);
306  } else {
307  // SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
308  gradient = make_single_row(page_tr_, to_block, to_blocks);
309  }
310  // Now fit baselines. For now only old mode is available.
311  fit_rows(gradient, page_tr_, to_blocks);
312  // Now make the words in the lines.
313  if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
314  // SINGLE_LINE uses the old word maker on the single line.
315  make_words(this, page_tr_, gradient, blocks, to_blocks);
316  } else {
317  // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
318  // single word, and in SINGLE_CHAR mode, all the outlines
319  // go in a single blob.
  // NOTE(review): this declaration shadows the outer to_block; both are read
  // from to_block_it.data().
320  TO_BLOCK* to_block = to_block_it.data();
321  make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
322  to_block->get_rows(), to_block->block->row_list());
323  }
324  cleanup_blocks(blocks); // Remove empties.
325 
326  // Compute the margins for each row in the block, to be used later for
327  // paragraph detection.
328  BLOCK_IT b_it(blocks);
329  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
330  b_it.data()->compute_row_margins();
331  }
  // close_to_win is only called when graphics support is compiled in.
332 #ifndef GRAPHICS_DISABLED
333  close_to_win();
334 #endif
335 }
336 
337 // If we were supposed to return only a single textline, and there is more
338 // than one, clean up and leave only the best.
340  PAGE_RES* page_res) {
  // NOTE(review): the first line of this signature (return type, function
  // name and the pageseg_mode parameter used below) is missing from this
  // listing.
  // When line finding was enabled, several rows are a legitimate result.
341  if (PSM_LINE_FIND_ENABLED(pageseg_mode))
342  return; // No cleanup required.
343  PAGE_RES_IT it(page_res);
344  // Find the best row, being the greatest mean word conf.
  // row_total_conf and row_word_count accumulate over the words of the row
  // currently being scanned and are reset at each row boundary.
345  float row_total_conf = 0.0f;
346  int row_word_count = 0;
347  ROW_RES* best_row = NULL;
  // The initial value of best_conf is never compared: the first completed
  // row is accepted through the best_row == NULL test.
348  float best_conf = 0.0f;
349  for (it.restart_page(); it.word() != NULL; it.forward()) {
350  WERD_RES* word = it.word();
  // NOTE(review): best_choice is dereferenced without a null check; confirm
  // that every word reaching this point has a best_choice.
351  row_total_conf += word->best_choice->certainty();
352  ++row_word_count;
  // A differing next row means this word is the last one of its row, so the
  // row mean can be finalized. row_word_count is at least 1 here.
353  if (it.next_row() != it.row()) {
354  row_total_conf /= row_word_count;
355  if (best_row == NULL || best_conf < row_total_conf) {
356  best_row = it.row();
357  best_conf = row_total_conf;
358  }
359  row_total_conf = 0.0f;
360  row_word_count = 0;
361  }
362  }
363  // Now eliminate any word not in the best row.
364  for (it.restart_page(); it.word() != NULL; it.forward()) {
365  if (it.row() != best_row)
366  it.DeleteCurrentWord();
367  }
368 }
369 
370 } // namespace tesseract.