39 #include "config_auto.h"
46 "Force proportional word segmentation on all rows");
48 "Chopper is being tested.");
50 #define FIXED_WIDTH_MULTIPLE 5
51 #define BLOCK_STATS_CLUSTERS 10
62 TO_ROW_IT to_row_it(rows);
63 ROW_IT row_it(real_rows);
64 for (to_row_it.mark_cycle_pt(); !to_row_it.cycled_list();
65 to_row_it.forward()) {
66 TO_ROW* row = to_row_it.data();
70 C_BLOB_IT cblob_it(&cblobs);
72 for (;!box_it.empty(); box_it.forward()) {
76 C_OUTLINE_IT cout_it(cblob_it.data()->out_list());
77 cout_it.move_to_last();
79 delete bblob->
cblob();
83 cblob_it.add_after_then_move(bblob->
cblob());
95 word_it.add_after_then_move(word);
96 row_it.add_after_then_move(real_row);
109 TO_BLOCK_LIST *port_blocks) {
110 TO_BLOCK_IT block_it;
120 block_it.set_to_list(port_blocks);
121 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
122 block = block_it.data();
142 TO_ROW_IT row_it = block->
get_rows ();
147 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
148 row = row_it.data ();
164 tprintf (
"Assigning defaults %d non, %d space to row at %g\n",
172 #ifndef GRAPHICS_DISABLED
209 STATS gap_stats (0, maxwidth);
210 STATS cluster_stats[4];
220 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
221 blob = blob_it.data ();
225 gap_stats.
add (blob_box.
width (), 1);
229 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
230 blob = blob_it.data ();
235 if (this_valid && prev_valid
236 && blob_box.
left () - prev_x < maxwidth) {
237 gap_stats.
add (blob_box.
left () - prev_x, 1);
239 prev_x = blob_box.
right ();
240 prev_valid = this_valid;
248 gap_stats.
smooth (smooth_factor);
251 cluster_count = gap_stats.
cluster (lower, upper,
254 while (cluster_count < 2 && ceil (lower) < floor (upper)) {
256 upper = (upper * 3 + lower) / 4;
257 lower = (lower * 3 + upper) / 4;
258 cluster_count = gap_stats.
cluster (lower, upper,
262 if (cluster_count < 2) {
267 for (gap_index = 0; gap_index < cluster_count; gap_index++)
268 gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
270 if (cluster_count > 2) {
272 tprintf (
"Row at %g has 3 sizes of gap:%g,%g,%g\n",
274 cluster_stats[1].
ile (0.5),
275 cluster_stats[2].
ile (0.5), cluster_stats[3].
ile (0.5));
278 if (gaps[1] > lower) {
281 && gaps[2] > gaps[1]) {
285 else if (gaps[2] > lower
292 tprintf (
"Had to switch most common from lower to upper!!\n");
303 if (gaps[1] < gaps[0]) {
305 tprintf (
"Had to switch most common from lower to upper!!\n");
324 tprintf (
"Disagreement between block and row at %g!!\n",
326 tprintf (
"Lower=%g, upper=%g, Stats:\n", lower, upper);
346 tprintf (
"Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
384 STATS gap_stats (0, maxwidth);
401 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
402 blob = blob_it.data ();
405 this_valid = blob_box.
width () >= min_width;
407 if (this_valid && prev_valid
408 && blob_box.
left () - prev_x < maxwidth) {
409 gap_stats.
add (blob_box.
left () - prev_x, 1);
412 prev_x = blob_box.
right ();
413 prev_valid = this_valid;
420 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
421 blob_it.forward ()) {
422 blob = blob_it.data ();
425 if (blob_box.
left () - prev_x < maxwidth) {
426 gap_stats.
add (blob_box.
left () - prev_x, 1);
428 prev_x = blob_box.
right ();
441 gap_stats.
smooth (smooth_factor);
443 prev_count = cluster_count;
444 cluster_count = gap_stats.
cluster (lower, upper,
449 if (cluster_count < 1) {
454 for (gap_index = 0; gap_index < cluster_count; gap_index++)
455 gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
458 tprintf (
"cluster_count=%d:", cluster_count);
459 for (gap_index = 0; gap_index < cluster_count; gap_index++)
460 tprintf (
" %g(%d)", gaps[gap_index],
461 cluster_stats[gap_index + 1].get_total ());
466 for (gap_index = 0; gap_index < cluster_count
468 if (gap_index < cluster_count)
469 lower = gaps[gap_index];
472 tprintf (
"No cluster below block threshold!, using default=%g\n",
476 for (gap_index = 0; gap_index < cluster_count
477 && gaps[gap_index] <= block->
max_nonspace; gap_index++);
478 if (gap_index < cluster_count)
479 upper = gaps[gap_index];
482 tprintf (
"No cluster above block threshold!, using default=%g\n",
502 tprintf (
"Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
522 TO_ROW_IT row_it = block->
get_rows ();
528 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
529 row = row_it.data ();
553 if (real_row !=
NULL) {
555 real_row_it.add_after_then_move (real_row);
583 if (word_it.empty ())
585 word_box = word_it.data ()->bounding_box ();
586 for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ())
587 word_box += word_it.data ()->bounding_box ();
588 xstarts[0] = word_box.
left ();
589 xstarts[1] = word_box.
right ();
591 coeffs[1] = row->
line_m ();
592 coeffs[2] = row->
line_c ();
594 real_row =
new ROW(row,
596 word_it.set_to_list (real_row->
word_list ());
598 word_it.add_list_after (&row->
rep_words);
616 C_OUTLINE_IT cout_it;
618 C_BLOB_IT cblob_it = &cblobs;
623 for (blobindex = 0; blobindex < blobcount; blobindex++) {
624 bblob = box_it->extract();
627 cout_it.set_to_list(cblob_it.data()->out_list());
628 cout_it.move_to_last();
630 delete bblob->
cblob();
635 cblob_it.add_after_then_move(bblob->
cblob());
644 word =
new WERD(&cblobs, blanks,
NULL);
648 if (box_it->at_first())