2 This is a Optical-Character-Recognition program
3 Copyright (C) 2000-2006 Joerg Schulenburg
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 as published by the Free Software Foundation; either version 2
8 of the License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 Joerg.Schulenburg@physik.uni-magdeburg.de */
21 /* Filter by tree, filter by number methods added by
22 * William Webber, william@williamwebber.com. */
29 * Defining this causes assert() calls to be turned off runtime.
31 * This is normally taken care of by make.
35 // ------------------ (&~7)-pixmap-functions ------------------------
37 /* test if pixel marked?
38 * Returns: 0 if not marked, least 3 bits if marked.
40 int marked (pix
* p
, int x
, int y
) {
41 if (x
< 0 || y
< 0 || x
>= p
->x
|| y
>= p
->y
)
43 return (pixel_atp(p
, x
, y
) & 7);
46 #define Nfilt3 6 /* number of 3x3 filter */
48 * Filters to correct possible scanning or image errors.
50 * Each of these filters represents a 3x3 pixel area.
51 * 0 represents a white or background pixel, 1 a black or
52 * foreground pixel, and 2 represents a pixel of either value.
53 * Note that this differs from the meaning of pixel values in
54 * the image, where a high value means "white" (background),
55 * and a low value means "black" (foreground).
57 * These filters are applied to the 3x3 environment of a pixel
58 * to be retrieved from the image, centered around that pixel
59 * (that is, the to-be-retrieved pixel corresponds with the
60 * the fifth position of the filter).
61 * If the filter matches that pixel environment, then
62 * the returned value of the pixel is inverted (black->white
65 * So, for instance, the second filter below matches this
72 * and "fills in" the middle (retrieved) pixel to rejoin a line
73 * that may have been broken by a scanning or image error.
75 const char filt3
[Nfilt3
][9]={
76 {0,0,0, 0,0,1, 1,0,0}, /* (-1,-1) (0,-1) (1,-1) (-1,0) (0,0) ... */
77 {0,0,0, 1,0,1, 0,0,0},
78 {1,0,0, 0,0,1, 0,0,0},
79 {1,1,0, 0,1,0, 2,1,1},
80 {0,0,1, 0,0,0, 2,1,0},
83 /* 2=ignore_pixel, 0=white_background, 1=black_pixel */
87 * Filter by matrix uses the above matrix of filters directly. Pixel
88 * environments to be filtered are compared pixel by pixel against
91 * Filter by number converts these filters into integer representations
92 * and stores them in a table. Pixel environments are similarly
93 * converted to integers, and looked up in the table.
95 * Filter by tree converts these filters into a binary tree. Pixel
96 * environments are matched by traversing the tree.
98 * A typical performance ratio for these three methods is 20:9:7
99 * respectively (i.e., the tree method takes around 35% of the
100 * time of the matrix method).
102 #define FILTER_BY_MATRIX 0
103 #define FILTER_BY_NUMBER 1
104 #define FILTER_BY_TREE 2
106 #define FILTER_METHOD FILTER_BY_TREE
109 * Defining FILTER_CHECKED causes filter results from either the tree
110 * or the number method to be checked against results of the other
111 * two methods to ensure correctness. This is for bug checking purposes
114 /* #define FILTER_CHECKED */
117 * Defining FILTER_STATISTICS causes statistics to be kept on how many
118 * times the filters are tried, how many times a filter matches, and
119 * of these matches how many flip a black pixel to white, and how many
120 * the reverse. These statistics are printed to stderr at the end of
121 * the program run. Currently, statistics are only kept if the tree
122 * filter method is being used.
124 /* #define FILTER_STATISTICS */
126 #ifdef FILTER_STATISTICS
127 static int filter_tries
= 0;
128 static int filter_matches
= 0;
129 static int filter_blackened
= 0;
130 static int filter_whitened
= 0;
133 #ifdef FILTER_STATISTICS
134 void print_filter_stats() {
135 fprintf(stderr
, "\n# Error filter statistics: tries %d, matches %d, "
136 "blackened %d, whitened %d\n",
137 filter_tries
, filter_matches
, filter_blackened
, filter_whitened
);
141 #if FILTER_METHOD == FILTER_BY_MATRIX || defined(FILTER_CHECKED)
143 * Filter the pixel at (x,y) by directly applying the matrix.
145 int pixel_filter_by_matrix(pix
* p
, int x
, int y
) {
148 memset(c33
, 0, sizeof(c33
));
149 /* copy environment of a point (only highest bit)
150 bbg: FASTER now. It has 4 ifs less at least, 8 at most. */
151 if (x
> 0) { c33
[3] = pixel_atp(p
,x
-1, y
)>>7;
152 if (y
> 0) c33
[0] = pixel_atp(p
,x
-1,y
-1)>>7;
153 if (y
+1 < p
->y
) c33
[6] = pixel_atp(p
,x
-1,y
+1)>>7;
155 if (x
+1 < p
->x
) { c33
[5] = pixel_atp(p
,x
+1, y
)>>7;
156 if (y
> 0) c33
[2] = pixel_atp(p
,x
+1,y
-1)>>7;
157 if (y
+1 < p
->y
) c33
[8] = pixel_atp(p
,x
+1,y
+1)>>7;
159 if (y
> 0) c33
[1] = pixel_atp(p
, x
,y
-1)>>7;
160 c33
[4] = pixel_atp(p
, x
, y
)>>7;
161 if (y
+1 < p
->y
) c33
[7] = pixel_atp(p
, x
,y
+1)>>7;
164 for (i
= 0; i
< Nfilt3
; i
++)
165 if( ( (filt3
[i
][0]>>1) || c33
[0]!=(1 & filt3
[i
][0]) )
166 && ( (filt3
[i
][1]>>1) || c33
[1]!=(1 & filt3
[i
][1]) )
167 && ( (filt3
[i
][2]>>1) || c33
[2]!=(1 & filt3
[i
][2]) )
168 && ( (filt3
[i
][3]>>1) || c33
[3]!=(1 & filt3
[i
][3]) )
169 && ( (filt3
[i
][4]>>1) || c33
[4]!=(1 & filt3
[i
][4]) )
170 && ( (filt3
[i
][5]>>1) || c33
[5]!=(1 & filt3
[i
][5]) )
171 && ( (filt3
[i
][6]>>1) || c33
[6]!=(1 & filt3
[i
][6]) )
172 && ( (filt3
[i
][7]>>1) || c33
[7]!=(1 & filt3
[i
][7]) )
173 && ( (filt3
[i
][8]>>1) || c33
[8]!=(1 & filt3
[i
][8]) ) ) {
174 return ((filt3
[i
][4])?JOB
->cfg
.cs
:0);
176 return pixel_atp(p
, x
, y
) & ~7;
180 #if FILTER_METHOD == FILTER_BY_NUMBER || defined(FILTER_CHECKED)
182 #define NUM_TABLE_SIZE 512 /* max value of 9-bit value */
184 * Recursively generates entries in the number table for a matrix filter.
186 * gen_num_filt is the number representation of the matrix filter.
187 * This generation is handled recursively because this is the easiest
188 * way to handle 2 (either value) entries in the filter, which lead
189 * to 2 distinct entries in the number table (one for each alternate
192 void rec_generate_number_table(char * num_table
, const char * filter
,
193 int i
, unsigned short gen_num_filt
) {
195 /* Invert the value of the number representation, to reflect the
196 * fact that the "white" is 0 in the filter, 1 (high) in the image. */
197 gen_num_filt
= ~gen_num_filt
;
198 gen_num_filt
&= 0x01ff;
199 assert(gen_num_filt
< NUM_TABLE_SIZE
);
200 num_table
[gen_num_filt
] = 1;
202 if (filter
[i
] == 0 || filter
[i
] == 2)
203 rec_generate_number_table(num_table
, filter
, i
+ 1, gen_num_filt
);
204 if (filter
[i
] == 1 || filter
[i
] == 2) {
205 gen_num_filt
|= (1 << (8 - i
));
206 rec_generate_number_table(num_table
, filter
, i
+ 1, gen_num_filt
);
212 * Filter the pixel at (x, y) using a number table.
214 * Each filter can be converted into a 9-bit representation, where
215 * filters containing 2 (either value) pixels are converted into
216 * a separate numerical representation for each pixel, where position
217 * i in the filter corresponds to bit i in the number. Each resulting
218 * numerical representation N is represented as a 1 value in the Nth
219 * position of a lookup table. A pixel's environment is converted in
220 * the same way to a numeric representation P, and that environment
221 * matches a filter if num_table[P] == 1.
223 int pixel_filter_by_number(pix
* p
, int x
, int y
) {
224 unsigned short val
= 0;
225 static char num_table
[NUM_TABLE_SIZE
];
226 static int num_table_generated
= 0;
227 if (!num_table_generated
) {
229 memset(num_table
, 0, sizeof(num_table
));
230 for (f
= 0; f
< Nfilt3
; f
++)
231 rec_generate_number_table(num_table
, filt3
[f
], 0, 0);
232 num_table_generated
= 1;
235 /* calculate a numeric value for the 3x3 square around the pixel. */
236 if (x
> 0) { val
|= (pixel_atp(p
,x
-1, y
)>>7) << (8 - 3);
237 if (y
> 0) val
|= (pixel_atp(p
,x
-1,y
-1)>>7) << (8 - 0);
238 if (y
+1 < p
->y
) val
|= (pixel_atp(p
,x
-1,y
+1)>>7) << (8 - 6);
240 if (x
+1 < p
->x
) { val
|= (pixel_atp(p
,x
+1, y
)>>7) << (8 - 5);
241 if (y
> 0) val
|= (pixel_atp(p
,x
+1,y
-1)>>7) << (8 - 2);
242 if (y
+1 < p
->y
) val
|= (pixel_atp(p
,x
+1,y
+1)>>7) << (8 - 8);
244 if (y
> 0) val
|= (pixel_atp(p
, x
,y
-1)>>7) << (8 - 1);
245 val
|= (pixel_atp(p
, x
, y
)>>7) << (8 - 4);
246 if (y
+1 < p
->y
) val
|= (pixel_atp(p
, x
,y
+1)>>7) << (8 - 7);
247 assert(val
< NUM_TABLE_SIZE
);
250 return (val
& (1 << 4)) ? 0 : JOB
->cfg
.cs
;
252 return pixel_atp(p
, x
, y
) & ~7;
256 #if FILTER_METHOD == FILTER_BY_TREE || defined(FILTER_CHECKED)
258 #define TREE_ARRAY_SIZE 1024
259 /* 1+ number of nodes in a complete binary tree of height 10 */
262 * Recursively generate a tree representation of a filter.
264 void rec_generate_tree(char * tree
, const char * filter
, int i
, int n
) {
265 assert(i
>= 0 && i
<= 9);
266 assert(n
< TREE_ARRAY_SIZE
);
274 /* first iteration has n == -1, does not set any values of the tree,
275 just to find whether to start to the left or the right */
279 rec_generate_tree(tree
, filter
, i
+ 1, n
* 2 + 2);
280 else if (filter
[i
] == 1)
281 rec_generate_tree(tree
, filter
, i
+ 1, n
* 2 + 3);
283 rec_generate_tree(tree
, filter
, i
+ 1, n
* 2 + 2);
284 rec_generate_tree(tree
, filter
, i
+ 1, n
* 2 + 3);
289 * Filter the pixel at (x, y) using the tree method.
291 * Each filter is represented by a single branch of a binary
292 * tree, except for filters contain "either value" entries, which
293 * bifurcate at that point in the branch. Each white pixel in the filter
294 * is a left branch in the tree, each black pixel a right branch. The
295 * final node of a branch indicates whether this filter turns a white
296 * pixel black, or a black one white.
298 * We match a pixel's environment against this tree by similarly
299 * using the pixels in that environment to traverse the tree. If
300 * we run out of nodes before getting to the end of a branch, then
301 * the environment doesn't match against any of the filters represented
302 * by the tree. Otherwise, we return the value specified by the
305 * Since the total tree size, even including missing nodes, is small
306 * (2 ^ 10), we can use a standard array representation of a binary
307 * tree, where for the node tree[n], the left child is tree[2n + 2],
308 * and the right tree[2n + 3]. The only information we want
309 * from a non-leaf node is whether it exists (that is, is part of
310 * a filter-representing branch). We represent this with the value
311 * 1 at the node's slot in the array, the contrary by 0. For the
312 * leaf node, 0 again represents non-existence, 1 that the filter
313 * represented by this branch turns a black pixel white, and 2 a
316 int pixel_filter_by_tree(pix
* p
, int x
, int y
) {
317 static char tree
[TREE_ARRAY_SIZE
];
318 static int tree_generated
= 0;
320 int pixel_val
= pixel_atp(p
, x
, y
) & ~7;
321 #ifdef FILTER_STATISTICS
322 static int registered_filter_stats
= 0;
323 if (!registered_filter_stats
) {
324 atexit(print_filter_stats
);
325 registered_filter_stats
= 1;
328 #endif /* FILTER_STATISTICS */
329 if (!tree_generated
) {
331 memset(tree
, 0, sizeof(tree
));
332 for (f
= 0; f
< Nfilt3
; f
++) {
333 const char * filter
= filt3
[f
];
334 rec_generate_tree(tree
, filter
, 0, -1);
340 /* Note that for the image, low is black, high is white, whereas
341 * for the filter, 0 is white, 1 is black. For the image, then,
342 * high (white) means go left, low (black) means go right. */
344 #define IS_BLACK(_dx,_dy) !(pixel_atp(p, x + (_dx), y + (_dy)) >> 7)
345 #define IS_WHITE(_dx,_dy) (pixel_atp(p, x + (_dx), y + (_dy)) >> 7)
346 #define GO_LEFT n = n * 2 + 2
347 #define GO_RIGHT n = n * 2 + 3
348 #define CHECK_NO_MATCH if (tree[n] == 0) return pixel_val
352 /* top 3 pixels off edge == black == right
353 n = 2 * (2 * (2 * -1 + 3) + 3) + 3 = 13 */
356 if (x
== 0 || IS_BLACK(-1, -1))
367 if (x
+ 1 == p
->x
|| IS_BLACK(+1, -1))
375 if (x
== 0 || IS_BLACK(-1, 0))
387 if (x
+ 1 == p
->x
|| IS_BLACK(+1, 0))
395 /* bottom 3 pixels off edge == black == right
396 n' = 2 * (2 * (2n + 3) + 3) + 3
401 if (x
== 0 || IS_BLACK(-1, +1))
413 if (x
+ 1 == p
->x
|| IS_BLACK(+1, +1))
418 assert(n
< TREE_ARRAY_SIZE
);
419 assert(tree
[n
] == 0 || tree
[n
] == 1 || tree
[n
] == 2);
421 #ifdef FILTER_STATISTICS
425 #ifdef FILTER_STATISTICS
426 if (pixel_atp(p
, x
, y
) < JOB
->cfg
.cs
)
431 #ifdef FILTER_STATISTICS
432 if (pixel_atp(p
, x
, y
) >= JOB
->cfg
.cs
)
438 #endif /* FILTER_METHOD == FILTER_BY_TREE */
441 * This simple filter attempts to correct "fax"-like scan errors.
443 int pixel_faxfilter(pix
*p
, int x
, int y
) {
445 r
= pixel_atp(p
,x
,y
)&~7;
446 /* {2,2,2, 2,0,1, 2,1,0} */
447 if ((r
&128) && (~pixel_atp(p
,x
+1, y
)&128)
448 && (~pixel_atp(p
, x
,y
+1)&128)
449 && ( pixel_atp(p
,x
+1,y
+1)&128))
450 r
= 64; /* faxfilter */
453 /* {2,2,2, 1,0,2, 0,1,2} */
454 if ((r
&128) && (~pixel_atp(p
,x
-1, y
)&128)
455 && (~pixel_atp(p
, x
,y
+1)&128)
456 && ( pixel_atp(p
,x
-1,y
+1)&128))
457 r
= 64; /* faxfilter */
461 #ifdef FILTER_CHECKED
463 * Print out the 3x3 environment of a pixel as a 9-bit binary.
465 * For debugging purposes only.
467 void print_pixel_env(FILE * out
, pix
* p
, int x
, int y
) {
469 for (y0
= y
- 1; y0
< y
+ 2; y0
++) {
470 for (x0
= x
- 1; x0
< x
+ 2; x0
++) {
471 if (x0
< 0 || x0
>= p
->x
|| y0
< 0 || y0
>= p
->y
)
473 else if (pixel_atp(p
, x0
, y0
) >> 7)
482 /* this function is heavily used
483 * test if pixel was set, remove low bits (marks) --- later with error-correction
484 * result depends on n_run, if n_run>0 filter are used
485 * Returns: pixel-color (without marks)
487 int getpixel(pix
*p
, int x
, int y
){
488 if ( x
< 0 || y
< 0 || x
>= p
->x
|| y
>= p
->y
)
491 /* filter will be used only once later, when vectorization replaces pixel
494 if (JOB
->tmp
.n_run
> 0) { /* use the filters (correction of errors) */
495 #if FILTER_METHOD == FILTER_BY_NUMBER
496 int pix
= pixel_filter_by_number(p
, x
, y
);
497 #ifdef FILTER_CHECKED
498 int pix2
= pixel_filter_by_matrix(p
, x
, y
);
501 "# BUG: pixel_filter: by number: %d; by matrix: %d, "
502 "by atp %d; env: ", pix
, pix2
, pixel_atp(p
, x
, y
) & ~7);
503 print_pixel_env(stderr
, p
, x
, y
);
506 #endif /* FILTER_CHECKED */
508 #elif FILTER_METHOD == FILTER_BY_MATRIX
509 return pixel_filter_by_matrix(p
, x
, y
);
510 #elif FILTER_METHOD == FILTER_BY_TREE
511 int pix
= pixel_filter_by_tree(p
, x
, y
);
512 #ifdef FILTER_CHECKED
513 int pix2
= pixel_filter_by_matrix(p
, x
, y
);
514 int pix3
= pixel_filter_by_number(p
, x
, y
);
515 if (pix
!= pix2
|| pix
!= pix3
) {
517 "# BUG: pixel_filter: tree: %d; matrix: %d, "
518 "number: %d, atp %d; env: ", pix
, pix2
, pix3
,
519 pixel_atp(p
, x
, y
) & ~7);
520 print_pixel_env(stderr
, p
, x
, y
);
523 #endif /* FILTER_CHECKED */
526 #error FILTER_METHOD not defined
527 #endif /* FILTER_BY_NUMBER */
530 return (pixel_atp(p
,x
,y
) & ~7);
533 /* modify pixel, test if out of range */
534 void put(pix
* p
, int x
, int y
, int ia
, int io
) {
535 if (x
< p
->x
&& x
>= 0 && y
>= 0 && y
< p
->y
)
536 pixel_atp(p
, x
, y
) = (pixel_atp(p
, x
, y
) & ia
) | io
;