/*====================================================================* - Copyright (C) 2001 Leptonica. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *====================================================================*/ /*! * \file binarize.c *
* * =================================================================== * Image binarization algorithms are found in: * grayquant.c: standard, simple, general grayscale quantization * adaptmap.c: local adaptive; mostly gray-to-gray in preparation * for binarization * binarize.c: special binarization methods, locally adaptive and * global. * =================================================================== * * Adaptive Otsu-based thresholding * l_int32 pixOtsuAdaptiveThreshold() 8 bpp * * Otsu thresholding on adaptive background normalization * PIX *pixOtsuThreshOnBackgroundNorm() 8 bpp * * Masking and Otsu estimate on adaptive background normalization * PIX *pixMaskedThreshOnBackgroundNorm() 8 bpp * * Sauvola local thresholding * l_int32 pixSauvolaBinarizeTiled() * l_int32 pixSauvolaBinarize() * static PIX *pixSauvolaGetThreshold() * static PIX *pixApplyLocalThreshold(); * * Global thresholding using connected components * PIX *pixThresholdByConnComp() * * Global thresholding by histogram * PIX *pixThresholdByHisto() * * Notes: * (1) pixOtsuAdaptiveThreshold() computes a global threshold over each * tile and performs the threshold operation, resulting in a * binary image for each tile. These are stitched into the * final result. * (2) pixOtsuThreshOnBackgroundNorm() and * pixMaskedThreshOnBackgroundNorm() are binarization functions * that use background normalization with other techniques. * (3) Sauvola binarization computes a local threshold based on * the local average and square average. It takes two constants: * the window size for the measurement at each pixel and a * parameter that determines the amount of normalized local * standard deviation to subtract from the local average value. * (4) pixThresholdByConnComp() uses the numbers of 4 and 8 connected * components at different thresholding to determine if a * global threshold can be used (for text or line-art) and the * value it should have. **/ #ifdef HAVE_CONFIG_H #include
* Notes: * (1) The Otsu method finds a single global threshold for an image. * This function allows a locally adapted threshold to be * found for each tile into which the image is broken up. * (2) The array of threshold values, one for each tile, constitutes * a highly downscaled image. This array is optionally * smoothed using a convolution. The full width and height of the * convolution kernel are (2 * %smoothx + 1) and (2 * %smoothy + 1). * (3) The minimum tile dimension allowed is 16. If such small * tiles are used, it is recommended to use smoothing, because * without smoothing, each small tile determines the splitting * threshold independently. A tile that is entirely in the * image bg will then hallucinate fg, resulting in a very noisy * binarization. The smoothing should be large enough that no * tile is only influenced by one type (fg or bg) of pixels, * because it will force a split of its pixels. * (4) To get a single global threshold for the entire image, use * input values of %sx and %sy that are larger than the image. * For this situation, the smoothing parameters are ignored. * (5) The threshold values partition the image pixels into two classes: * one whose values are less than the threshold and another * whose values are greater than or equal to the threshold. * This is the same use of 'threshold' as in pixThresholdToBinary(). * (6) The scorefract is the fraction of the maximum Otsu score, which * is used to determine the range over which the histogram minimum * is searched. See numaSplitDistribution() for details on the * underlying method of choosing a threshold. * (7) This uses enables a modified version of the Otsu criterion for * splitting the distribution of pixels in each tile into a * fg and bg part. The modification consists of searching for * a minimum in the histogram over a range of pixel values where * the Otsu score is within a defined fraction, %scorefract, * of the max score. To get the original Otsu algorithm, set * %scorefract == 0. * (8) N.B. This method is NOT recommended for images with weak text * and significant background noise, such as bleedthrough, because * of the problem noted in (3) above for tiling. Use Sauvola. **/ l_ok pixOtsuAdaptiveThreshold(PIX *pixs, l_int32 sx, l_int32 sy, l_int32 smoothx, l_int32 smoothy, l_float32 scorefract, PIX **ppixth, PIX **ppixd) { l_int32 w, h, nx, ny, i, j, thresh; l_uint32 val; PIX *pixt, *pixb, *pixthresh, *pixth, *pixd; PIXTILING *pt; PROCNAME("pixOtsuAdaptiveThreshold"); if (!ppixth && !ppixd) return ERROR_INT("neither &pixth nor &pixd defined", procName, 1); if (ppixth) *ppixth = NULL; if (ppixd) *ppixd = NULL; if (!pixs || pixGetDepth(pixs) != 8) return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); if (sx < 16 || sy < 16) return ERROR_INT("sx and sy must be >= 16", procName, 1); /* Compute the threshold array for the tiles */ pixGetDimensions(pixs, &w, &h, NULL); nx = L_MAX(1, w / sx); ny = L_MAX(1, h / sy); smoothx = L_MIN(smoothx, (nx - 1) / 2); smoothy = L_MIN(smoothy, (ny - 1) / 2); pt = pixTilingCreate(pixs, nx, ny, 0, 0, 0, 0); pixthresh = pixCreate(nx, ny, 8); for (i = 0; i < ny; i++) { for (j = 0; j < nx; j++) { pixt = pixTilingGetTile(pt, i, j); pixSplitDistributionFgBg(pixt, scorefract, 1, &thresh, NULL, NULL, NULL); pixSetPixel(pixthresh, j, i, thresh); /* see note (4) */ pixDestroy(&pixt); } } /* Optionally smooth the threshold array */ if (smoothx > 0 || smoothy > 0) pixth = pixBlockconv(pixthresh, smoothx, smoothy); else pixth = pixClone(pixthresh); pixDestroy(&pixthresh); /* Optionally apply the threshold array to binarize pixs */ if (ppixd) { pixd = pixCreate(w, h, 1); pixCopyResolution(pixd, pixs); for (i = 0; i < ny; i++) { for (j = 0; j < nx; j++) { pixt = pixTilingGetTile(pt, i, j); pixGetPixel(pixth, j, i, &val); pixb = pixThresholdToBinary(pixt, val); pixTilingPaintTile(pixd, i, j, pixb, pt); pixDestroy(&pixt); pixDestroy(&pixb); } } *ppixd = pixd; } if (ppixth) *ppixth = pixth; else pixDestroy(&pixth); pixTilingDestroy(&pt); return 0; } /*------------------------------------------------------------------* * Otsu thresholding on adaptive background normalization * *------------------------------------------------------------------*/ /*! * \brief pixOtsuThreshOnBackgroundNorm() * * \param[in] pixs 8 bpp grayscale; not colormapped * \param[in] pixim [optional] 1 bpp 'image' mask; can be null * \param[in] sx, sy tile size in pixels * \param[in] thresh threshold for determining foreground * \param[in] mincount min threshold on counts in a tile * \param[in] bgval target bg val; typ. > 128 * \param[in] smoothx half-width of block convolution kernel width * \param[in] smoothy half-width of block convolution kernel height * \param[in] scorefract fraction of the max Otsu score; typ. 0.1 * \param[out] pthresh [optional] threshold value that was * used on the normalized image * \return pixd 1 bpp thresholded image, or NULL on error * *
* Notes: * (1) This does background normalization followed by Otsu * thresholding. Otsu binarization attempts to split the * image into two roughly equal sets of pixels, and it does * a very poor job when there are large amounts of dark * background. By doing a background normalization first, * to get the background near 255, we remove this problem. * Then we use a modified Otsu to estimate the best global * threshold on the normalized image. * (2) See pixBackgroundNorm() for meaning and typical values * of input parameters. For a start, you can try: * sx, sy = 10, 15 * thresh = 100 * mincount = 50 * bgval = 255 * smoothx, smoothy = 2 **/ PIX * pixOtsuThreshOnBackgroundNorm(PIX *pixs, PIX *pixim, l_int32 sx, l_int32 sy, l_int32 thresh, l_int32 mincount, l_int32 bgval, l_int32 smoothx, l_int32 smoothy, l_float32 scorefract, l_int32 *pthresh) { l_int32 w, h; l_uint32 val; PIX *pixn, *pixt, *pixd; PROCNAME("pixOtsuThreshOnBackgroundNorm"); if (pthresh) *pthresh = 0; if (!pixs || pixGetDepth(pixs) != 8) return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); if (pixGetColormap(pixs)) return (PIX *)ERROR_PTR("pixs is colormapped", procName, NULL); if (sx < 4 || sy < 4) return (PIX *)ERROR_PTR("sx and sy must be >= 4", procName, NULL); if (mincount > sx * sy) { L_WARNING("mincount too large for tile size\n", procName); mincount = (sx * sy) / 3; } pixn = pixBackgroundNorm(pixs, pixim, NULL, sx, sy, thresh, mincount, bgval, smoothx, smoothy); if (!pixn) return (PIX *)ERROR_PTR("pixn not made", procName, NULL); /* Just use 1 tile for a global threshold, which is stored * as a single pixel in pixt. */ pixGetDimensions(pixn, &w, &h, NULL); pixOtsuAdaptiveThreshold(pixn, w, h, 0, 0, scorefract, &pixt, &pixd); pixDestroy(&pixn); if (pixt && pthresh) { pixGetPixel(pixt, 0, 0, &val); *pthresh = val; } pixDestroy(&pixt); if (!pixd) return (PIX *)ERROR_PTR("pixd not made", procName, NULL); else return pixd; } /*----------------------------------------------------------------------* * Masking and Otsu estimate on adaptive background normalization * *----------------------------------------------------------------------*/ /*! * \brief pixMaskedThreshOnBackgroundNorm() * * \param[in] pixs 8 bpp grayscale; not colormapped * \param[in] pixim [optional] 1 bpp 'image' mask; can be null * \param[in] sx, sy tile size in pixels * \param[in] thresh threshold for determining foreground * \param[in] mincount min threshold on counts in a tile * \param[in] smoothx half-width of block convolution kernel width * \param[in] smoothy half-width of block convolution kernel height * \param[in] scorefract fraction of the max Otsu score; typ. ~ 0.1 * \param[out] pthresh [optional] threshold value that was * used on the normalized image * \return pixd 1 bpp thresholded image, or NULL on error * *
* Notes: * (1) This begins with a standard background normalization. * Additionally, there is a flexible background norm, that * will adapt to a rapidly varying background, and this * puts white pixels in the background near regions with * significant foreground. The white pixels are turned into * a 1 bpp selection mask by binarization followed by dilation. * Otsu thresholding is performed on the input image to get an * estimate of the threshold in the non-mask regions. * The background normalized image is thresholded with two * different values, and the result is combined using * the selection mask. * (2) Note that the numbers 255 (for bgval target) and 190 (for * thresholding on pixn) are tied together, and explicitly * defined in this function. * (3) See pixBackgroundNorm() for meaning and typical values * of input parameters. For a start, you can try: * sx, sy = 10, 15 * thresh = 100 * mincount = 50 * smoothx, smoothy = 2 **/ PIX * pixMaskedThreshOnBackgroundNorm(PIX *pixs, PIX *pixim, l_int32 sx, l_int32 sy, l_int32 thresh, l_int32 mincount, l_int32 smoothx, l_int32 smoothy, l_float32 scorefract, l_int32 *pthresh) { l_int32 w, h, highthresh; l_uint32 val; PIX *pixn, *pixm, *pixd, *pix1, *pix2, *pix3, *pix4; PROCNAME("pixMaskedThreshOnBackgroundNorm"); if (pthresh) *pthresh = 0; if (!pixs || pixGetDepth(pixs) != 8) return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); if (pixGetColormap(pixs)) return (PIX *)ERROR_PTR("pixs is colormapped", procName, NULL); if (sx < 4 || sy < 4) return (PIX *)ERROR_PTR("sx and sy must be >= 4", procName, NULL); if (mincount > sx * sy) { L_WARNING("mincount too large for tile size\n", procName); mincount = (sx * sy) / 3; } /* Standard background normalization */ pixn = pixBackgroundNorm(pixs, pixim, NULL, sx, sy, thresh, mincount, 255, smoothx, smoothy); if (!pixn) return (PIX *)ERROR_PTR("pixn not made", procName, NULL); /* Special background normalization for adaptation to quickly * varying background. Threshold on the very light parts, * which tend to be near significant edges, and dilate to * form a mask over regions that are typically text. The * dilation size is chosen to cover the text completely, * except for very thick fonts. */ pix1 = pixBackgroundNormFlex(pixs, 7, 7, 1, 1, 20); pix2 = pixThresholdToBinary(pix1, 240); pixInvert(pix2, pix2); pixm = pixMorphSequence(pix2, "d21.21", 0); pixDestroy(&pix1); pixDestroy(&pix2); /* Use Otsu to get a global threshold estimate for the image, * which is stored as a single pixel in pix3. */ pixGetDimensions(pixs, &w, &h, NULL); pixOtsuAdaptiveThreshold(pixs, w, h, 0, 0, scorefract, &pix3, NULL); pixGetPixel(pix3, 0, 0, &val); if (pthresh) *pthresh = val; pixDestroy(&pix3); /* Threshold the background normalized images differentially, * using a high value correlated with the background normalization * for the part of the image under the mask (i.e., near the * darker, thicker foreground), and a value that depends on the Otsu * threshold for the rest of the image. This gives a solid * (high) thresholding for the foreground parts of the image, * while allowing the background and light foreground to be * reasonably well cleaned using a threshold adapted to the * input image. */ highthresh = L_MIN(256, val + 30); pixd = pixThresholdToBinary(pixn, highthresh); /* for bg and light fg */ pix4 = pixThresholdToBinary(pixn, 190); /* for heavier fg */ pixCombineMasked(pixd, pix4, pixm); pixDestroy(&pix4); pixDestroy(&pixm); pixDestroy(&pixn); if (!pixd) return (PIX *)ERROR_PTR("pixd not made", procName, NULL); else return pixd; } /*----------------------------------------------------------------------* * Sauvola binarization * *----------------------------------------------------------------------*/ /*! * \brief pixSauvolaBinarizeTiled() * * \param[in] pixs 8 bpp grayscale, not colormapped * \param[in] whsize window half-width for measuring local statistics * \param[in] factor factor for reducing threshold due to variance; >= 0 * \param[in] nx, ny subdivision into tiles; >= 1 * \param[out] ppixth [optional] Sauvola threshold values * \param[out] ppixd [optional] thresholded image * \return 0 if OK, 1 on error * *
* Notes: * (1) The window width and height are 2 * %whsize + 1. The minimum * value for %whsize is 2; typically it is >= 7.. * (2) For nx == ny == 1, this defaults to pixSauvolaBinarize(). * (3) Why a tiled version? * (a) Because the mean value accumulator is a uint32, overflow * can occur for an image with more than 16M pixels. * (b) The mean value accumulator array for 16M pixels is 64 MB. * The mean square accumulator array for 16M pixels is 128 MB. * Using tiles reduces the size of these arrays. * (c) Each tile can be processed independently, in parallel, * on a multicore processor. * (4) The Sauvola threshold is determined from the formula: * t = m * (1 - k * (1 - s / 128)) * See pixSauvolaBinarize() for details. **/ l_ok pixSauvolaBinarizeTiled(PIX *pixs, l_int32 whsize, l_float32 factor, l_int32 nx, l_int32 ny, PIX **ppixth, PIX **ppixd) { l_int32 i, j, w, h, xrat, yrat; PIX *pixth, *pixd, *tileth, *tiled, *pixt; PIX **ptileth, **ptiled; PIXTILING *pt; PROCNAME("pixSauvolaBinarizeTiled"); if (!ppixth && !ppixd) return ERROR_INT("no outputs", procName, 1); if (ppixth) *ppixth = NULL; if (ppixd) *ppixd = NULL; if (!pixs || pixGetDepth(pixs) != 8) return ERROR_INT("pixs undefined or not 8 bpp", procName, 1); if (pixGetColormap(pixs)) return ERROR_INT("pixs is cmapped", procName, 1); pixGetDimensions(pixs, &w, &h, NULL); if (whsize < 2) return ERROR_INT("whsize must be >= 2", procName, 1); if (w < 2 * whsize + 3 || h < 2 * whsize + 3) return ERROR_INT("whsize too large for image", procName, 1); if (factor < 0.0) return ERROR_INT("factor must be >= 0", procName, 1); if (nx <= 1 && ny <= 1) return pixSauvolaBinarize(pixs, whsize, factor, 1, NULL, NULL, ppixth, ppixd); /* Test to see if the tiles are too small. The required * condition is that the tile dimensions must be at least * (whsize + 2) x (whsize + 2). */ xrat = w / nx; yrat = h / ny; if (xrat < whsize + 2) { nx = w / (whsize + 2); L_WARNING("tile width too small; nx reduced to %d\n", procName, nx); } if (yrat < whsize + 2) { ny = h / (whsize + 2); L_WARNING("tile height too small; ny reduced to %d\n", procName, ny); } if (nx <= 1 && ny <= 1) return pixSauvolaBinarize(pixs, whsize, factor, 1, NULL, NULL, ppixth, ppixd); /* We can use pixtiling for painting both outputs, if requested */ if (ppixth) { pixth = pixCreateNoInit(w, h, 8); *ppixth = pixth; } if (ppixd) { pixd = pixCreateNoInit(w, h, 1); *ppixd = pixd; } pt = pixTilingCreate(pixs, nx, ny, 0, 0, whsize + 1, whsize + 1); pixTilingNoStripOnPaint(pt); /* pixSauvolaBinarize() does the stripping */ for (i = 0; i < ny; i++) { for (j = 0; j < nx; j++) { pixt = pixTilingGetTile(pt, i, j); ptileth = (ppixth) ? &tileth : NULL; ptiled = (ppixd) ? &tiled : NULL; pixSauvolaBinarize(pixt, whsize, factor, 0, NULL, NULL, ptileth, ptiled); if (ppixth) { /* do not strip */ pixTilingPaintTile(pixth, i, j, tileth, pt); pixDestroy(&tileth); } if (ppixd) { pixTilingPaintTile(pixd, i, j, tiled, pt); pixDestroy(&tiled); } pixDestroy(&pixt); } } pixTilingDestroy(&pt); return 0; } /*! * \brief pixSauvolaBinarize() * * \param[in] pixs 8 bpp grayscale; not colormapped * \param[in] whsize window half-width for measuring local statistics * \param[in] factor factor for reducing threshold due to variance; >= 0 * \param[in] addborder 1 to add border of width (%whsize + 1) on all sides * \param[out] ppixm [optional] local mean values * \param[out] ppixsd [optional] local standard deviation values * \param[out] ppixth [optional] threshold values * \param[out] ppixd [optional] thresholded image * \return 0 if OK, 1 on error * *
* Notes: * (1) The window width and height are 2 * %whsize + 1. The minimum * value for %whsize is 2; typically it is >= 7.. * (2) The local statistics, measured over the window, are the * average and standard deviation. * (3) The measurements of the mean and standard deviation are * performed inside a border of (%whsize + 1) pixels. If pixs does * not have these added border pixels, use %addborder = 1 to add * it here; otherwise use %addborder = 0. * (4) The Sauvola threshold is determined from the formula: * t = m * (1 - k * (1 - s / 128)) * where: * t = local threshold * m = local mean * k = %factor (>= 0) [ typ. 0.35 ] * s = local standard deviation, which is maximized at * 127.5 when half the samples are 0 and half are 255. * (5) The basic idea of Niblack and Sauvola binarization is that * the local threshold should be less than the median value, * and the larger the variance, the closer to the median * it should be chosen. Typical values for k are between * 0.2 and 0.5. **/ l_ok pixSauvolaBinarize(PIX *pixs, l_int32 whsize, l_float32 factor, l_int32 addborder, PIX **ppixm, PIX **ppixsd, PIX **ppixth, PIX **ppixd) { l_int32 w, h; PIX *pixg, *pixsc, *pixm, *pixms, *pixth, *pixd; PROCNAME("pixSauvolaBinarize"); if (ppixm) *ppixm = NULL; if (ppixsd) *ppixsd = NULL; if (ppixth) *ppixth = NULL; if (ppixd) *ppixd = NULL; if (!ppixm && !ppixsd && !ppixth && !ppixd) return ERROR_INT("no outputs", procName, 1); if (!pixs || pixGetDepth(pixs) != 8) return ERROR_INT("pixs undefined or not 8 bpp", procName, 1); if (pixGetColormap(pixs)) return ERROR_INT("pixs is cmapped", procName, 1); pixGetDimensions(pixs, &w, &h, NULL); if (whsize < 2) return ERROR_INT("whsize must be >= 2", procName, 1); if (w < 2 * whsize + 3 || h < 2 * whsize + 3) return ERROR_INT("whsize too large for image", procName, 1); if (factor < 0.0) return ERROR_INT("factor must be >= 0", procName, 1); if (addborder) { pixg = pixAddMirroredBorder(pixs, whsize + 1, whsize + 1, whsize + 1, whsize + 1); pixsc = pixClone(pixs); } else { pixg = pixClone(pixs); pixsc = pixRemoveBorder(pixs, whsize + 1); } if (!pixg || !pixsc) return ERROR_INT("pixg and pixsc not made", procName, 1); /* All these functions strip off the border pixels. */ if (ppixm || ppixth || ppixd) pixm = pixWindowedMean(pixg, whsize, whsize, 1, 1); if (ppixsd || ppixth || ppixd) pixms = pixWindowedMeanSquare(pixg, whsize, whsize, 1); if (ppixth || ppixd) pixth = pixSauvolaGetThreshold(pixm, pixms, factor, ppixsd); if (ppixd) { pixd = pixApplyLocalThreshold(pixsc, pixth); pixCopyResolution(pixd, pixs); } if (ppixm) *ppixm = pixm; else pixDestroy(&pixm); pixDestroy(&pixms); if (ppixth) *ppixth = pixth; else pixDestroy(&pixth); if (ppixd) *ppixd = pixd; pixDestroy(&pixg); pixDestroy(&pixsc); return 0; } /*! * \brief pixSauvolaGetThreshold() * * \param[in] pixm 8 bpp grayscale; not colormapped * \param[in] pixms 32 bpp * \param[in] factor factor for reducing threshold due to variance; >= 0 * \param[out] ppixsd [optional] local standard deviation * \return pixd 8 bpp, sauvola threshold values, or NULL on error * *
* Notes: * (1) The Sauvola threshold is determined from the formula: * t = m * (1 - k * (1 - s / 128)) * where: * t = local threshold * m = local mean * k = %factor (>= 0) [ typ. 0.35 ] * s = local standard deviation, which is maximized at * 127.5 when half the samples are 0 and half are 255. * (2) See pixSauvolaBinarize() for other details. * (3) Important definitions and relations for computing averages: * v == pixel value * E(p) == expected value of p == average of p over some pixel set * S(v) == square of v == v * v * mv == E(v) == expected pixel value == mean value * ms == E(S(v)) == expected square of pixel values * == mean square value * var == variance == expected square of deviation from mean * == E(S(v - mv)) = E(S(v) - 2 * S(v * mv) + S(mv)) * = E(S(v)) - S(mv) * = ms - mv * mv * s == standard deviation = sqrt(var) * So for evaluating the standard deviation in the Sauvola * threshold, we take * s = sqrt(ms - mv * mv) **/ static PIX * pixSauvolaGetThreshold(PIX *pixm, PIX *pixms, l_float32 factor, PIX **ppixsd) { l_int32 i, j, w, h, tabsize, wplm, wplms, wplsd, wpld, usetab; l_int32 mv, ms, var, thresh; l_uint32 *datam, *datams, *datasd, *datad; l_uint32 *linem, *linems, *linesd, *lined; l_float32 sd; l_float32 *tab; /* of 2^16 square roots */ PIX *pixsd, *pixd; PROCNAME("pixSauvolaGetThreshold"); if (ppixsd) *ppixsd = NULL; if (!pixm || pixGetDepth(pixm) != 8) return (PIX *)ERROR_PTR("pixm undefined or not 8 bpp", procName, NULL); if (pixGetColormap(pixm)) return (PIX *)ERROR_PTR("pixm is colormapped", procName, NULL); if (!pixms || pixGetDepth(pixms) != 32) return (PIX *)ERROR_PTR("pixms undefined or not 32 bpp", procName, NULL); if (factor < 0.0) return (PIX *)ERROR_PTR("factor must be >= 0", procName, NULL); /* Only make a table of 2^16 square roots if there * are enough pixels to justify it. */ pixGetDimensions(pixm, &w, &h, NULL); usetab = (w * h > 100000) ? 1 : 0; if (usetab) { tabsize = 1 << 16; tab = (l_float32 *)LEPT_CALLOC(tabsize, sizeof(l_float32)); for (i = 0; i < tabsize; i++) tab[i] = sqrtf((l_float32)i); } pixd = pixCreate(w, h, 8); if (ppixsd) { pixsd = pixCreate(w, h, 8); *ppixsd = pixsd; } datam = pixGetData(pixm); datams = pixGetData(pixms); if (ppixsd) datasd = pixGetData(pixsd); datad = pixGetData(pixd); wplm = pixGetWpl(pixm); wplms = pixGetWpl(pixms); if (ppixsd) wplsd = pixGetWpl(pixsd); wpld = pixGetWpl(pixd); for (i = 0; i < h; i++) { linem = datam + i * wplm; linems = datams + i * wplms; if (ppixsd) linesd = datasd + i * wplsd; lined = datad + i * wpld; for (j = 0; j < w; j++) { mv = GET_DATA_BYTE(linem, j); ms = linems[j]; var = ms - mv * mv; if (usetab) sd = tab[var]; else sd = sqrtf((l_float32)var); if (ppixsd) SET_DATA_BYTE(linesd, j, (l_int32)sd); thresh = (l_int32)(mv * (1.0 - factor * (1.0 - sd / 128.))); SET_DATA_BYTE(lined, j, thresh); } } if (usetab) LEPT_FREE(tab); return pixd; } /*! * \brief pixApplyLocalThreshold() * * \param[in] pixs 8 bpp grayscale; not colormapped * \param[in] pixth 8 bpp array of local thresholds * \return pixd 1 bpp, thresholded image, or NULL on error */ static PIX * pixApplyLocalThreshold(PIX *pixs, PIX *pixth) { l_int32 i, j, w, h, wpls, wplt, wpld, vals, valt; l_uint32 *datas, *datat, *datad, *lines, *linet, *lined; PIX *pixd; PROCNAME("pixApplyLocalThreshold"); if (!pixs || pixGetDepth(pixs) != 8) return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL); if (pixGetColormap(pixs)) return (PIX *)ERROR_PTR("pixs is colormapped", procName, NULL); if (!pixth || pixGetDepth(pixth) != 8) return (PIX *)ERROR_PTR("pixth undefined or not 8 bpp", procName, NULL); pixGetDimensions(pixs, &w, &h, NULL); pixd = pixCreate(w, h, 1); datas = pixGetData(pixs); datat = pixGetData(pixth); datad = pixGetData(pixd); wpls = pixGetWpl(pixs); wplt = pixGetWpl(pixth); wpld = pixGetWpl(pixd); for (i = 0; i < h; i++) { lines = datas + i * wpls; linet = datat + i * wplt; lined = datad + i * wpld; for (j = 0; j < w; j++) { vals = GET_DATA_BYTE(lines, j); valt = GET_DATA_BYTE(linet, j); if (vals < valt) SET_DATA_BIT(lined, j); } } return pixd; } /*----------------------------------------------------------------------* * Global thresholding using connected components * *----------------------------------------------------------------------*/ /*! * \brief pixThresholdByConnComp() * * \param[in] pixs depth > 1, colormap OK * \param[in] pixm [optional] 1 bpp mask giving region to ignore * by setting pixels to white; use NULL if no mask * \param[in] start, end, incr binarization threshold levels to test * \param[in] thresh48 threshold on normalized difference between the * numbers of 4 and 8 connected components * \param[in] threshdiff threshold on normalized difference between the * number of 4 cc at successive iterations * \param[out] pglobthresh [optional] best global threshold; 0 * if no threshold is found * \param[out] ppixd [optional] image thresholded to binary, or * null if no threshold is found * \param[in] debugflag 1 for plotted results * \return 0 if OK, 1 on error or if no threshold is found * *
* Notes: * (1) This finds a global threshold based on connected components. * Although slow, it is reasonable to use it in a situation where * (a) the background in the image is relatively uniform, and * (b) the result will be fed to an OCR program that accepts 1 bpp * images and works best with easily segmented characters. * The reason for (b) is that this selects a threshold with a * minimum number of both broken characters and merged characters. * (2) If the pix has color, it is converted to gray using the * max component. * (3) Input 0 to use default values for any of these inputs: * %start, %end, %incr, %thresh48, %threshdiff. * (4) This approach can be understood as follows. When the * binarization threshold is varied, the numbers of c.c. identify * four regimes: * (a) For low thresholds, text is broken into small pieces, and * the number of c.c. is large, with the 4 c.c. significantly * exceeding the 8 c.c. * (b) As the threshold rises toward the optimum value, the text * characters coalesce and there is very little difference * between the numbers of 4 and 8 c.c, which both go * through a minimum. * (c) Above this, the image background gets noisy because some * pixels are(thresholded to foreground, and the numbers * of c.c. quickly increase, with the 4 c.c. significantly * larger than the 8 c.c. * (d) At even higher thresholds, the image background noise * coalesces as it becomes mostly foreground, and the * number of c.c. drops quickly. * (5) If there is no global threshold that distinguishes foreground * text from background (e.g., weak text over a background that * has significant variation and/or bleedthrough), this returns 1, * which the caller should check. **/ l_ok pixThresholdByConnComp(PIX *pixs, PIX *pixm, l_int32 start, l_int32 end, l_int32 incr, l_float32 thresh48, l_float32 threshdiff, l_int32 *pglobthresh, PIX **ppixd, l_int32 debugflag) { l_int32 i, thresh, n, n4, n8, mincounts, found, globthresh; l_float32 count4, count8, firstcount4, prevcount4, diff48, diff4; GPLOT *gplot; NUMA *na4, *na8; PIX *pix1, *pix2, *pix3; PROCNAME("pixThresholdByConnComp"); if (pglobthresh) *pglobthresh = 0; if (ppixd) *ppixd = NULL; if (!pixs || pixGetDepth(pixs) == 1) return ERROR_INT("pixs undefined or 1 bpp", procName, 1); if (pixm && pixGetDepth(pixm) != 1) return ERROR_INT("pixm must be 1 bpp", procName, 1); /* Assign default values if requested */ if (start <= 0) start = 80; if (end <= 0) end = 200; if (incr <= 0) incr = 10; if (thresh48 <= 0.0) thresh48 = 0.01; if (threshdiff <= 0.0) threshdiff = 0.01; if (start > end) return ERROR_INT("invalid start,end", procName, 1); /* Make 8 bpp, using the max component if color. */ if (pixGetColormap(pixs)) pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC); else pix1 = pixClone(pixs); if (pixGetDepth(pix1) == 32) pix2 = pixConvertRGBToGrayMinMax(pix1, L_CHOOSE_MAX); else pix2 = pixConvertTo8(pix1, 0); pixDestroy(&pix1); /* Mask out any non-text regions. Do this in-place, because pix2 * can never be the same pix as pixs. */ if (pixm) pixSetMasked(pix2, pixm, 255); /* Make sure there are enough components to get a valid signal */ pix3 = pixConvertTo1(pix2, start); pixCountConnComp(pix3, 4, &n4); pixDestroy(&pix3); mincounts = 500; if (n4 < mincounts) { L_INFO("Insufficient component count: %d\n", procName, n4); pixDestroy(&pix2); return 1; } /* Compute the c.c. data */ na4 = numaCreate(0); na8 = numaCreate(0); numaSetParameters(na4, start, incr); numaSetParameters(na8, start, incr); for (thresh = start, i = 0; thresh <= end; thresh += incr, i++) { pix3 = pixConvertTo1(pix2, thresh); pixCountConnComp(pix3, 4, &n4); pixCountConnComp(pix3, 8, &n8); numaAddNumber(na4, n4); numaAddNumber(na8, n8); pixDestroy(&pix3); } if (debugflag) { lept_mkdir("lept/binarize"); gplot = gplotCreate("/tmp/lept/binarize", GPLOT_PNG, "number of cc vs. threshold", "threshold", "number of cc"); gplotAddPlot(gplot, NULL, na4, GPLOT_LINES, "plot 4cc"); gplotAddPlot(gplot, NULL, na8, GPLOT_LINES, "plot 8cc"); gplotMakeOutput(gplot); gplotDestroy(&gplot); } n = numaGetCount(na4); found = FALSE; for (i = 0; i < n; i++) { if (i == 0) { numaGetFValue(na4, i, &firstcount4); prevcount4 = firstcount4; } else { numaGetFValue(na4, i, &count4); numaGetFValue(na8, i, &count8); diff48 = (count4 - count8) / firstcount4; diff4 = L_ABS(prevcount4 - count4) / firstcount4; if (debugflag) { lept_stderr("diff48 = %7.3f, diff4 = %7.3f\n", diff48, diff4); } if (diff48 < thresh48 && diff4 < threshdiff) { found = TRUE; break; } prevcount4 = count4; } } numaDestroy(&na4); numaDestroy(&na8); if (found) { globthresh = start + i * incr; if (pglobthresh) *pglobthresh = globthresh; if (ppixd) { *ppixd = pixConvertTo1(pix2, globthresh); pixCopyResolution(*ppixd, pixs); } if (debugflag) lept_stderr("global threshold = %d\n", globthresh); pixDestroy(&pix2); return 0; } if (debugflag) lept_stderr("no global threshold found\n"); pixDestroy(&pix2); return 1; } /*----------------------------------------------------------------------* * Global thresholding by histogram * *----------------------------------------------------------------------*/ /*! * \brief pixThresholdByHisto() * * \param[in] pixs gray 8 bpp, no colormap * \param[in] factor subsampling factor >= 1 * \param[in] halfw half of window width for smoothing; * use 0 for default * \param[in] delta relative amount to resolve peaks and valleys; * in (0 ... 1], use 0 for default * \param[out] pthresh best global threshold; 0 if no threshold is found * \param[out] ppixd [optional] thresholded 1 bpp pix * \param[out] ppixhisto [optional] rescaled histogram of gray values * \return 0 if OK, 1 on error or if no threshold is found * *
* Notes: * (1) This finds a global threshold. It is best for an image that * has a fairly well-defined fg and bg. * (2) If it finds a good threshold and %ppixd is defined, the binarized * image is returned in &pixd; otherwise it return null. * (3) Suggest using default values for %half and %delta. * (4) Returns 0 in %pthresh if it can't find a good threshold. **/ l_ok pixThresholdByHisto(PIX *pixs, l_int32 factor, l_int32 halfw, l_float32 delta, l_int32 *pthresh, PIX **ppixd, PIX **ppixhisto) { l_float32 maxval, fract; NUMA *na1, *na2, *na3; PROCNAME("pixThresholdByHisto"); if (ppixhisto) *ppixhisto = NULL; if (ppixd) *ppixd = NULL; if (!pthresh) return ERROR_INT("&thresh not defined", procName, 1); *pthresh = 0; if (!pixs || pixGetDepth(pixs) != 8) return ERROR_INT("pixs undefined or not 8 bpp", procName, 1); if (pixGetColormap(pixs)) return ERROR_INT("pixs has colormap", procName, 1); if (factor < 1) return ERROR_INT("sampling must be >= 1", procName, 1); if (halfw <= 0) halfw = 20; if (delta <= 0.0) delta = 0.1; /* Make a histogram of pixel values where the largest peak * is normalized to a value of 1.0. */ na1 = pixGetGrayHistogram(pixs, factor); na2 = numaWindowedMean(na1, halfw); /* smoothing */ numaGetMax(na2, &maxval, NULL); na3 = numaTransform(na2, 0.0, 1.0 / maxval); /* rescale to max of 1.0 */ numaDestroy(&na1); numaDestroy(&na2); numaFindLocForThreshold(na3, 0, pthresh, &fract); L_INFO("fractional area under first peak: %5.3f\n", procName, fract); if (ppixhisto) { lept_mkdir("lept/histo"); gplotSimple1(na3, GPLOT_PNG, "/tmp/lept/histo/histo", NULL); *ppixhisto = pixRead("/tmp/lept/histo/histo.png"); } numaDestroy(&na3); if (*pthresh > 0 && ppixd) *ppixd = pixThresholdToBinary(pixs, *pthresh); return 0; }