From d82e8565d8409d94855739fbf6ebf288b9dc0800 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Mon, 15 Jul 2024 15:40:19 +0200 Subject: [PATCH 01/66] picked up doxygen comment/documentation edits from master/dev branch # Conflicts: # include/tesseract/baseapi.h # src/api/baseapi.cpp # src/ccmain/control.cpp # src/ccmain/equationdetect.cpp # src/ccmain/tesseractclass.cpp # src/ccmain/thresholder.cpp # src/ccmain/thresholder.h --- include/tesseract/baseapi.h | 120 ++++++++++++++++++++++++++-------- src/api/baseapi.cpp | 6 +- src/api/hocrrenderer.cpp | 4 ++ src/api/pagerenderer.cpp | 3 +- src/ccmain/control.cpp | 8 ++- src/ccmain/paramsd.cpp | 1 + src/ccmain/tessedit.cpp | 6 ++ src/ccmain/tesseractclass.cpp | 2 +- src/ccmain/tesseractclass.h | 21 +++++- src/ccstruct/pageres.h | 2 + src/ccstruct/ratngs.h | 4 ++ 11 files changed, 143 insertions(+), 34 deletions(-) diff --git a/include/tesseract/baseapi.h b/include/tesseract/baseapi.h index 9475fb27ba..6e436b6f62 100644 --- a/include/tesseract/baseapi.h +++ b/include/tesseract/baseapi.h @@ -91,46 +91,73 @@ class TESS_API TessBaseAPI { * reading a UNLV zone file, and for searchable PDF output. */ void SetInputName(const char *name); + /** * These functions are required for searchable PDF output. * We need our hands on the input file so that we can include * it in the PDF without transcoding. If that is not possible, * we need the original image. Finally, resolution metadata * is stored in the PDF so we need that as well. + * + * @{ */ + const char *GetInputName(); + // Takes ownership of the input pix. void SetInputImage(Pix *pix); + Pix *GetInputImage(); + int GetSourceYResolution(); + const char *GetDatapath(); + + /** + * @} + */ + /** Set the name of the bonus output files. Needed only for debugging. */ void SetOutputName(const char *name); /** * Set the value of an internal "parameter." + * * Supply the name of the parameter and the value as a string, just as * you would in a config file. 
- * Returns false if the name lookup failed. - * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z. - * Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode. - * SetVariable may be used before Init, but settings will revert to + * E.g. `SetVariable("tessedit_char_blacklist", "xyz");` to ignore 'x', 'y' and 'z'. + * Or `SetVariable("classify_bln_numeric_mode", "1");` to set numeric-only mode. + * + * Returns false if the name lookup failed (or the set-value attempt is rejected + * for any reason). + * + * SetVariable() may be used before Init(), but settings will revert to * defaults on End(). * * Note: Must be called after Init(). Only works for non-init variables * (init variables should be passed to Init()). + * + * @{ */ bool SetVariable(const char *name, const char *value); bool SetDebugVariable(const char *name, const char *value); + /** + * @} + */ /** * Returns true if the parameter was found among Tesseract parameters. * Fills in value with the value of the parameter. + * + * @{ */ bool GetIntVariable(const char *name, int *value) const; bool GetBoolVariable(const char *name, bool *value) const; bool GetDoubleVariable(const char *name, double *value) const; + /** + * @} + */ /** * Returns the pointer to the string that represents the value of the @@ -161,28 +188,30 @@ class TESS_API TessBaseAPI { * Instances are now mostly thread-safe and totally independent, * but some global parameters remain. Basically it is safe to use multiple * TessBaseAPIs in different threads in parallel, UNLESS: - * you use SetVariable on some of the Params in classify and textord. + * you use SetVariable on some of the Params in `classify` and `textord`. * If you do, then the effect will be to change it for all your instances. * - * Start tesseract. Returns zero on success and -1 on failure. + * Starts tesseract. Returns zero on success and -1 on failure. 
* NOTE that the only members that may be called before Init are those * listed above here in the class definition. * * The datapath must be the name of the tessdata directory. - * The language is (usually) an ISO 639-3 string or nullptr will default to - * eng. It is entirely safe (and eventually will be efficient too) to call - * Init multiple times on the same instance to change language, or just + * The language is (usually) an ISO 639-3 string or, when empty or nullptr, will default to + * "eng". It is entirely safe (and eventually will be efficient too) to call + * Init() multiple times on the same instance to change language, or just * to reset the classifier. + * * The language may be a string of the form [~][+[~]]* indicating - * that multiple languages are to be loaded. Eg hin+eng will load Hindi and + * that multiple languages are to be loaded. E.g. "hin+eng" will load Hindi and * English. Languages may specify internally that they want to be loaded - * with one or more other languages, so the ~ sign is available to override - * that. Eg if hin were set to load eng by default, then hin+~eng would force - * loading only hin. The number of loaded languages is limited only by + * with one or more other languages, so the `~` sign is available to override + * that. E.g. if "hin" were set to load "eng" by default, then "hin+~eng" would force + * loading only "hin". The number of loaded languages is limited only by * memory, with the caveat that loading additional languages will impact * both speed and accuracy, as there is more work to do to decide on the * applicable language, and there is more chance of hallucinating incorrect * words. + * * WARNING: On changing languages, all Tesseract parameters are reset * back to their default values. (Which may vary between languages.) 
* If you have a rare need to set a Variable that controls @@ -193,6 +222,8 @@ class TESS_API TessBaseAPI { * * If set_only_non_debug_params is true, only params that do not contain * "debug" in the name will be set. + * + * @{ */ int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, @@ -214,6 +245,8 @@ class TESS_API TessBaseAPI { const std::vector *vars_values, bool set_only_non_debug_params, FileReader reader); + /** @} */ + /** * Returns the languages string used in the last valid initialization. * If the last initialization specified "deu+hin" then that will be @@ -255,7 +288,7 @@ class TESS_API TessBaseAPI { /** * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK. * The mode is stored as an IntParam so it can also be modified by - * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string). + * ReadConfigFile() or SetVariable("tessedit_pageseg_mode"). */ void SetPageSegMode(PageSegMode mode); @@ -266,7 +299,7 @@ class TESS_API TessBaseAPI { * Recognize a rectangle from an image and return the result as a string. * May be called many times for a single Init. * Currently has no error checking. - * Greyscale of 8 and color of 24 or 32 bits per pixel may be given. + * Greyscale of 8 and color of 24 or 32 bits per pixel may be given (in RGB/RGBA byte layout). * Palette color images will not work properly and must be converted to * 24 bit. * Binary images of 1 bit per pixel may also be given but they must be @@ -284,7 +317,7 @@ class TESS_API TessBaseAPI { int height); /** - * Call between pages or documents etc to free up memory and forget + * Call between pages or documents, etc., to free up memory and forget * adaptive data. */ void ClearAdaptiveClassifier(); @@ -455,6 +488,8 @@ class TESS_API TessBaseAPI { /** * Methods to retrieve information after SetAndThresholdImage(), * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.) 
+ * + * @{ */ /** @@ -531,6 +566,7 @@ class TESS_API TessBaseAPI { * monitor can be used to * cancel the recognition * receive progress callbacks + * * Returned string must be freed with the delete [] operator. */ char *GetHOCRText(ETEXT_DESC *monitor, int page_number); @@ -539,6 +575,7 @@ class TESS_API TessBaseAPI { * Make a HTML-formatted string with hOCR markup from the internal * data structures. * page_number is 0-based but will appear in the output as 1-based. + * * Returned string must be freed with the delete [] operator. */ char *GetHOCRText(int page_number); @@ -546,30 +583,39 @@ class TESS_API TessBaseAPI { /** * Make an XML-formatted string with Alto markup from the internal * data structures. + * + * Returned string must be freed with the delete [] operator. */ char *GetAltoText(ETEXT_DESC *monitor, int page_number); /** * Make an XML-formatted string with Alto markup from the internal * data structures. + * + * Returned string must be freed with the delete [] operator. */ char *GetAltoText(int page_number); /** * Make an XML-formatted string with PAGE markup from the internal * data structures. + * + * Returned string must be freed with the delete [] operator. */ char *GetPAGEText(ETEXT_DESC *monitor, int page_number); /** * Make an XML-formatted string with PAGE markup from the internal * data structures. + * + * Returned string must be freed with the delete [] operator. */ char *GetPAGEText(int page_number); /** * Make a TSV-formatted string from the internal data structures. * page_number is 0-based but will appear in the output as 1-based. + * * Returned string must be freed with the delete [] operator. */ char *GetTSVText(int page_number); @@ -578,6 +624,7 @@ class TESS_API TessBaseAPI { * Make a box file for LSTM training from the internal data structures. * Constructs coordinates in the original image - not just the rectangle. * page_number is a 0-based page index that will appear in the box file. 
+ * * Returned string must be freed with the delete [] operator. */ char *GetLSTMBoxText(int page_number); @@ -587,6 +634,7 @@ class TESS_API TessBaseAPI { * format as a box file used in training. * Constructs coordinates in the original image - not just the rectangle. * page_number is a 0-based page index that will appear in the box file. + * * Returned string must be freed with the delete [] operator. */ char *GetBoxText(int page_number); @@ -595,6 +643,7 @@ class TESS_API TessBaseAPI { * The recognized text is returned as a char* which is coded in the same * format as a WordStr box file used in training. * page_number is a 0-based page index that will appear in the box file. + * * Returned string must be freed with the delete [] operator. */ char *GetWordStrBoxText(int page_number); @@ -602,6 +651,7 @@ class TESS_API TessBaseAPI { /** * The recognized text is returned as a char* which is coded * as UNLV format Latin-1 with specific reject and suspect codes. + * * Returned string must be freed with the delete [] operator. */ char *GetUNLVText(); @@ -622,19 +672,27 @@ class TESS_API TessBaseAPI { * The recognized text is returned as a char* which is coded * as UTF8 and must be freed with the delete [] operator. * page_number is a 0-based page index that will appear in the osd file. + * + * Returned string must be freed with the delete [] operator. */ char *GetOsdText(int page_number); /** Returns the (average) confidence value between 0 and 100. */ int MeanTextConf(); + /** * Returns all word confidences (between 0 and 100) in an array, terminated - * by -1. The calling function must delete [] after use. + * by -1. + * + * The calling function must `delete []` after use. + * * The number of confidences should correspond to the number of space- * delimited words in GetUTF8Text. */ int *AllWordConfidences(); + /** @} */ + #ifndef DISABLED_LEGACY_ENGINE /** * Applies the given word to the adaptive classifier if possible. 
@@ -668,20 +726,23 @@ class TESS_API TessBaseAPI { /** * Clear any library-level memory caches. * There are a variety of expensive-to-load constant data structures (mostly - * language dictionaries) that are cached globally -- surviving the Init() - * and End() of individual TessBaseAPI's. This function allows the clearing + * language dictionaries) that are cached globally -- surviving the `Init()` + * and `End()` of individual TessBaseAPI's. This function allows the clearing * of these caches. **/ static void ClearPersistentCache(); /** * Check whether a word is valid according to Tesseract's language model + * * @return 0 if the word is invalid, non-zero if valid. + * * @warning temporary! This function will be removed from here and placed * in a separate API at some future time. */ int IsValidWord(const char *word) const; - // Returns true if utf8_character is defined in the UniCharset. + + /// Returns true if utf8_character is defined in the UniCharset. bool IsValidCharacter(const char *utf8_character) const; bool GetTextDirection(int *out_offset, float *out_slope); @@ -744,12 +805,13 @@ class TESS_API TessBaseAPI { */ int FindLines(); - /** Delete the pageres and block list ready for a new page. */ + /** Delete the PageRes and block list, readying tesseract for OCRing a new page. */ void ClearResults(); /** * Return an LTR Result Iterator -- used only for training, as we really want * to ignore all BiDi smarts at that point. + * * delete once you're done with it. */ LTRResultIterator *GetLTRIterator(); @@ -770,11 +832,11 @@ class TESS_API TessBaseAPI { } protected: - Tesseract *tesseract_; ///< The underlying data object. - Tesseract *osd_tesseract_; ///< For orientation & script detection. - EquationDetect *equ_detect_; ///< The equation detector. - FileReader reader_; ///< Reads files from any filesystem. - ImageThresholder *thresholder_; ///< Image thresholding module. + Tesseract *tesseract_; ///< The underlying data object. 
+ Tesseract *osd_tesseract_; ///< For orientation & script detection. + EquationDetect *equ_detect_; ///< The equation detector. + FileReader reader_; ///< Reads files from any filesystem. + ImageThresholder *thresholder_; ///< Image thresholding module. std::vector *paragraph_models_; BLOCK_LIST *block_list_; ///< The page layout. PAGE_RES *page_res_; ///< The page-level data. @@ -800,11 +862,17 @@ class TESS_API TessBaseAPI { private: // A list of image filenames gets special consideration + // + // If global parameter `tessedit_page_number` is non-negative, will only process that + // single page. Works for multi-page tiff file, or filelist. bool ProcessPagesFileList(FILE *fp, std::string *buf, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer, int tessedit_page_number); // TIFF supports multipage so gets special consideration. + // + // If global parameter `tessedit_page_number` is non-negative, will only process that + // single page. Works for multi-page tiff file, or filelist. bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size, const char *filename, const char *retry_config, int timeout_millisec, diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 8c6b96c400..90c6847d4a 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -340,6 +340,7 @@ void TessBaseAPI::PrintVariables(FILE *fp) const { * The language is (usually) an ISO 639-3 string or nullptr will default to eng. * If numeric_mode is true, then only digits and Roman numerals will * be returned. + * * @return: 0 on success and -1 on initialization failure. */ int TessBaseAPI::Init(const char *datapath, const char *language, OcrEngineMode oem, char **configs, @@ -915,11 +916,14 @@ int TessBaseAPI::GetSourceYResolution() { return thresholder_->GetSourceYResolution(); } -// If flist exists, get data from there. Otherwise get data from buf. +// If `flist` exists, get data from there. Otherwise get data from `buf`. 
// Seems convoluted, but is the easiest way I know of to meet multiple // goals. Support streaming from stdin, and also work on platforms // lacking fmemopen. // TODO: check different logic for flist/buf and simplify. +// +// If `tessedit_page_number` is non-negative, will only process that +// single page. Works for multi-page tiff file as well as a filelist. bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer, int tessedit_page_number) { diff --git a/src/api/hocrrenderer.cpp b/src/api/hocrrenderer.cpp index 08d9b6d331..cd9bdd0557 100644 --- a/src/api/hocrrenderer.cpp +++ b/src/api/hocrrenderer.cpp @@ -498,6 +498,10 @@ TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info) } bool TessHOcrRenderer::BeginDocumentHandler() { + // This code ensures that Tesseract's hOCR output conforms to XHTML standards. + // It includes text direction and baseline information to facilitate correct rendering in Chrome. + //SetContentType("application/xhtml+xml"); + AppendString( "\n" "WordDirection()) { diff --git a/src/ccmain/control.cpp b/src/ccmain/control.cpp index 573524e1d5..60651b7fdc 100644 --- a/src/ccmain/control.cpp +++ b/src/ccmain/control.cpp @@ -1307,9 +1307,11 @@ float Tesseract::ClassifyBlobAsWord(int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob // word_data holds the word to be recognized, and its block and row, and // pr_it points to the word as well, in case we are running LSTM and it wants // to output multiple words. -// Recognizes in the current language, and if successful that is all. +// Recognizes in the current language, and if successful (a.k.a. accepted) that is all. // If recognition was not successful, tries all available languages until -// it gets a successful result or runs out of languages. +// it gets a successful result or runs out of languages.
Keeps the best result, +// where "best" is defined as: the first language that produces an *acceptable* result +// (as determined by Dict::AcceptableResult() et al). void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordData *word_data) { #ifdef DISABLED_LEGACY_ENGINE WordRecognizer recognizer = &Tesseract::classify_word_pass1; @@ -1880,7 +1882,7 @@ bool Tesseract::check_debug_pt(WERD_RES *word, int location) { } else { tprintf("null best choice\n"); } - tprintf("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE"); + tprintf("Word Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE"); tprintf("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE"); return true; } else { diff --git a/src/ccmain/paramsd.cpp b/src/ccmain/paramsd.cpp index 0cc18e9cf9..398e34b1bd 100644 --- a/src/ccmain/paramsd.cpp +++ b/src/ccmain/paramsd.cpp @@ -205,6 +205,7 @@ SVMenuNode *ParamsEditor::BuildListOfAllLeaves(tesseract::Tesseract *tess) { ParamContent_LIST vclist; ParamContent_IT vc_it(&vclist); // Amount counts the number of entries for a specific char*. + // // TODO(rays) get rid of the use of std::map. std::map amount; diff --git a/src/ccmain/tessedit.cpp b/src/ccmain/tessedit.cpp index 5a7601ad9f..fc6e3887c1 100644 --- a/src/ccmain/tessedit.cpp +++ b/src/ccmain/tessedit.cpp @@ -397,17 +397,23 @@ int Tesseract::init_tesseract(const std::string &arg0, const std::string &textba } // Common initialization for a single language. +// // arg0 is the datapath for the tessdata directory, which could be the // path of the tessdata directory with no trailing /, or (if tessdata // lives in the same directory as the executable, the path of the executable, // hence the name arg0. +// // textbase is an optional output file basename (used only for training) +// // language is the language code to load. +// // oem controls which engine(s) will operate on the image +// // configs (argv) is an array of config filenames to load variables from. // May be nullptr.
// configs_size (argc) is the number of elements in configs. // vars_vec is an optional vector of variables to set. +// // vars_values is an optional corresponding vector of values for the variables // in vars_vec. // If set_only_non_debug_params is true, only params that do not contain diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp index 3f63ea01cd..871958fff5 100644 --- a/src/ccmain/tesseractclass.cpp +++ b/src/ccmain/tesseractclass.cpp @@ -422,7 +422,7 @@ Tesseract::Tesseract() , double_MEMBER(textord_tabfind_aligned_gap_fraction, 0.75, "Fraction of height used as a minimum gap for aligned blobs.", this->params()) , INT_MEMBER(tessedit_parallelize, 0, "Run in parallel where possible", this->params()) - , BOOL_MEMBER(preserve_interword_spaces, false, "Preserve multiple interword spaces", + , BOOL_MEMBER(preserve_interword_spaces, false, "When `true`: preserve multiple inter-word spaces as-is, or when `false`: compress multiple inter-word spaces to a single space character.", this->params()) , STRING_MEMBER(page_separator, "\f", "Page separator (default is form feed control character)", this->params()) diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h index 42f8febcd2..166d01a674 100644 --- a/src/ccmain/tesseractclass.h +++ b/src/ccmain/tesseractclass.h @@ -181,7 +181,7 @@ class TESS_API Tesseract : public Wordrec { ~Tesseract() override; // Return appropriate dictionary - Dict &getDict() override; + virtual Dict &getDict() override; // Clear as much used memory as possible without resetting the adaptive // classifier or losing any other classifier data. @@ -200,9 +200,11 @@ class TESS_API Tesseract : public Wordrec { const FCOORD &reskew() const { return reskew_; } + float gradient() const { return gradient_; } + // Destroy any existing pix and return a pointer to the pointer. 
Image *mutable_pix_binary() { pix_binary_.destroy(); @@ -435,6 +437,16 @@ class TESS_API Tesseract : public Wordrec { // best raw choice, and undoing all the work done to fake out the word. float ClassifyBlobAsWord(int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob, std::string &best_str, float *c2); + // Generic function for classifying a word. Can be used either for pass1 or + // pass2 according to the function passed to recognizer. + // word_data holds the word to be recognized, and its block and row, and + // pr_it points to the word as well, in case we are running LSTM and it wants + // to output multiple words. + // Recognizes in the current language, and if successful (a.k.a. accepted) that is all. + // If recognition was not successful, tries all available languages until + // it gets a successful result or runs out of languages. Keeps the best result, + // where "best" is defined as: the first language that produces an *acceptable* result + // (as determined by Dict::AcceptableResult() et al). void classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordData *word_data); void classify_word_pass1(const WordData &word_data, WERD_RES **in_word, PointerVector *out_words); @@ -504,18 +516,25 @@ class TESS_API Tesseract : public Wordrec { TessdataManager mgr; return init_tesseract(datapath, {}, language, oem, nullptr, 0, nullptr, nullptr, false, &mgr); } + // Common initialization for a single language. + // // arg0 is the datapath for the tessdata directory, which could be the // path of the tessdata directory with no trailing /, or (if tessdata // lives in the same directory as the executable, the path of the executable, // hence the name arg0. + // // textbase is an optional output file basename (used only for training) + // // language is the language code to load. + // // oem controls which engine(s) will operate on the image + // // configs (argv) is an array of config filenames to load variables from. // May be nullptr.
// configs_size (argc) is the number of elements in configs. // vars_vec is an optional vector of variables to set. + // // vars_values is an optional corresponding vector of values for the variables // in vars_vec. // If set_only_non_debug_params is true, only params that do not contain diff --git a/src/ccstruct/pageres.h b/src/ccstruct/pageres.h index 7e631a9bb2..9901f66d1e 100644 --- a/src/ccstruct/pageres.h +++ b/src/ccstruct/pageres.h @@ -461,9 +461,11 @@ class TESS_API WERD_RES : public ELIST_LINK { // those languages that are using CJK pitch model and thus it has to // be true if and only if tesseract->textord_use_cjk_fp_model is // true. + // // If allow_detailed_fx is true, the feature extractor will receive fine // precision outline information, allowing smoother features and better // features on low resolution images. + // // The norm_mode sets the default mode for normalization in absence // of any of the above flags. It should really be a tesseract::OcrEngineMode // but is declared as int for ease of use with tessedit_ocr_engine_mode. diff --git a/src/ccstruct/ratngs.h b/src/ccstruct/ratngs.h index 1389d4a5bf..acb5f8e7c6 100644 --- a/src/ccstruct/ratngs.h +++ b/src/ccstruct/ratngs.h @@ -177,11 +177,13 @@ class BLOB_CHOICE : public ELIST_LINK { static_cast(max_xheight_), unichar_id_, (unicharset == nullptr) ? "" : unicharset->debug_str(unichar_id_).c_str()); } + void print_full() const { print(nullptr); tprintf(" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n", script_id_, fontinfo_id_, fontinfo_id2_, static_cast(yshift_), classifier_); } + // Sort function for sorting BLOB_CHOICEs in increasing order of rating. static int SortByRating(const void *p1, const void *p2) { const BLOB_CHOICE *bc1 = *static_cast(p1); @@ -577,6 +579,7 @@ class TESS_API WERD_CHOICE : public ELIST_LINK { private: const UNICHARSET *unicharset_; // TODO(rays) Perhaps replace the multiple arrays with an array of structs? 
+ // // unichar_ids_ is an array of classifier "results" that make up a word. // For each unichar_ids_[i], script_pos_[i] has the sub/super/normal position // of each unichar_id. @@ -584,6 +587,7 @@ class TESS_API WERD_CHOICE : public ELIST_LINK { // were put together to make the classification results in the ith position // in unichar_ids_, and certainties_[i] is the certainty of the choice that // was used in this word. + // // == Change from before == // Previously there was fragment_lengths_ that allowed a word to be // artificially composed of multiple fragment results. Since the new From b0a563416a18ae6e13ec7d480ed31db9594e087d Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Mon, 15 Jul 2024 16:09:07 +0200 Subject: [PATCH 02/66] correcting comment formatting --- src/ccutil/elst2.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/ccutil/elst2.h b/src/ccutil/elst2.h index 6935403475..c75105f4c9 100644 --- a/src/ccutil/elst2.h +++ b/src/ccutil/elst2.h @@ -44,13 +44,13 @@ b) Most of the code is inline so: i) The duplication in source does not affect the run time code size - the code is copied inline anyway! - ii) The compiler should have a bit less work to do! +ii) The compiler should have a bit less work to do! **********************************************************************/ /********************************************************************** * CLASS - ELIST2_LINK * - * Generic link class for doubly linked lists with embedded links + * Generic link class for doubly linked lists with embedded links * * Note: No destructor - elements are assumed to be destroyed EITHER after * they have been extracted from a list OR by the ELIST2 destructor which @@ -785,8 +785,7 @@ inline void ELIST2_ITERATOR::sort( // sort elements * Add a new element to the end of the list without moving the iterator. * This is provided because a single linked list cannot move to the last as * the iterator couldn't set its prev pointer. 
Adding to the end is - * essential for implementing - queues. + * essential for implementing queues. **********************************************************************/ inline void ELIST2_ITERATOR::add_to_end( // element to add From eea578a19534dcbf265f01f3f1284254183b563a Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Mon, 15 Jul 2024 16:31:16 +0200 Subject: [PATCH 03/66] improved parameter description # Conflicts: # src/ccmain/tesseractclass.cpp --- src/ccmain/tesseractclass.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp index 871958fff5..a16c9bae3e 100644 --- a/src/ccmain/tesseractclass.cpp +++ b/src/ccmain/tesseractclass.cpp @@ -117,9 +117,9 @@ Tesseract::Tesseract() "For standard Otsu use 0.0, otherwise 0.1 is recommended", this->params()) , INT_INIT_MEMBER(tessedit_ocr_engine_mode, tesseract::OEM_DEFAULT, - "Which OCR engine(s) to run (Tesseract, LSTM, both)." - " Defaults to loading and running the most accurate" - " available.", + "Which OCR engine(s) to run (0: Tesseract, 1: LSTM, 2: both, 3: default). 
" + "Defaults to loading and running the most accurate " + "available.", this->params()) , STRING_MEMBER(tessedit_char_blacklist, "", "Blacklist of chars not to recognize", this->params()) From e5994d27da3e8c49bbd62ded6a5bdf53f34eae9c Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Mon, 15 Jul 2024 23:54:19 +0200 Subject: [PATCH 04/66] fix tesseract error report: ERROR: Illegal ambiguity specification on line 7181: expected 3 fields but got 0; line: `` [] # Conflicts: # src/ccutil/ambigs.cpp --- src/ccutil/ambigs.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ccutil/ambigs.cpp b/src/ccutil/ambigs.cpp index 34d68968f8..e8ffc71b12 100644 --- a/src/ccutil/ambigs.cpp +++ b/src/ccutil/ambigs.cpp @@ -103,6 +103,9 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi tprintf("read line %s\n", buffer); } ++line_num; + // empty line? skip. + if (!*buffer) + continue; if (!ParseAmbiguityLine(line_num, version, debug_level, encoder_set, buffer, &test_ambig_part_size, test_unichar_ids, &replacement_ambig_part_size, replacement_string, &type)) { From 46a51a61260501a17b30fc14aa57d28e28b94c9f Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Tue, 16 Jul 2024 21:08:52 +0200 Subject: [PATCH 05/66] fix: Tesseract::CountMisfitTops(): do not clip the top to max(255) (while we did not clip to min(0)!) while we use that top value to sanity-check the box for inclusion in the xheight recalc. Also make the debug output more legible for non-experts: we're not in VT100 restricted-view county any more so there's no need for cryptic one-char descriptors in diagnostics texts. # Conflicts: # src/ccmain/fixxht.cpp --- src/ccmain/fixxht.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/ccmain/fixxht.cpp b/src/ccmain/fixxht.cpp index 9253673d32..501cb20a61 100644 --- a/src/ccmain/fixxht.cpp +++ b/src/ccmain/fixxht.cpp @@ -37,7 +37,7 @@ namespace tesseract { // 2. All xheight lines, such as summer. 
Here the initial estimate will have // guessed that the blob tops are caps and will have placed the xheight too low. // 3. Noise/logos beside words, or changes in font size on a line. Such -// things can blow the statistics and cause an incorrect estimate. +// things can blow up the statistics and cause an incorrect estimate. // 4. Incorrect baseline. Can happen when 2 columns are incorrectly merged. // In this case the x-height is often still correct. // @@ -76,10 +76,13 @@ int Tesseract::CountMisfitTops(WERD_RES *word_res) { TBLOB *blob = word_res->rebuild_word->blobs[blob_id]; UNICHAR_ID class_id = word_res->best_choice->unichar_id(blob_id); if (unicharset.get_isalpha(class_id) || unicharset.get_isdigit(class_id)) { - int top = blob->bounding_box().top(); + TBOX bbox = blob->bounding_box(); + auto top = bbox.top(); +#if 0 if (top >= INT_FEAT_RANGE) { top = INT_FEAT_RANGE - 1; } +#endif int min_bottom, max_bottom, min_top, max_top; unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom, &min_top, &max_top); if (max_top - min_top > kMaxCharTopRange) { @@ -91,9 +94,9 @@ int Tesseract::CountMisfitTops(WERD_RES *word_res) { ++bad_blobs; } if (debug_x_ht_level >= 1) { - tprintf("Class %s is %s with top %d vs limits of %d->%d, +/-%d\n", + tprintf("Class %s is %s with top %d vs limits of %d->%d, +/-%d (bbox: %s)\n", unicharset.id_to_unichar(class_id), bad ? "Misfit" : "OK", top, min_top, max_top, - static_cast(x_ht_acceptance_tolerance)); + static_cast(x_ht_acceptance_tolerance), bbox.print_to_str().c_str()); } } } From ffb271218eae53dbcd96e18c60068a534fe4190e Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Tue, 16 Jul 2024 21:17:01 +0200 Subject: [PATCH 06/66] consistency in coding: introducing UNICHARSET::set_enabled() which does the same for the `enabled` member flag as the other set_xyz() member functions do for their respective member flags already.
# Conflicts: # src/ccutil/unicharset.cpp --- src/ccutil/unicharset.cpp | 5 +++-- src/ccutil/unicharset.h | 8 +++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/ccutil/unicharset.cpp b/src/ccutil/unicharset.cpp index c366ac37f7..13eecc1129 100644 --- a/src/ccutil/unicharset.cpp +++ b/src/ccutil/unicharset.cpp @@ -45,6 +45,7 @@ static const int ISDIGIT_MASK = 0x8; static const int ISPUNCTUATION_MASK = 0x10; // Y coordinate threshold for determining cap-height vs x-height. +// // TODO(rays) Bring the global definition down to the ccutil library level, // so this constant is relative to some other constants. static const int kMeanlineThreshold = 220; @@ -901,7 +902,7 @@ bool UNICHARSET::load_via_fgets( this->set_ispunctuation(id, properties & ISPUNCTUATION_MASK); this->set_isngram(id, false); this->set_script(id, script); - this->unichars[id].properties.enabled = true; + this->set_enabled(id, true); this->set_top_bottom(id, min_bottom, max_bottom, min_top, max_top); this->set_width_stats(id, width, width_sd); this->set_bearing_stats(id, bearing, bearing_sd); @@ -1013,7 +1014,7 @@ bool UNICHARSET::major_right_to_left() const { void UNICHARSET::set_black_and_whitelist(const char *blacklist, const char *whitelist, const char *unblacklist) { - bool def_enabled = whitelist == nullptr || whitelist[0] == '\0'; + bool def_enabled = (whitelist == nullptr || whitelist[0] == '\0'); // Set everything to default for (auto &uc : unichars) { uc.properties.enabled = def_enabled; diff --git a/src/ccutil/unicharset.h b/src/ccutil/unicharset.h index 5cafb46930..88fe7440be 100644 --- a/src/ccutil/unicharset.h +++ b/src/ccutil/unicharset.h @@ -605,7 +605,7 @@ class TESS_API UNICHARSET { int *min_top, int *max_top) const { if (INVALID_UNICHAR_ID == unichar_id) { *min_bottom = *min_top = 0; - *max_bottom = *max_top = 256; // kBlnCellHeight + *max_bottom = *max_top = UINT8_MAX; // kBlnCellHeight return; } ASSERT_HOST(contains_unichar_id(unichar_id)); @@ -928,6 +928,12 
@@ class TESS_API UNICHARSET { // The script parameter is copied and thus can be a temporary. int add_script(const char *script); + // Set the enabled property of the given unichar to the given value. + void set_enabled(UNICHAR_ID unichar_id, bool value) { + ASSERT_HOST(contains_unichar_id(unichar_id)); + unichars[unichar_id].properties.enabled = value; + } + // Return the enabled property of the given unichar. bool get_enabled(UNICHAR_ID unichar_id) const { ASSERT_HOST(contains_unichar_id(unichar_id)); From fb2f0dc6a4511a0d7a50c7121491a99b57df939c Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 17 Jul 2024 00:11:15 +0200 Subject: [PATCH 07/66] ensuring all class members are initialized by default. --- include/tesseract/osdetect.h | 1 + src/wordrec/lm_state.h | 40 ++++++++++++++++++------------------ 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/include/tesseract/osdetect.h b/include/tesseract/osdetect.h index 34bfb557d9..d306200e36 100644 --- a/include/tesseract/osdetect.h +++ b/include/tesseract/osdetect.h @@ -38,6 +38,7 @@ const int kMaxNumberOfScripts = 116 + 1 + 2 + 1; struct OSBestResult { OSBestResult() : orientation_id(0), script_id(0), sconfidence(0.0), oconfidence(0.0) {} + int orientation_id; int script_id; float sconfidence; diff --git a/src/wordrec/lm_state.h b/src/wordrec/lm_state.h index f03ef6269f..0f804aead7 100644 --- a/src/wordrec/lm_state.h +++ b/src/wordrec/lm_state.h @@ -75,16 +75,16 @@ struct LanguageModelNgramInfo { std::string context; ///< context string /// Length of the context measured by advancing using UNICHAR::utf8_step() /// (should be at most the order of the character ngram model used). - int context_unichar_step_len; + int context_unichar_step_len = 0; /// The paths with pruned set are pruned out from the perspective of the /// character ngram model. They are explored further because they represent /// a dictionary match or a top choice. 
Thus ngram_info is still computed /// for them in order to calculate the combined cost. - bool pruned; + bool pruned = false; /// -ln(P_ngram_model(path)) - float ngram_cost; + float ngram_cost = 0.0; /// -[ ln(P_classifier(path)) + scale_factor * ln(P_ngram_model(path)) ] - float ngram_and_classifier_cost; + float ngram_and_classifier_cost = 0.0; }; /// Struct for storing the information about a path in the segmentation graph @@ -159,43 +159,43 @@ struct ViterbiStateEntry : public ELIST_LINK { void Print(const char *msg) const; /// Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this). - BLOB_CHOICE *curr_b; - ViterbiStateEntry *parent_vse; + BLOB_CHOICE *curr_b = nullptr; + ViterbiStateEntry *parent_vse = nullptr; /// Pointer to a case-competing ViterbiStateEntry in the same list that /// represents a path ending in the same letter of the opposite case. - ViterbiStateEntry *competing_vse; + ViterbiStateEntry *competing_vse = nullptr; /// Extra information maintained by Dawg language model component /// (owned by ViterbiStateEntry). - LanguageModelDawgInfo *dawg_info; + LanguageModelDawgInfo *dawg_info = nullptr; /// Extra information maintained by Ngram language model component /// (owned by ViterbiStateEntry). - LanguageModelNgramInfo *ngram_info; + LanguageModelNgramInfo *ngram_info = nullptr; /// UTF8 string representing the path corresponding to this vse. /// Populated only in when language_model_debug_level > 0. - std::string *debug_str; + std::string *debug_str = nullptr; /// The cost is an adjusted ratings sum, that is adjusted by all the language /// model components that use Viterbi search. - float cost; + float cost = 0.0; /// Various information about the characters on the path represented /// by this ViterbiStateEntry. 
- float ratings_sum; ///< sum of ratings of character on the path - float min_certainty; ///< minimum certainty on the path - int adapted; ///< number of BLOB_CHOICES from adapted templates - int length; ///< number of characters on the path - float outline_length; ///< length of the outline so far + float ratings_sum = 0.0; ///< sum of ratings of character on the path + float min_certainty = 0.0; ///< minimum certainty on the path + int adapted = 0; ///< number of BLOB_CHOICES from adapted templates + int length = 0; ///< number of characters on the path + float outline_length = 0.0; ///< length of the outline so far LMConsistencyInfo consistency_info; ///< path consistency info AssociateStats associate_stats; ///< character widths/gaps/seams /// Flags for marking the entry as a top choice path with /// the smallest rating or lower/upper case letters). - LanguageModelFlagsType top_choice_flags; + LanguageModelFlagsType top_choice_flags = 0; - bool updated; ///< set to true if the entry has just been created/updated + bool updated = false; ///< set to true if the entry has just been created/updated }; ELISTIZEH(ViterbiStateEntry) @@ -237,7 +237,7 @@ struct BestChoiceBundle { } /// Flag to indicate whether anything was changed. - bool updated; + bool updated = false; /// Places to try to fix the word suggested by ambiguity checking. DANGERR fixpt; /// The beam. One LanguageModelState containing a list of ViterbiStateEntry @@ -245,7 +245,7 @@ struct BestChoiceBundle { /// somewhere in the corresponding row. std::vector beam; /// Best ViterbiStateEntry and BLOB_CHOICE. - ViterbiStateEntry *best_vse; + ViterbiStateEntry *best_vse = nullptr; }; } // namespace tesseract From afb8d154de5271d704bacf717f48ccc92a4b2957 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Thu, 18 Jul 2024 23:20:34 +0200 Subject: [PATCH 08/66] reduced debug output for lower settings -- tweaking the log output volume for our bulk tests. 
This cuts down some log files from 100MB+ down to 10MB+ for images with lots of noise and thus lots of bboxes to test-OCR. # Conflicts: # src/wordrec/wordclass.cpp --- src/wordrec/wordclass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wordrec/wordclass.cpp b/src/wordrec/wordclass.cpp index 2f0704adaf..001837ea14 100644 --- a/src/wordrec/wordclass.cpp +++ b/src/wordrec/wordclass.cpp @@ -64,7 +64,7 @@ BLOB_CHOICE_LIST *Wordrec::classify_blob(TBLOB *blob, const char *string, Scroll wordrec_debug_blamer); } #ifndef GRAPHICS_DISABLED - if (classify_debug_level && string) { + if (classify_debug_level > 1 && string) { print_ratings_list(string, choices, getDict().getUnicharset()); } From 87e20da735c426d7234b52c220276114b4db9f75 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Fri, 19 Jul 2024 22:29:36 +0200 Subject: [PATCH 09/66] parameter `wordrec_display_segmentations` is a boolean, not a integer. # Conflicts: # src/ccmain/tesseractclass.cpp # src/wordrec/language_model.h --- src/wordrec/language_model.cpp | 2 +- src/wordrec/language_model.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wordrec/language_model.cpp b/src/wordrec/language_model.cpp index 5a6c4d4463..d1244182ab 100644 --- a/src/wordrec/language_model.cpp +++ b/src/wordrec/language_model.cpp @@ -110,7 +110,7 @@ LanguageModel::LanguageModel(const UnicityTable *fontinfo_table, Dict dict->getCCUtil()->params()) , double_MEMBER(language_model_penalty_increment, 0.01, "Penalty increment", dict->getCCUtil()->params()) - , INT_MEMBER(wordrec_display_segmentations, 0, "Display Segmentations (ScrollView)", + , BOOL_MEMBER(wordrec_display_segmentations, false, "Display Segmentations (ScrollView)", dict->getCCUtil()->params()) , BOOL_INIT_MEMBER(language_model_use_sigmoidal_certainty, false, "Use sigmoidal score for certainty", dict->getCCUtil()->params()) diff --git a/src/wordrec/language_model.h b/src/wordrec/language_model.h index 010cf5b4f4..f81bd6d882 100644 
--- a/src/wordrec/language_model.h +++ b/src/wordrec/language_model.h @@ -311,7 +311,7 @@ class LanguageModel { double_VAR_H(language_model_penalty_font); double_VAR_H(language_model_penalty_spacing); double_VAR_H(language_model_penalty_increment); - INT_VAR_H(wordrec_display_segmentations); + BOOL_VAR_H(wordrec_display_segmentations); BOOL_VAR_H(language_model_use_sigmoidal_certainty); protected: From 6a0c06baec67ea9f5b617efb2f19f9f9cb32d129 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Fri, 19 Jul 2024 23:09:29 +0200 Subject: [PATCH 10/66] `textord_debug_block` parameter is unused. --- src/textord/tovars.cpp | 2 +- src/textord/tovars.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/textord/tovars.cpp b/src/textord/tovars.cpp index 7b5ad2f0ce..3917045b61 100644 --- a/src/textord/tovars.cpp +++ b/src/textord/tovars.cpp @@ -26,7 +26,7 @@ BOOL_VAR(textord_show_initial_words, false, "Display separate words"); BOOL_VAR(textord_blocksall_fixed, false, "Moan about prop blocks"); BOOL_VAR(textord_blocksall_prop, false, "Moan about fixed pitch blocks"); INT_VAR(textord_dotmatrix_gap, 3, "Max pixel gap for broken pixed pitch"); -INT_VAR(textord_debug_block, 0, "Block to do debug on"); +//INT_VAR(textord_debug_block, 0, "Block to do debug on"); INT_VAR(textord_pitch_range, 2, "Max range test on pitch"); double_VAR(textord_wordstats_smooth_factor, 0.05, "Smoothing gap stats"); double_VAR(textord_words_maxspace, 4.0, "Multiple of xheight"); diff --git a/src/textord/tovars.h b/src/textord/tovars.h index cf44863481..1f7c5bf402 100644 --- a/src/textord/tovars.h +++ b/src/textord/tovars.h @@ -28,7 +28,7 @@ extern BOOL_VAR_H(textord_show_initial_words); extern BOOL_VAR_H(textord_blocksall_fixed); extern BOOL_VAR_H(textord_blocksall_prop); extern INT_VAR_H(textord_dotmatrix_gap); -extern INT_VAR_H(textord_debug_block); +//extern INT_VAR_H(textord_debug_block); extern INT_VAR_H(textord_pitch_range); extern double_VAR_H(textord_wordstats_smooth_factor); 
extern double_VAR_H(textord_words_maxspace); From a3e4594964b098bbab754ba13231114e046b7934 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Fri, 19 Jul 2024 23:10:15 +0200 Subject: [PATCH 11/66] integer parameters: make sure all checks against them are numeric too, i.e. check for levels > 0 # Conflicts: # src/textord/colfind.cpp --- src/textord/colfind.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/textord/colfind.cpp b/src/textord/colfind.cpp index ff255d0ed5..e14a748757 100644 --- a/src/textord/colfind.cpp +++ b/src/textord/colfind.cpp @@ -433,7 +433,7 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Image scaled_color, int s SmoothPartnerRuns(); #ifndef GRAPHICS_DISABLED - if (textord_tabfind_show_partitions) { + if (textord_tabfind_show_partitions > 0) { ScrollView *window = MakeWindow(400, 300, "Partitions"); if (window != nullptr) { part_grid_.DisplayBoxes(window); From c24a03468df5fb842aac67d6a4c689f05767fa54 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Sat, 20 Jul 2024 00:15:02 +0200 Subject: [PATCH 12/66] - disable parameters which are unused - update notes for a few parameters - propagate use of the debug parameter name in the code for easier identifiable debug code right now (easier to answer the question: *which parameter(s) decide whether this particular debug statement is executed or not?*) # Conflicts: # src/ccmain/tesseractclass.cpp # src/ccmain/tesseractclass.h --- src/dict/dawg.h | 14 +++++++------- src/dict/dawg_cache.cpp | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/dict/dawg.h b/src/dict/dawg.h index 408fa2ca96..1d6de23a87 100644 --- a/src/dict/dawg.h +++ b/src/dict/dawg.h @@ -408,11 +408,11 @@ class DawgPositionVector : public std::vector { class TESS_API SquishedDawg : public Dawg { public: SquishedDawg(DawgType type, const std::string &lang, PermuterType perm, - int debug_level) - : Dawg(type, lang, perm, debug_level) {} + int dawg_debug_level) + : Dawg(type, lang, perm, 
dawg_debug_level) {} SquishedDawg(const char *filename, DawgType type, const std::string &lang, - PermuterType perm, int debug_level) - : Dawg(type, lang, perm, debug_level) { + PermuterType perm, int dawg_debug_level) + : Dawg(type, lang, perm, dawg_debug_level) { TFile file; ASSERT_HOST(file.Open(filename, nullptr)); ASSERT_HOST(read_squished_dawg(&file)); @@ -420,13 +420,13 @@ class TESS_API SquishedDawg : public Dawg { } SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type, const std::string &lang, PermuterType perm, int unicharset_size, - int debug_level) - : Dawg(type, lang, perm, debug_level), + int dawg_debug_level) + : Dawg(type, lang, perm, dawg_debug_level), edges_(edges), num_edges_(num_edges) { init(unicharset_size); num_forward_edges_in_node0 = num_forward_edges(0); - if (debug_level > 3) { + if (dawg_debug_level > 3) { print_all("SquishedDawg:"); } } diff --git a/src/dict/dawg_cache.cpp b/src/dict/dawg_cache.cpp index a8671ad35d..3705e7d3e6 100644 --- a/src/dict/dawg_cache.cpp +++ b/src/dict/dawg_cache.cpp @@ -41,10 +41,10 @@ struct DawgLoader { }; Dawg *DawgCache::GetSquishedDawg(const std::string &lang, TessdataType tessdata_dawg_type, - int debug_level, TessdataManager *data_file) { + int dawg_debug_level, TessdataManager *data_file) { std::string data_id = data_file->GetDataFileName(); data_id += kTessdataFileSuffixes[tessdata_dawg_type]; - DawgLoader loader(lang, tessdata_dawg_type, debug_level, data_file); + DawgLoader loader(lang, tessdata_dawg_type, dawg_debug_level, data_file); return dawgs_.Get(data_id, std::bind(&DawgLoader::Load, &loader)); } From 7c5fd96d8a4093e348bd7bf2f6dc944be67024ab Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Sat, 20 Jul 2024 18:07:36 +0200 Subject: [PATCH 13/66] adding extra debug/diagnostics code to observe the call depth of the word splitter / recog_word logic: it seems to me this code suffers runaway when fed noisy images: call depths of over 60 (sic) have been observed and I wonder if we can improve 
tesseract by keeping this down to a "sane amount", if there is such a thing in OCR processing. # Conflicts: # src/ccmain/tfacepp.cpp --- src/ccmain/superscript.cpp | 6 +++--- src/ccmain/tessbox.cpp | 2 +- src/ccmain/tesseractclass.h | 6 +++--- src/ccmain/tfacepp.cpp | 28 +++++++++++++++++++++------- 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/src/ccmain/superscript.cpp b/src/ccmain/superscript.cpp index 68ff974b1d..29cc0eaa6e 100644 --- a/src/ccmain/superscript.cpp +++ b/src/ccmain/superscript.cpp @@ -406,7 +406,7 @@ WERD_RES *Tesseract::TrySuperscriptSplits(int num_chopped_leading, float leading if (superscript_debug >= 3) { tprintf(" recognizing first %d chopped blobs\n", num_chopped_leading); } - recog_word_recursive(prefix); + recog_word_recursive(prefix, 0); if (superscript_debug >= 2) { tprintf(" The leading bits look like %s %s\n", ScriptPosToString(leading_pos), prefix->best_choice->unichar_string().c_str()); @@ -430,7 +430,7 @@ WERD_RES *Tesseract::TrySuperscriptSplits(int num_chopped_leading, float leading if (superscript_debug >= 3) { tprintf(" recognizing last %d chopped blobs\n", num_chopped_trailing); } - recog_word_recursive(suffix); + recog_word_recursive(suffix, 0); if (superscript_debug >= 2) { tprintf(" The trailing bits look like %s %s\n", ScriptPosToString(trailing_pos), suffix->best_choice->unichar_string().c_str()); @@ -461,7 +461,7 @@ WERD_RES *Tesseract::TrySuperscriptSplits(int num_chopped_leading, float leading delete bb1; return nullptr; } - recog_word_recursive(core); + recog_word_recursive(core, 0); // Now paste the results together into core. 
if (suffix) { diff --git a/src/ccmain/tessbox.cpp b/src/ccmain/tessbox.cpp index caf664d2f2..df280c8a0a 100644 --- a/src/ccmain/tessbox.cpp +++ b/src/ccmain/tessbox.cpp @@ -44,7 +44,7 @@ void Tesseract::tess_segment_pass_n(int pass_n, WERD_RES *word) { } else { set_pass2(); } - recog_word(word); + recog_word(word, 0); if (word->best_choice == nullptr) { word->SetupFake(*word->uch_set); } diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h index 166d01a674..6f3f25f48d 100644 --- a/src/ccmain/tesseractclass.h +++ b/src/ccmain/tesseractclass.h @@ -613,9 +613,9 @@ class TESS_API Tesseract : public Wordrec { WERD_RES *word, uint16_t mode); //// tfacepp.cpp /////////////////////////////////////////////////////// - void recog_word_recursive(WERD_RES *word); - void recog_word(WERD_RES *word); - void split_and_recog_word(WERD_RES *word); + void recog_word_recursive(WERD_RES *word, int call_depth); + void recog_word(WERD_RES *word, int call_depth); + void split_and_recog_word(WERD_RES *word, int call_depth); void split_word(WERD_RES *word, unsigned split_pt, WERD_RES **right_piece, BlamerBundle **orig_blamer_bundle) const; void join_words(WERD_RES *word, WERD_RES *word2, BlamerBundle *orig_bb) const; diff --git a/src/ccmain/tfacepp.cpp b/src/ccmain/tfacepp.cpp index a04c948c97..fe587bb324 100644 --- a/src/ccmain/tfacepp.cpp +++ b/src/ccmain/tfacepp.cpp @@ -34,7 +34,8 @@ * Convert the output back to editor form. 
**********************************************************************/ namespace tesseract { -void Tesseract::recog_word(WERD_RES *word) { + +void Tesseract::recog_word(WERD_RES *word, int call_depth) { if (wordrec_skip_no_truth_words && (word->blamer_bundle == nullptr || word->blamer_bundle->incorrect_result_reason() == IRR_NO_TRUTH)) { @@ -45,7 +46,7 @@ void Tesseract::recog_word(WERD_RES *word) { return; } ASSERT_HOST(!word->chopped_word->blobs.empty()); - recog_word_recursive(word); + recog_word_recursive(word, call_depth + 1); word->SetupBoxWord(); ASSERT_HOST(word->best_choice != nullptr); ASSERT_HOST(static_cast(word->best_choice->length()) == word->box_word->length()); @@ -92,10 +93,23 @@ void Tesseract::recog_word(WERD_RES *word) { * Convert the word to tess form and pass it to the tess segmenter. * Convert the output back to editor form. **********************************************************************/ -void Tesseract::recog_word_recursive(WERD_RES *word) { +void Tesseract::recog_word_recursive(WERD_RES *word, int call_depth) { auto word_length = word->chopped_word->NumBlobs(); // no of blobs + + { + static float depth_ema = 0.0; + if (call_depth > depth_ema) { + if (call_depth % 10 == 0) + tprintDebug("recog_word_recursive call depth: {}, peak EMA: {}, word length: \n", call_depth, depth_ema, word_length); + depth_ema = call_depth; + } else { + depth_ema *= 0.97; + depth_ema += 0.03 * call_depth; + } + } + if (word_length > MAX_UNDIVIDED_LENGTH) { - return split_and_recog_word(word); + return split_and_recog_word(word, call_depth); } cc_recog(word); word_length = word->rebuild_word->NumBlobs(); // No of blobs in output. @@ -124,7 +138,7 @@ void Tesseract::recog_word_recursive(WERD_RES *word) { * Split the word into 2 smaller pieces at the largest gap. * Recognize the pieces and stick the results back together. 
**********************************************************************/ -void Tesseract::split_and_recog_word(WERD_RES *word) { +void Tesseract::split_and_recog_word(WERD_RES *word, int call_depth) { // Find the biggest blob gap in the chopped_word. int bestgap = -INT32_MAX; int split_index = 0; @@ -144,9 +158,9 @@ void Tesseract::split_and_recog_word(WERD_RES *word) { split_word(word, split_index, &word2, &orig_bb); // Recognize the first part of the word. - recog_word_recursive(word); + recog_word_recursive(word, call_depth + 1); // Recognize the second part of the word. - recog_word_recursive(word2); + recog_word_recursive(word2, call_depth + 1); join_words(word, word2, orig_bb); } From 8000c92fbd681326e2b5e7969a2a6150bab32e65 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Sat, 20 Jul 2024 20:06:30 +0200 Subject: [PATCH 14/66] fix bug + tweak output for now for the new word splitter call depth monitoring code. # Conflicts: # src/wordrec/language_model.cpp --- src/ccmain/tfacepp.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/ccmain/tfacepp.cpp b/src/ccmain/tfacepp.cpp index fe587bb324..ba8bc8030d 100644 --- a/src/ccmain/tfacepp.cpp +++ b/src/ccmain/tfacepp.cpp @@ -99,9 +99,15 @@ void Tesseract::recog_word_recursive(WERD_RES *word, int call_depth) { { static float depth_ema = 0.0; if (call_depth > depth_ema) { - if (call_depth % 10 == 0) - tprintDebug("recog_word_recursive call depth: {}, peak EMA: {}, word length: \n", call_depth, depth_ema, word_length); - depth_ema = call_depth; + static int maxx = 0; + if (maxx < call_depth && call_depth > 20 && call_depth % 10 == 0) { + maxx = call_depth; + tprintDebug("recog_word_recursive call depth: {}, peak: {} +EMA: {}, word length: \n", call_depth, maxx, depth_ema, word_length); + } + if (maxx < call_depth) + maxx = call_depth; + + depth_ema = call_depth; } else { depth_ema *= 0.97; depth_ema += 0.03 * call_depth; From 210396896320082bad0c3ac374a23eb2262ec53e Mon Sep 17 00:00:00 
2001 From: Ger Hobbelt Date: Sun, 21 Jul 2024 01:20:06 +0200 Subject: [PATCH 15/66] > ( :-) wow! no crashes in the latest (uncommitted) edit now and all heap leaks gone as well; tested with timeout @ 10 seconds to allow debug build to collect some work done and get a real behaviour; 5 seconds for the same in release build and test image 1216) - introducing parameter `recog_word_recursion_depth_limit` to deal with the sometimes insanely deep recog_word call chains which occur for large images where a lot of content is marked as text boxes but is just a lot of noise, taking sometimes *many minutes* to complete one image OCR run. The default setting is so high it won't ever be reached, essentially making this a no-impact-by-default change, but when you set this in a config file to a value somewhere in the range 40..60 you'll start seeing effects: the 'bad' image runs will run that much faster as the deep call depth is detected and local word OCR attempts are cut short. (We have tested with a level of 42 and seen no adverse effects on 'good' images yet as those never got above depth level 30 in our test runs today. YMMV.) - we use the `jpg_quality` parameter for all supported output formats, at least we already do in our debug_pixa based diagnostics/debug HTML output code: 1..100% quality ~ low..high lossless compression for PNG and WEBP. - introducing further parameters to control our (sometimes costly) diagnostics/debug log HTML output: now this feature can be turned OFF via config parameter. - introducing a parameter to specify the image format type (webp, png, jpeg, ..) for the intermediate in-process images that accompany the diagnostics/debug log output and are referenced in the generated HTML equivalent output file. - parameters: + bool: debug_recog_word_recursion_depth -- Debug the word recognizer recursion depth by having peak call depths reported as they appear.
+ int: recog_word_recursion_depth_limit -- Restrict the word recognizer from recursing more than N levels deep. Setting this to a lower number can speed up processing of very noisy images which produce a lot of semi-random text noise as output anyway (with low OCR confidence numbers), but setting this too low can negatively impact any images with large amounts of text, so tread carefully. Empirical numbers today are: 50 and higher is image noise, 40 and lower is complex text pages. + bool: debug_output_diagnostics_HTML -- Write the debug/diagnostics output to a HTML file, including the collected images of the various process stages inside tesseract. The content is equivalent to the debug info you see on stderr, but in a nicely formatted and easier to grok modern format. Also handy for sharing your sessions' diagnostics with others. The output filename is derived from the source image name and output base path. + int: debug_output_diagnostics_images_format -- The format of the images included in the debug/diagnostics output HTML file. Specify one of the Leptonica constants: IFF_WEBP=webp, IFF_PNG=png, IFF_JFIF_JPEG=jpeg. While we support the other formats, those are ill-advised to use as web browsers won't support those other formats out of the box and choosing those formats will strongly and *negatively* impact your HTML diagnostics viewing experience. Tip: use PNG or JPEG if you want the output to be produced faster, WEBP if you want smaller image files with maximum precision. Set the `jpg_quality` parameter for any of these formats for targeted compression ratio. - fixed/improved the debug/warn diagnostic in recog_word_recursive regarding our call depth ceiling check - also changed the code there to allow the monitor to kick in and abort, based on both deadline timeout and/or userland cancel signal: before, the recognizer could not be aborted and you had to wait minutes (or seconds, in case you were lucky) until it completed for overly noisy images.
# Conflicts: # src/ccmain/pixProcessing.cpp # src/ccmain/pixProcessing.h # src/ccmain/tesseractclass.cpp # src/ccmain/tesseractclass.h # src/ccstruct/debugpixa.cpp --- src/ccmain/control.cpp | 8 ++ src/ccmain/tesseractclass.cpp | 9 +- src/ccmain/tesseractclass.h | 4 + src/ccmain/tfacepp.cpp | 239 ++++++++++++++++++++++------------ 4 files changed, 175 insertions(+), 85 deletions(-) diff --git a/src/ccmain/control.cpp b/src/ccmain/control.cpp index 60651b7fdc..c9f0f38ccc 100644 --- a/src/ccmain/control.cpp +++ b/src/ccmain/control.cpp @@ -229,6 +229,10 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC *monitor, PAGE_RES_IT for (; w < words->size(); ++w) { (*words)[w].word->SetupFake(unicharset); } + // flip recursion setting back to a positive limit; see tface.cpp for the sign-flipping logic inside the recursive word recognizer. + if (recog_word_recursion_depth_limit < 0) { + recog_word_recursion_depth_limit.set_value(-recog_word_recursion_depth_limit.value()); + } return false; } } @@ -264,6 +268,10 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC *monitor, PAGE_RES_IT pr_it->MakeCurrentWordFuzzy(); } } + // flip recursion setting back to a positive limit; see tface.cpp for the sign-flipping logic inside the recursive word recognizer. 
+ if (recog_word_recursion_depth_limit < 0) { + recog_word_recursion_depth_limit.set_value(-recog_word_recursion_depth_limit.value()); + } return true; } diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp index a16c9bae3e..00b42be019 100644 --- a/src/ccmain/tesseractclass.cpp +++ b/src/ccmain/tesseractclass.cpp @@ -351,7 +351,7 @@ Tesseract::Tesseract() , BOOL_MEMBER(tessedit_create_pdf, false, "Write .pdf output file", this->params()) , BOOL_MEMBER(textonly_pdf, false, "Create PDF with only one invisible text layer", this->params()) - , INT_MEMBER(jpg_quality, 85, "Set JPEG quality level", this->params()) + , INT_MEMBER(jpg_quality, 85, "Set JPEG/WEBP/PNG quality level as a 0..100% percentage.", this->params()) , INT_MEMBER(user_defined_dpi, 0, "Specify DPI for input image", this->params()) , INT_MEMBER(min_characters_to_try, 50, "Specify minimum characters to try during OSD", this->params()) @@ -447,9 +447,12 @@ Tesseract::Tesseract() this->params()) , BOOL_MEMBER(pageseg_apply_music_mask, false, "Detect music staff and remove intersecting components", this->params()) - , + , BOOL_MEMBER(debug_recog_word_recursion_depth, false, "Debug the word recognizer recursion depth by having peak call depths reported as they appear.", params()) + , INT_MEMBER(recog_word_recursion_depth_limit, 10000, "Restrict the word recognizer from recursing more than N levels deep. Setting this to a lower number can speed up processing of very noisy images which produce a lot of semi-random text noise as output anyway (with low OCR confidence numbers), but setting this too low can negatively impact any images with large amounts of text, so tread carefully. Empirical numbers today are: 50 and higher is image noise, 40 and lower is complex text pages.", params()) + , BOOL_MEMBER(debug_output_diagnostics_HTML, false, "Write the debug/diagnostics output to a HTML file, including the collected images of the various process stages inside tesseract. 
The content is equivalent to the debug info you see on stderr, but in a nicely formatted and easier to grok modern format. Also handy for sharing your sessions' diagnostics with others. The output filename is derived from the source image name and output base path.", params()) + , INT_MEMBER(debug_output_diagnostics_images_format, IFF_WEBP, "The format of the images included in the debug/diagnostics output HTML file. Specify one of the Leptonica constants: IFF_WEBP=webp, IFF_PNG=png, IFF_JFIF_JPEG=jpeg. While we support the other formats, those are ill-advised to use as web browsers won't support those other formats out of the box and choosing those formats will strongly and *negatively* impact your HTML diagnostics viewing experience. Tip: use PNG or JPEG if you want the output to be produced faster, WEBP if you want smaller image files with maximum precision. Set the jpeg_quality parameter for any of these formats for targeted compression ratio.", params()) - backup_config_file_(nullptr) + , backup_config_file_(nullptr) , pix_binary_(nullptr) , pix_grey_(nullptr) , pix_original_(nullptr) diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h index 6f3f25f48d..23def6ad9d 100644 --- a/src/ccmain/tesseractclass.h +++ b/src/ccmain/tesseractclass.h @@ -987,6 +987,10 @@ class TESS_API Tesseract : public Wordrec { INT_VAR_H(lstm_choice_iterations); double_VAR_H(lstm_rating_coefficient); BOOL_VAR_H(pageseg_apply_music_mask); + BOOL_VAR_H(debug_recog_word_recursion_depth); + INT_VAR_H(recog_word_recursion_depth_limit); + BOOL_VAR_H(debug_output_diagnostics_HTML); + INT_VAR_H(debug_output_diagnostics_images_format); //// ambigsrecog.cpp ///////////////////////////////////////////////////////// FILE *init_recog_training(const char *filename); diff --git a/src/ccmain/tfacepp.cpp b/src/ccmain/tfacepp.cpp index ba8bc8030d..3b2bcf7730 100644 --- a/src/ccmain/tfacepp.cpp +++ b/src/ccmain/tfacepp.cpp @@ -47,6 +47,17 @@ void Tesseract::recog_word(WERD_RES *word, 
int call_depth) { } ASSERT_HOST(!word->chopped_word->blobs.empty()); recog_word_recursive(word, call_depth + 1); + if (word->tess_failed) { + // word is already marked and set up to fail, so we percolate down... + // ...after we duplicate the effort further below for when word is a failed one. + ASSERT_HOST((word->best_choice == nullptr) == (word->raw_choice == nullptr)); + //word->tess_failed = true; + word->ClearResults(); + // word->reject_map.initialise(word->box_word->length()); --crash + word->reject_map.initialise(word->uch_set->size()); + word->reject_map.rej_word_tess_failure(); + return; + } word->SetupBoxWord(); ASSERT_HOST(word->best_choice != nullptr); ASSERT_HOST(static_cast(word->best_choice->length()) == word->box_word->length()); @@ -80,6 +91,8 @@ void Tesseract::recog_word(WERD_RES *word, int call_depth) { strspn(word->best_choice->unichar_string().c_str(), " ") == word->best_choice->length()) { word->tess_failed = true; + ASSERT_HOST((word->best_choice == nullptr) == (word->raw_choice == nullptr)); + word->ClearResults(); word->reject_map.initialise(word->box_word->length()); word->reject_map.rej_word_tess_failure(); } else { @@ -99,24 +112,57 @@ void Tesseract::recog_word_recursive(WERD_RES *word, int call_depth) { { static float depth_ema = 0.0; if (call_depth > depth_ema) { +# if 0 static int maxx = 0; if (maxx < call_depth && call_depth > 20 && call_depth % 10 == 0) { maxx = call_depth; - tprintDebug("recog_word_recursive call depth: {}, peak: {} +EMA: {}, word length: \n", call_depth, maxx, depth_ema, word_length); - } + tprintDebug("recog_word_recursive call depth: {}, peak: {} +EMA: {}, word length: {}\n", call_depth, maxx, depth_ema, word_length); + } if (maxx < call_depth) maxx = call_depth; - - depth_ema = call_depth; +# else + if (debug_recog_word_recursion_depth && call_depth >= 10) + tprintDebug("recog_word_recursive call depth: {}, peak.EMA: {}, word length: {}\n", call_depth, depth_ema, word_length); +# endif + depth_ema = 
call_depth; } else { + // decay rate: slow decay, so we only catch the noteworthy peaks in the diag/log output. depth_ema *= 0.97; depth_ema += 0.03 * call_depth; } - } - if (word_length > MAX_UNDIVIDED_LENGTH) { - return split_and_recog_word(word, call_depth); + bool restrict_recursion = (call_depth >= recog_word_recursion_depth_limit); + + if (!restrict_recursion && owner_.Monitor().bump_progress().exec_progress_func().kick_watchdog_and_check_for_cancel()) { + // deadline reached: as we don't check all the way down once we get a cancel signal, dial down the call depth limit to insane low values in order to stop the word recognizer in its tracks for the remainder of the run. + // + // what we do to also keep the userland configured value is to flip its sign: that way we can flip that value back at the end of the run if its only this particular session's + // deadline that's expired, not just the entire session's -- this is us anticipating tesseract core readying for batch processing in a single session. + recog_word_recursion_depth_limit.set_value(-recog_word_recursion_depth_limit.value()); + + tprintInfo("recog_word_recursive call depth is restricted by CANCEL SIGNAL at level {} --> peak.EMA: {}, word length: {}\n", call_depth, depth_ema, word_length); + + // set word as faked/failed and call it a day. + word->SetupFake(*word->uch_set); + + return; + } + + if (!restrict_recursion && word_length > MAX_UNDIVIDED_LENGTH) { + return split_and_recog_word(word, call_depth); + } else if (restrict_recursion) { + tprintWarn("recog_word_recursive call depth is restricted by configuration parameter at level {} --> peak.EMA: {}, word length: {}\n", call_depth, depth_ema, word_length); + + // prevent long waits due to gigantic word lengths being processed by cc_recog() as if they were real... + if (word_length > MAX_UNDIVIDED_LENGTH * 2) { + // set word as faked/failed and call it a day. 
+ word->SetupFake(*word->uch_set); + + return; + } + } } + cc_recog(word); word_length = word->rebuild_word->NumBlobs(); // No of blobs in output. @@ -235,86 +281,115 @@ void Tesseract::split_word(WERD_RES *word, unsigned split_pt, WERD_RES **right_p * Also, if orig_bb is provided, stitch it back into word. **********************************************************************/ void Tesseract::join_words(WERD_RES *word, WERD_RES *word2, BlamerBundle *orig_bb) const { - TBOX prev_box = word->chopped_word->blobs.back()->bounding_box(); - TBOX blob_box = word2->chopped_word->blobs[0]->bounding_box(); - // Tack the word2 outputs onto the end of the word outputs. - word->chopped_word->blobs.insert(word->chopped_word->blobs.end(), word2->chopped_word->blobs.begin(), word2->chopped_word->blobs.end()); - word->rebuild_word->blobs.insert(word->rebuild_word->blobs.end(), word2->rebuild_word->blobs.begin(), word2->rebuild_word->blobs.end()); - word2->chopped_word->blobs.clear(); - word2->rebuild_word->blobs.clear(); - TPOINT split_pt; - split_pt.x = (prev_box.right() + blob_box.left()) / 2; - split_pt.y = (prev_box.top() + prev_box.bottom() + blob_box.top() + blob_box.bottom()) / 4; - // Move the word2 seams onto the end of the word1 seam_array. - // Since the seam list is one element short, an empty seam marking the - // end of the last blob in the first word is needed first. - word->seam_array.push_back(new SEAM(0.0f, split_pt)); - word->seam_array.insert(word->seam_array.end(), word2->seam_array.begin(), word2->seam_array.end()); - word2->seam_array.clear(); - // Fix widths and gaps. - word->blob_widths.insert(word->blob_widths.end(), word2->blob_widths.begin(), word2->blob_widths.end()); - word->blob_gaps.insert(word->blob_gaps.end(), word2->blob_gaps.begin(), word2->blob_gaps.end()); - // Fix the ratings matrix. 
- int rat1 = word->ratings->dimension(); - int rat2 = word2->ratings->dimension(); - word->ratings->AttachOnCorner(word2->ratings); - ASSERT_HOST(word->ratings->dimension() == rat1 + rat2); - word->best_state.insert(word->best_state.end(), word2->best_state.begin(), word2->best_state.end()); - // Append the word choices. - *word->raw_choice += *word2->raw_choice; - - // How many alt choices from each should we try to get? - const int kAltsPerPiece = 2; - // When do we start throwing away extra alt choices? - const int kTooManyAltChoices = 100; - - // Construct the cartesian product of the best_choices of word(1) and word2. - WERD_CHOICE_LIST joined_choices; - WERD_CHOICE_IT jc_it(&joined_choices); - WERD_CHOICE_IT bc1_it(&word->best_choices); - WERD_CHOICE_IT bc2_it(&word2->best_choices); - int num_word1_choices = word->best_choices.length(); - int total_joined_choices = num_word1_choices; - // Nota Bene: For the main loop here, we operate only on the 2nd and greater - // word2 choices, and put them in the joined_choices list. The 1st word2 - // choice gets added to the original word1 choices in-place after we have - // finished with them. - int bc2_index = 1; - for (bc2_it.forward(); !bc2_it.at_first(); bc2_it.forward(), ++bc2_index) { - if (total_joined_choices >= kTooManyAltChoices && bc2_index > kAltsPerPiece) { - break; - } - int bc1_index = 0; - for (bc1_it.move_to_first(); bc1_index < num_word1_choices; ++bc1_index, bc1_it.forward()) { - if (total_joined_choices >= kTooManyAltChoices && bc1_index > kAltsPerPiece) { + // due to a timeout or cancel we may end up in here with words marked failed: make sure we don't b0rk on those. + if (!word->tess_failed && !word2->tess_failed) { + TBOX prev_box = word->chopped_word->blobs.back()->bounding_box(); + TBOX blob_box = word2->chopped_word->blobs[0]->bounding_box(); + // Tack the word2 outputs onto the end of the word outputs. 
+ word->chopped_word->blobs.insert(word->chopped_word->blobs.end(), word2->chopped_word->blobs.begin(), word2->chopped_word->blobs.end()); + word->rebuild_word->blobs.insert(word->rebuild_word->blobs.end(), word2->rebuild_word->blobs.begin(), word2->rebuild_word->blobs.end()); + word2->chopped_word->blobs.clear(); + word2->rebuild_word->blobs.clear(); + TPOINT split_pt; + split_pt.x = (prev_box.right() + blob_box.left()) / 2; + split_pt.y = (prev_box.top() + prev_box.bottom() + blob_box.top() + blob_box.bottom()) / 4; + // Move the word2 seams onto the end of the word1 seam_array. + // Since the seam list is one element short, an empty seam marking the + // end of the last blob in the first word is needed first. + word->seam_array.push_back(new SEAM(0.0f, split_pt)); + word->seam_array.insert(word->seam_array.end(), word2->seam_array.begin(), word2->seam_array.end()); + word2->seam_array.clear(); + // Fix widths and gaps. + word->blob_widths.insert(word->blob_widths.end(), word2->blob_widths.begin(), word2->blob_widths.end()); + word->blob_gaps.insert(word->blob_gaps.end(), word2->blob_gaps.begin(), word2->blob_gaps.end()); + // Fix the ratings matrix. + int rat1 = word->ratings->dimension(); + int rat2 = word2->ratings->dimension(); + word->ratings->AttachOnCorner(word2->ratings); + ASSERT_HOST(word->ratings->dimension() == rat1 + rat2); + word->best_state.insert(word->best_state.end(), word2->best_state.begin(), word2->best_state.end()); + // Append the word choices. + *word->raw_choice += *word2->raw_choice; + + // How many alt choices from each should we try to get? + const int kAltsPerPiece = 2; + // When do we start throwing away extra alt choices? + const int kTooManyAltChoices = 100; + + // Construct the cartesian product of the best_choices of word(1) and word2. 
+ WERD_CHOICE_LIST joined_choices; + WERD_CHOICE_IT jc_it(&joined_choices); + WERD_CHOICE_IT bc1_it(&word->best_choices); + WERD_CHOICE_IT bc2_it(&word2->best_choices); + int num_word1_choices = word->best_choices.length(); + int total_joined_choices = num_word1_choices; + // Nota Bene: For the main loop here, we operate only on the 2nd and greater + // word2 choices, and put them in the joined_choices list. The 1st word2 + // choice gets added to the original word1 choices in-place after we have + // finished with them. + int bc2_index = 1; + for (bc2_it.forward(); !bc2_it.at_first(); bc2_it.forward(), ++bc2_index) { + if (total_joined_choices >= kTooManyAltChoices && bc2_index > kAltsPerPiece) { break; } - auto *wc = new WERD_CHOICE(*bc1_it.data()); - *wc += *bc2_it.data(); - jc_it.add_after_then_move(wc); - ++total_joined_choices; + int bc1_index = 0; + for (bc1_it.move_to_first(); bc1_index < num_word1_choices; ++bc1_index, bc1_it.forward()) { + if (total_joined_choices >= kTooManyAltChoices && bc1_index > kAltsPerPiece) { + break; + } + auto *wc = new WERD_CHOICE(*bc1_it.data()); + *wc += *bc2_it.data(); + jc_it.add_after_then_move(wc); + ++total_joined_choices; + } } - } - // Now that we've filled in as many alternates as we want, paste the best - // choice for word2 onto the original word alt_choices. - bc1_it.move_to_first(); - bc2_it.move_to_first(); - for (bc1_it.mark_cycle_pt(); !bc1_it.cycled_list(); bc1_it.forward()) { - *bc1_it.data() += *bc2_it.data(); - } - bc1_it.move_to_last(); - bc1_it.add_list_after(&joined_choices); + // Now that we've filled in as many alternates as we want, paste the best + // choice for word2 onto the original word alt_choices. 
+ bc1_it.move_to_first(); + bc2_it.move_to_first(); + for (bc1_it.mark_cycle_pt(); !bc1_it.cycled_list(); bc1_it.forward()) { + *bc1_it.data() += *bc2_it.data(); + } + bc1_it.move_to_last(); + bc1_it.add_list_after(&joined_choices); - // Restore the pointer to original blamer bundle and combine blamer - // information recorded in the splits. - if (orig_bb != nullptr) { - orig_bb->JoinBlames(*word->blamer_bundle, *word2->blamer_bundle, wordrec_debug_blamer); - delete word->blamer_bundle; - word->blamer_bundle = orig_bb; + // Restore the pointer to original blamer bundle and combine blamer + // information recorded in the splits. + if (orig_bb != nullptr) { + orig_bb->JoinBlames(*word->blamer_bundle, *word2->blamer_bundle, wordrec_debug_blamer); + delete word->blamer_bundle; + word->blamer_bundle = orig_bb; + } + word->SetupBoxWord(); + word->reject_map.initialise(word->box_word->length()); + delete word2; + } else { + // [GerH] to propagate or not to propagate, that is the question here. + // We choose to propagate as we tried the other approach and had a spot of trouble and doubtful of our capabilities to deal, + // so we push the failed state into `word` and then pass it on, letting the invokers deal with the aftermath. + // + // Cleanup and mark `word` failed (if it isn't already), propagating the failed state down the call chain. +// word2->chopped_word->blobs.clear(); +// word2->rebuild_word->blobs.clear(); + + // ripped from elsewhere in the codebase: saw this waiting for us way up the call chain so I assume we'll survive, when cloning that. 
+ // +#if 01 + //if (word->best_choice == nullptr || word->best_choice->empty() || + // strspn(word->best_choice->unichar_string().c_str(), " ") == + // word->best_choice->length()) { + word->tess_failed = true; + ASSERT_HOST((word->best_choice == nullptr) == (word->raw_choice == nullptr)); + word->ClearResults(); + word->reject_map.initialise(word->uch_set->size()); + word->reject_map.rej_word_tess_failure(); +#else + // set word as faked/failed and call it a day. + word->SetupFake(*word->uch_set); +#endif + + delete word2; } - word->SetupBoxWord(); - word->reject_map.initialise(word->box_word->length()); - delete word2; } } // namespace tesseract From 03c1333c6e3c0bc93e4447955da4ff3a8d860592 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Sun, 21 Jul 2024 01:33:09 +0200 Subject: [PATCH 16/66] cleaning up the debug/test code from the previous patch, introducing the parameters `debug_recog_word_recursion_depth` + `recog_word_recursion_depth_limit` --- src/ccmain/tfacepp.cpp | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/src/ccmain/tfacepp.cpp b/src/ccmain/tfacepp.cpp index 3b2bcf7730..6843825d0d 100644 --- a/src/ccmain/tfacepp.cpp +++ b/src/ccmain/tfacepp.cpp @@ -53,7 +53,6 @@ void Tesseract::recog_word(WERD_RES *word, int call_depth) { ASSERT_HOST((word->best_choice == nullptr) == (word->raw_choice == nullptr)); //word->tess_failed = true; word->ClearResults(); - // word->reject_map.initialise(word->box_word->length()); --crash word->reject_map.initialise(word->uch_set->size()); word->reject_map.rej_word_tess_failure(); return; @@ -112,18 +111,9 @@ void Tesseract::recog_word_recursive(WERD_RES *word, int call_depth) { { static float depth_ema = 0.0; if (call_depth > depth_ema) { -# if 0 - static int maxx = 0; - if (maxx < call_depth && call_depth > 20 && call_depth % 10 == 0) { - maxx = call_depth; - tprintDebug("recog_word_recursive call depth: {}, peak: {} +EMA: {}, word length: {}\n", call_depth, maxx, depth_ema, 
word_length); - } - if (maxx < call_depth) - maxx = call_depth; -# else if (debug_recog_word_recursion_depth && call_depth >= 10) tprintDebug("recog_word_recursive call depth: {}, peak.EMA: {}, word length: {}\n", call_depth, depth_ema, word_length); -# endif + depth_ema = call_depth; } else { // decay rate: slow decay, so we only catch the noteworthy peaks in the diag/log output. @@ -369,25 +359,11 @@ void Tesseract::join_words(WERD_RES *word, WERD_RES *word2, BlamerBundle *orig_b // so we push the failed state into `word` and then pass it on, letting the invokers deal with the aftermath. // // Cleanup and mark `word` failed (if it isn't already), propagating the failed state down the call chain. -// word2->chopped_word->blobs.clear(); -// word2->rebuild_word->blobs.clear(); - - // ripped from elsewhere in the codebase: saw this waiting for us way up the call chain so I assume we'll survive, when cloning that. - // -#if 01 - //if (word->best_choice == nullptr || word->best_choice->empty() || - // strspn(word->best_choice->unichar_string().c_str(), " ") == - // word->best_choice->length()) { word->tess_failed = true; ASSERT_HOST((word->best_choice == nullptr) == (word->raw_choice == nullptr)); word->ClearResults(); word->reject_map.initialise(word->uch_set->size()); word->reject_map.rej_word_tess_failure(); -#else - // set word as faked/failed and call it a day. - word->SetupFake(*word->uch_set); -#endif - delete word2; } } From dd08a7aa6a3a6b0fdfefc8e10887122e4b15a412 Mon Sep 17 00:00:00 2001 From: JKamlah Date: Thu, 11 Jul 2024 14:35:53 +0200 Subject: [PATCH 17/66] Fix confidence output for the PAGE XML renderer. 
--- src/api/pagerenderer.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/api/pagerenderer.cpp b/src/api/pagerenderer.cpp index a611341628..2adfc68cd6 100644 --- a/src/api/pagerenderer.cpp +++ b/src/api/pagerenderer.cpp @@ -789,6 +789,10 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) { << "\" caption=\"Regions reading order\">\n"; ResultIterator *res_it = GetIterator(); + + float block_conf = 0; + float line_conf = 0; + while (!res_it->Empty(RIL_BLOCK)) { if (res_it->Empty(RIL_WORD)) { res_it->Next(RIL_WORD); @@ -825,7 +829,6 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) { break; } - float block_conf = 0; if (res_it->IsAtBeginningOf(RIL_BLOCK)) { // Add Block to reading order reading_order_str << "\t\t\t\tIsAtBeginningOf(RIL_TEXTLINE)) { // writing_direction_before = writing_direction; line_conf = ((res_it->Confidence(RIL_TEXTLINE)) / 100.); From d0147896de4ecd6bab2547c113166940f04016d6 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Thu, 25 Jul 2024 20:44:53 +0200 Subject: [PATCH 18/66] tweak to diagnostics/HTML debug log output: now we only produce *lossless* WebP images when `jpg_quality` is set to 100 (100%); the lossless WebP images are beautiful but take a horrendous amount of time to produce for larger source images than their lossy-yet-high-quality alternatives: we're talking *large factors* in the time spent to produce one image -- and we produce *many*! 
--- src/ccmain/pixProcessing.cpp | 8 +++++++- src/ccstruct/debugpixa.cpp | 21 ++++++++++++++++----- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/ccmain/pixProcessing.cpp b/src/ccmain/pixProcessing.cpp index e8c2555d64..538f3482ef 100644 --- a/src/ccmain/pixProcessing.cpp +++ b/src/ccmain/pixProcessing.cpp @@ -873,7 +873,13 @@ static inline int MIX(int val1, int val2, const int factor) { bool pixAlphaIsSaneAndPresent(const PIX *pix) { l_int32 opaque; - if (pixAlphaIsOpaque(const_cast(pix), &opaque)) + if (!pix) + return false; + if (pixGetDepth(pix) != 32) + return false; + if (pixGetSpp(pix) != 4) + return false; + if (pixAlphaIsOpaque(const_cast(pix), &opaque)) return false; return !opaque; } diff --git a/src/ccstruct/debugpixa.cpp b/src/ccstruct/debugpixa.cpp index 9ab3cb01dc..2dfcff7a12 100644 --- a/src/ccstruct/debugpixa.cpp +++ b/src/ccstruct/debugpixa.cpp @@ -1316,12 +1316,20 @@ namespace tesseract { * to get the compression value. */ img_quality = (img_quality + 5) / 10; pixSetSpecial(img, 10 + img_quality); - pixWrite(img_filename.c_str(), img, IFF_PNG); + if (pixWrite(img_filename.c_str(), img, IFF_PNG)) { + tprintError("Did not succeeed writing the image data to file '{}' while generating the HTML diagnostic/log report.\n", img_filename); + // delete broken output file(s): + remove(img_filename.c_str()); + } break; case IFF_JFIF_JPEG: pixSetSpecial(img, img_quality); - pixWrite(img_filename.c_str(), img, IFF_JFIF_JPEG); + if (pixWrite(img_filename.c_str(), img, IFF_JFIF_JPEG)) { + tprintError("Did not succeeed writing the image data to file '{}' while generating the HTML diagnostic/log report.\n", img_filename); + // delete broken output file(s): + remove(img_filename.c_str()); + } break; case IFF_WEBP: { @@ -1329,12 +1337,15 @@ namespace tesseract { if (!fp) { tprintError("Failed to open file '{}' for writing one of the debug/diagnostics log impages.\n", img_filename); } else { - img_quality += 5; - img_quality /= 10; - auto rv = 
pixWriteStreamWebP(fp, img, 1 + img_quality, TRUE); + //img_quality is expected to be in range [0..100] + //img_quality += 5; + //img_quality /= 10; + auto rv = pixWriteStreamWebP(fp, img, img_quality, (img_quality > 99) /* lossless */); fclose(fp); if (rv) { tprintError("Did not succeeed writing the image data to file '{}' while generating the HTML diagnostic/log report.\n", img_filename); + // delete broken output file(s): + remove(img_filename.c_str()); } } } break; From 4a8b1910f78388bd75c6fee692db894f7ac4789f Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Thu, 25 Jul 2024 21:31:32 +0200 Subject: [PATCH 19/66] diagnostics/debug HTML output: restrict the `jpg_quality` value used for the various output formats to their legal range: if we don't do this, leptonica et al may decide to use potentially surprising default settings instead, e.g. when producing PNGs with jpg_quality=100 would result in default-medium-compression files. --- src/ccstruct/debugpixa.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/ccstruct/debugpixa.cpp b/src/ccstruct/debugpixa.cpp index 2dfcff7a12..927d18fcb5 100644 --- a/src/ccstruct/debugpixa.cpp +++ b/src/ccstruct/debugpixa.cpp @@ -1313,8 +1313,13 @@ namespace tesseract { * over default (6), but the compression is 3 to 10 times slower. * Use the zlib default (6) as our default compression unless * pix->special falls in the range [10 ... 19]; then subtract 10 - * to get the compression value. */ - img_quality = (img_quality + 5) / 10; + * to get the compression value. 
+ * + * compval = Z_DEFAULT_COMPRESSION; + * if (pix->special >= 10 && pix->special < 20) + * compval = pix->special - 10; + */ + img_quality = std::max(0, std::min(9, img_quality / 10)); pixSetSpecial(img, 10 + img_quality); if (pixWrite(img_filename.c_str(), img, IFF_PNG)) { tprintError("Did not succeeed writing the image data to file '{}' while generating the HTML diagnostic/log report.\n", img_filename); @@ -1324,6 +1329,7 @@ namespace tesseract { break; case IFF_JFIF_JPEG: + img_quality = std::max(0, std::min(100, img_quality)); pixSetSpecial(img, img_quality); if (pixWrite(img_filename.c_str(), img, IFF_JFIF_JPEG)) { tprintError("Did not succeeed writing the image data to file '{}' while generating the HTML diagnostic/log report.\n", img_filename); @@ -1340,6 +1346,7 @@ namespace tesseract { //img_quality is expected to be in range [0..100] //img_quality += 5; //img_quality /= 10; + img_quality = std::max(0, std::min(100, img_quality)); auto rv = pixWriteStreamWebP(fp, img, img_quality, (img_quality > 99) /* lossless */); fclose(fp); if (rv) { From 2feff8e75275ef3dada1d95c6eafa5b23c0fbdb4 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Thu, 25 Jul 2024 21:32:36 +0200 Subject: [PATCH 20/66] intentionally permanently ignoring a PVS Studio reported error: V1053 Calling the 'Clean' virtual function in the destructor may lead to unexpected result at runtime. plumbing.h 33 --- src/lstm/parallel.h | 1 + src/lstm/plumbing.h | 3 +++ src/lstm/series.h | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/lstm/parallel.h b/src/lstm/parallel.h index 0d7ce094fb..e5f21d54c7 100644 --- a/src/lstm/parallel.h +++ b/src/lstm/parallel.h @@ -28,6 +28,7 @@ class Parallel : public Plumbing { // ni_ and no_ will be set by AddToStack. TESS_API Parallel(const std::string &name, NetworkType type); + virtual ~Parallel() override = default; // Returns the shape output from the network given an input shape (which may // be partially unknown ie zero). 
diff --git a/src/lstm/plumbing.h b/src/lstm/plumbing.h index 47f996f0a2..7c0ea2e4ca 100644 --- a/src/lstm/plumbing.h +++ b/src/lstm/plumbing.h @@ -30,6 +30,9 @@ class TESS_API Plumbing : public Network { // ni_ and no_ will be set by AddToStack. explicit Plumbing(const std::string &name); virtual ~Plumbing() override { + // V1053 Calling the 'Clean' virtual function in the destructor may lead to unexpected result at runtime. plumbing.h 33 + // https://pvs-studio.com/en/docs/warnings/v1053/print/ + //-V::1053 Clean(); } diff --git a/src/lstm/series.h b/src/lstm/series.h index fc63f28414..7b65ac9443 100644 --- a/src/lstm/series.h +++ b/src/lstm/series.h @@ -28,7 +28,7 @@ class Series : public Plumbing { // ni_ and no_ will be set by AddToStack. TESS_API explicit Series(const std::string &name); - ~Series() override = default; + virtual ~Series() override = default; // Returns the shape output from the network given an input shape (which may // be partially unknown ie zero). From 723f84fcf605ea4a661d2e1d670d7144f93001c4 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Thu, 25 Jul 2024 21:33:33 +0200 Subject: [PATCH 21/66] diagnostics/debug HTML output: (temporarily) switching to PNG default output format: those files are larger yet are produced way faster than WebP format images. --- src/ccmain/tesseractclass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp index fbc01791f6..e4de5a1db3 100644 --- a/src/ccmain/tesseractclass.cpp +++ b/src/ccmain/tesseractclass.cpp @@ -465,7 +465,7 @@ Tesseract::Tesseract(TessBaseAPI &owner, Tesseract *parent) , BOOL_MEMBER(debug_recog_word_recursion_depth, false, "Debug the word recognizer recursion depth by having peak call depths reported as they appear.", params()) , INT_MEMBER(recog_word_recursion_depth_limit, 10000, "Restrict the word recognizer from recursing more than N levels deep. 
Setting this to a lower number can speed up processing of very noisy images which produce a lot of semi-random text noise as output anyway (with low OCR confidence numbers), but setting this too low can negatively impact any images with large amounts of text, so tread carefully. Empirical numbers today are: 50 and higher is image noise, 40 and lower is complex text pages.", params()) , BOOL_MEMBER(debug_output_diagnostics_HTML, false, "Write the debug/diagnostics output to a HTML file, including the collected images of the various process stages inside tesseract. The content is equivalent to the debug info you see on stderr, but in a nicely formatted and easier to grok modern format. Also handy for sharing your sessions' diagnostics with others. The output filename is derived from the source image name and output base path.", params()), - INT_MEMBER(debug_output_diagnostics_images_format, IFF_WEBP, "The format of the images included in the debug/diagnostics output HTML file. Specify one of the Leptonica constants: IFF_WEBP=webp, IFF_PNG=png, IFF_JFIF_JPEG=jpeg. While we support the other formats, those are ill-advised to use as web browsers won't support those other formats out of the box and choosing those formats will strongly and *negatively* impact your HTML diagnostics viewing experience. Tip: use PNG or JPEG if you want the output to be produced faster, WEBP if you want smaller image files with maximum precision. Set the jpeg_quality parameter for any of these formats for targeted compression ratio.", params()) + INT_MEMBER(debug_output_diagnostics_images_format, IFF_PNG, "The format of the images included in the debug/diagnostics output HTML file. Specify one of the Leptonica constants: IFF_WEBP=webp, IFF_PNG=png, IFF_JFIF_JPEG=jpeg. 
While we support the other formats, those are ill-advised to use as web browsers won't support those other formats out of the box and choosing those formats will strongly and *negatively* impact your HTML diagnostics viewing experience. Tip: use PNG or JPEG if you want the output to be produced faster, WEBP if you want smaller image files with maximum precision. Set the jpeg_quality parameter for any of these formats for targeted compression ratio.", params()) , pixa_debug_(this) , splitter_(this) From fa17bf5ece4e535ede407734743da4cffbb1b75c Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Thu, 25 Jul 2024 21:34:02 +0200 Subject: [PATCH 22/66] fix PVS Studio error: V576 Incorrect format. Consider checking the fifth actual argument of the 'fprintf' function. Under certain conditions the pointer can be null. paramsd.cpp 345 --- src/ccmain/paramsd.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ccmain/paramsd.cpp b/src/ccmain/paramsd.cpp index d6be1e4989..b66360b449 100644 --- a/src/ccmain/paramsd.cpp +++ b/src/ccmain/paramsd.cpp @@ -140,7 +140,8 @@ const char *ParamContent::GetDescription() const { } else if (param_type_ == VT_STRING) { return sIt->info_str(); } else { - return nullptr; + // V576 Incorrect format. Consider checking the fifth actual argument of the 'fprintf' function. Under certain conditions the pointer can be null. paramsd.cpp 345 + return "ERROR: ParamContent::GetDescription()"; } } From e17f92a9f0f5d1a8e09cc92d7bc76d504072bfdf Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Thu, 25 Jul 2024 21:51:33 +0200 Subject: [PATCH 23/66] fixed bug in the logic writing the LSTM feature input images to diagnostics/debug log/HTML output: was using the wrong debug_pixa API there so these image snippets were inadvertently scaled and stretched to snugly fit the entire scanned page. Whoops!
--- src/lstm/input.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lstm/input.cpp b/src/lstm/input.cpp index 80e036174b..90fa23f268 100644 --- a/src/lstm/input.cpp +++ b/src/lstm/input.cpp @@ -142,7 +142,7 @@ void Input::PreparePixInput(Tesseract *tess, const StaticShape &shape, const Ima } if (tess != nullptr && (verbose_process || tess->tessedit_dump_pageseg_images)) { - tess->AddPixCompedOverOrigDebugPage(normed_pix, fmt::format("LSTM normed input image: prepare to recognize one line of text. (height:{}, target_height:{}, scale_factor:{}, position box:{})", height, target_height, scale_factor, line_box.print_to_str())); + tess->AddPixDebugPage(normed_pix, fmt::format("LSTM normed input image: prepare to recognize one line of text. (height:{}, target_height:{}, scale_factor:{}, position box:{})", height, target_height, scale_factor, line_box.print_to_str())); } input->FromPix(shape, normed_pix, randomizer); } From 1b0863456ee2d395deb49277476b61d278489ee3 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Thu, 25 Jul 2024 21:53:10 +0200 Subject: [PATCH 24/66] Revert "diagnostics/debug HTML output: (temporarily) switching to PNG default output format: those files are larger yet are produced way faster than WebP format images." This reverts commit 723f84fcf605ea4a661d2e1d670d7144f93001c4. --- src/ccmain/tesseractclass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp index e4de5a1db3..fbc01791f6 100644 --- a/src/ccmain/tesseractclass.cpp +++ b/src/ccmain/tesseractclass.cpp @@ -465,7 +465,7 @@ Tesseract::Tesseract(TessBaseAPI &owner, Tesseract *parent) , BOOL_MEMBER(debug_recog_word_recursion_depth, false, "Debug the word recognizer recursion depth by having peak call depths reported as they appear.", params()) , INT_MEMBER(recog_word_recursion_depth_limit, 10000, "Restrict the word recognizer from recursing more than N levels deep. 
Setting this to a lower number can speed up processing of very noisy images which produce a lot of semi-random text noise as output anyway (with low OCR confidence numbers), but setting this too low can negatively impact any images with large amounts of text, so tread carefully. Empirical numbers today are: 50 and higher is image noise, 40 and lower is complex text pages.", params()) , BOOL_MEMBER(debug_output_diagnostics_HTML, false, "Write the debug/diagnostics output to a HTML file, including the collected images of the various process stages inside tesseract. The content is equivalent to the debug info you see on stderr, but in a nicely formatted and easier to grok modern format. Also handy for sharing your sessions' diagnostics with others. The output filename is derived from the source image name and output base path.", params()), - INT_MEMBER(debug_output_diagnostics_images_format, IFF_PNG, "The format of the images included in the debug/diagnostics output HTML file. Specify one of the Leptonica constants: IFF_WEBP=webp, IFF_PNG=png, IFF_JFIF_JPEG=jpeg. While we support the other formats, those are ill-advised to use as web browsers won't support those other formats out of the box and choosing those formats will strongly and *negatively* impact your HTML diagnostics viewing experience. Tip: use PNG or JPEG if you want the output to be produced faster, WEBP if you want smaller image files with maximum precision. Set the jpeg_quality parameter for any of these formats for targeted compression ratio.", params()) + INT_MEMBER(debug_output_diagnostics_images_format, IFF_WEBP, "The format of the images included in the debug/diagnostics output HTML file. Specify one of the Leptonica constants: IFF_WEBP=webp, IFF_PNG=png, IFF_JFIF_JPEG=jpeg. 
While we support the other formats, those are ill-advised to use as web browsers won't support those other formats out of the box and choosing those formats will strongly and *negatively* impact your HTML diagnostics viewing experience. Tip: use PNG or JPEG if you want the output to be produced faster, WEBP if you want smaller image files with maximum precision. Set the jpeg_quality parameter for any of these formats for targeted compression ratio.", params()) , pixa_debug_(this) , splitter_(this) From 1b467bec109d06b8af1b564a2de6ae824dbf0305 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Thu, 25 Jul 2024 23:16:32 +0200 Subject: [PATCH 25/66] diagnostics/debug output: `debug_output_diagnostics_images_format` code changed/enhanced: we now properly support lossless WebP with quality factor control through leptonica. debug_output_diagnostics_images_format: (integer) The format of the images included in the debug/diagnostics output HTML file. Specify a number: 0:PNG, 1:JPEG, 2:WebP, 3:lossless-WebP, 4:TIFF. While we support TIFF and higher numbers, it is ill-advised to use as web browsers won't support those out of the box and choosing those formats will strongly and *negatively* impact your HTML diagnostics viewing experience. Tip: use PNG or JPEG if you want the output to be produced faster, lossless-WEBP if you want smaller image files with maximum precision. Set the jpeg_quality parameter for any of these formats for targeted compression ratio.
--- src/ccmain/tesseractclass.cpp | 2 +- src/ccstruct/debugpixa.cpp | 62 ++++++++++++++++++++++++----------- src/ccstruct/debugpixa.h | 9 +++++ 3 files changed, 53 insertions(+), 20 deletions(-) diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp index fbc01791f6..9cc465c14b 100644 --- a/src/ccmain/tesseractclass.cpp +++ b/src/ccmain/tesseractclass.cpp @@ -465,7 +465,7 @@ Tesseract::Tesseract(TessBaseAPI &owner, Tesseract *parent) , BOOL_MEMBER(debug_recog_word_recursion_depth, false, "Debug the word recognizer recursion depth by having peak call depths reported as they appear.", params()) , INT_MEMBER(recog_word_recursion_depth_limit, 10000, "Restrict the word recognizer from recursing more than N levels deep. Setting this to a lower number can speed up processing of very noisy images which produce a lot of semi-random text noise as output anyway (with low OCR confidence numbers), but setting this too low can negatively impact any images with large amounts of text, so tread carefully. Empirical numbers today are: 50 and higher is image noise, 40 and lower is complex text pages.", params()) , BOOL_MEMBER(debug_output_diagnostics_HTML, false, "Write the debug/diagnostics output to a HTML file, including the collected images of the various process stages inside tesseract. The content is equivalent to the debug info you see on stderr, but in a nicely formatted and easier to grok modern format. Also handy for sharing your sessions' diagnostics with others. The output filename is derived from the source image name and output base path.", params()), - INT_MEMBER(debug_output_diagnostics_images_format, IFF_WEBP, "The format of the images included in the debug/diagnostics output HTML file. Specify one of the Leptonica constants: IFF_WEBP=webp, IFF_PNG=png, IFF_JFIF_JPEG=jpeg. 
While we support the other formats, those are ill-advised to use as web browsers won't support those other formats out of the box and choosing those formats will strongly and *negatively* impact your HTML diagnostics viewing experience. Tip: use PNG or JPEG if you want the output to be produced faster, WEBP if you want smaller image files with maximum precision. Set the jpeg_quality parameter for any of these formats for targeted compression ratio.", params()) + INT_MEMBER(debug_output_diagnostics_images_format, IMG4W_WEBP_LOSSLESS, "The format of the images included in the debug/diagnostics output HTML file. Specify a number: 0:PNG, 1:JPEG, 2:WebP, 3:lossless-WebP, 4:TIFF. While we support TIFF and higher numbers, it is ill-advised to use as web browsers won't support those out of the box and choosing those formats will strongly and *negatively* impact your HTML diagnostics viewing experience. Tip: use PNG or JPEG if you want the output to be produced faster, lossless-WEBP if you want smaller image files with maximum precision. Set the jpeg_quality parameter for any of these formats for targeted compression ratio.", params()) , pixa_debug_(this) , splitter_(this) diff --git a/src/ccstruct/debugpixa.cpp b/src/ccstruct/debugpixa.cpp index 927d18fcb5..5b555f3018 100644 --- a/src/ccstruct/debugpixa.cpp +++ b/src/ccstruct/debugpixa.cpp @@ -1247,7 +1247,7 @@ namespace tesseract { } // takes a leptonica IFF_PNG, ... identifier and produces a sane bunch of datums for us: a *supported* image format id to use, plus the accompanying filename extension. - static std::tuple get_image_output_datums(int image_bitdepth, int debug_output_diagnostics_images_format) { + static std::tuple get_image_output_datums(int image_bitdepth, int debug_output_diagnostics_images_format) { // walk the leptonica table to see what we get; then decide on something sane? // // alas, leptonica doesn't offer the complement of its getFormatFromExtension() API. 
*snif* @@ -1258,26 +1258,29 @@ namespace tesseract { switch (debug_output_diagnostics_images_format) { // case IFF_PNM, IFF_JP2, IFF_PS, IFF_LPDF, IFF_TIFF_G4, IFF_GIF: default: - return {".png", IFF_PNG}; + return {".png", IFF_PNG, IMG4W_PNG}; - case IFF_BMP: - return {".bmp", IFF_BMP}; + case IMG4W_BMP: + return {".bmp", IFF_BMP, IMG4W_BMP}; - case IFF_JFIF_JPEG: - return {".jpg", IFF_JFIF_JPEG}; + case IMG4W_JPEG: + return {".jpg", IFF_JFIF_JPEG, IMG4W_JPEG}; - case IFF_PNG: - return {".png", IFF_PNG}; + case IMG4W_PNG: + return {".png", IFF_PNG, IMG4W_PNG}; - case IFF_TIFF: - return {".tiff", IFF_TIFF}; + case IMG4W_TIFF: + return {".tiff", IFF_TIFF, IMG4W_TIFF}; - case IFF_WEBP: - return {".webp", IFF_WEBP}; + case IMG4W_WEBP: + return {".webp", IFF_WEBP, IMG4W_WEBP}; + + case IMG4W_WEBP_LOSSLESS: + return {".webp", IFF_WEBP, IMG4W_WEBP_LOSSLESS}; } } - static void write_one_pix_for_html(FILE *html, int counter, int img_format_id, int img_quality, const std::string &img_filename, const Image &pix, const Image &original_image, const std::string &title, const std::string &description, const TBOX *cliprect = nullptr) { + static void write_one_pix_for_html(FILE *html, int counter, Image4WebOutputType img_format_id, int img_quality, const std::string &img_filename, const Image &pix, const Image &original_image, const std::string &title, const std::string &description, const TBOX *cliprect = nullptr) { if (!!pix) { const char *pixfname = fz_basename(img_filename.c_str()); int w, h, depth; @@ -1308,7 +1311,7 @@ namespace tesseract { ASSERT0(32 == pixGetDepth(img)); switch (img_format_id) { default: - case IFF_PNG: + case IMG4W_PNG: /* With best zlib compression (9), get between 1 and 10% improvement * over default (6), but the compression is 3 to 10 times slower. 
* Use the zlib default (6) as our default compression unless @@ -1328,7 +1331,7 @@ namespace tesseract { } break; - case IFF_JFIF_JPEG: + case IMG4W_JPEG: img_quality = std::max(0, std::min(100, img_quality)); pixSetSpecial(img, img_quality); if (pixWrite(img_filename.c_str(), img, IFF_JFIF_JPEG)) { @@ -1338,7 +1341,28 @@ namespace tesseract { } break; - case IFF_WEBP: { + case IMG4W_TIFF: + img_quality = std::max(0, std::min(100, img_quality)); + pixSetSpecial(img, img_quality); + if (pixWrite(img_filename.c_str(), img, IFF_TIFF)) { + tprintError("Did not succeeed writing the image data to file '{}' while generating the HTML diagnostic/log report.\n", img_filename); + // delete broken output file(s): + remove(img_filename.c_str()); + } + break; + + case IMG4W_BMP: + //img_quality = std::max(0, std::min(100, img_quality)); + //pixSetSpecial(img, img_quality); + if (pixWrite(img_filename.c_str(), img, IFF_BMP)) { + tprintError("Did not succeeed writing the image data to file '{}' while generating the HTML diagnostic/log report.\n", img_filename); + // delete broken output file(s): + remove(img_filename.c_str()); + } + break; + + case IMG4W_WEBP_LOSSLESS: + case IMG4W_WEBP: { FILE *fp = fopen(img_filename.c_str(), "wb+"); if (!fp) { tprintError("Failed to open file '{}' for writing one of the debug/diagnostics log impages.\n", img_filename); @@ -1347,7 +1371,7 @@ namespace tesseract { //img_quality += 5; //img_quality /= 10; img_quality = std::max(0, std::min(100, img_quality)); - auto rv = pixWriteStreamWebP(fp, img, img_quality, (img_quality > 99) /* lossless */); + auto rv = pixWriteStreamWebP(fp, img, img_quality, (img_format_id == IMG4W_WEBP_LOSSLESS)); fclose(fp); if (rv) { tprintError("Did not succeeed writing the image data to file '{}' while generating the HTML diagnostic/log report.\n", img_filename); @@ -1392,7 +1416,7 @@ namespace tesseract { int img_depth = pixGetDepth(pixs); ASSERT0(img_depth == 1 || img_depth == 8 || img_depth == 24 || img_depth == 
32); - auto [image_extension, image_format_id] = get_image_output_datums(img_depth, tesseract_->debug_output_diagnostics_images_format); + auto [image_extension, pix_format_id, image_format_id] = get_image_output_datums(img_depth, tesseract_->debug_output_diagnostics_images_format); std::string fn(partname + SanitizeFilenamePart(fmt::format(".img{:04d}.", counter) + caption) + image_extension); TBOX cliprect = cliprects[idx]; @@ -1655,7 +1679,7 @@ namespace tesseract { { Image pixs = tesseract_->pix_original(); int img_depth = pixGetDepth(pixs); - auto [image_extension, image_format_id] = get_image_output_datums(img_depth, tesseract_->debug_output_diagnostics_images_format); + auto [image_extension, pix_format_id, image_format_id] = get_image_output_datums(img_depth, tesseract_->debug_output_diagnostics_images_format); std::string fn(partname + SanitizeFilenamePart(".img-original.") + image_extension); write_one_pix_for_html(html, 0, image_format_id, tesseract_->jpg_quality, fn, pixs, Image(), "original image", "The original image as registered with the Tesseract instance."); diff --git a/src/ccstruct/debugpixa.h b/src/ccstruct/debugpixa.h index 414e37b41f..dafebb7f91 100644 --- a/src/ccstruct/debugpixa.h +++ b/src/ccstruct/debugpixa.h @@ -20,6 +20,15 @@ namespace tesseract { class TESS_API Tesseract; class TESS_API TBOX; + enum Image4WebOutputType : int { + IMG4W_PNG = 0, + IMG4W_JPEG, + IMG4W_WEBP, + IMG4W_WEBP_LOSSLESS, + IMG4W_TIFF, + IMG4W_BMP, + }; + // Class to hold a Pixa collection of debug images with captions and save them // to a PDF file. // The class MAY also store additional diagnostic information, that's interspersed From 3b0fd398c31eaa6156e3833804fda46c5c00c10a Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Tue, 23 Jul 2024 21:46:41 +0200 Subject: [PATCH 26/66] there does not exist no file_type ".bl" --> config line removed. 
--- tessdata/configs/box.train | 1 - tessdata/configs/box.train.stderr | 1 - tessdata/configs/lstm.train | 1 - 3 files changed, 3 deletions(-) diff --git a/tessdata/configs/box.train b/tessdata/configs/box.train index d39f2687ef..9f9707a74a 100644 --- a/tessdata/configs/box.train +++ b/tessdata/configs/box.train @@ -1,5 +1,4 @@ disable_character_fragments T -file_type .bl textord_fast_pitch_test T tessedit_zero_rejection T tessedit_minimal_rejection F diff --git a/tessdata/configs/box.train.stderr b/tessdata/configs/box.train.stderr index 82754e9cc9..08b1eba358 100644 --- a/tessdata/configs/box.train.stderr +++ b/tessdata/configs/box.train.stderr @@ -1,4 +1,3 @@ -file_type .bl #tessedit_use_nn F textord_fast_pitch_test T tessedit_zero_rejection T diff --git a/tessdata/configs/lstm.train b/tessdata/configs/lstm.train index 5ff3772621..c33d106d5e 100644 --- a/tessdata/configs/lstm.train +++ b/tessdata/configs/lstm.train @@ -1,4 +1,3 @@ -file_type .bl textord_fast_pitch_test T tessedit_zero_rejection T tessedit_minimal_rejection F From ea0683c82e496e0d05b8dd722e173d70def584fb Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 24 Jul 2024 00:20:22 +0200 Subject: [PATCH 27/66] remove local class cache preserve_interword_spaces_ and access tesseract parameter preserve_interword_spaces everywhere directly. # Conflicts: # include/tesseract/resultiterator.h --- include/tesseract/resultiterator.h | 6 ------ src/ccmain/resultiterator.cpp | 10 +--------- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/include/tesseract/resultiterator.h b/include/tesseract/resultiterator.h index a0f287a2fa..a8b3c07752 100644 --- a/include/tesseract/resultiterator.h +++ b/include/tesseract/resultiterator.h @@ -237,12 +237,6 @@ class TESS_API ResultIterator : public LTRResultIterator { /** Is the currently pointed-at character in a minor-direction sequence? 
*/ bool in_minor_direction_; - - /** - * Should detected inter-word spaces be preserved, or "compressed" to a single - * space character (default behavior). - */ - bool preserve_interword_spaces_; }; } // namespace tesseract. diff --git a/src/ccmain/resultiterator.cpp b/src/ccmain/resultiterator.cpp index a7e9b37983..4aef0fd109 100644 --- a/src/ccmain/resultiterator.cpp +++ b/src/ccmain/resultiterator.cpp @@ -42,14 +42,6 @@ namespace tesseract { ResultIterator::ResultIterator(const LTRResultIterator &resit) : LTRResultIterator(resit) { in_minor_direction_ = false; at_beginning_of_minor_run_ = false; - preserve_interword_spaces_ = false; - - auto *p = ParamUtils::FindParam( - "preserve_interword_spaces", GlobalParams()->bool_params_c(), tesseract_->params()->bool_params_c()); - if (p != nullptr) { - preserve_interword_spaces_ = (bool)(*p); - } - current_paragraph_is_ltr_ = CurrentParagraphIsLtr(); MoveToLogicalStartOfTextline(); } @@ -748,7 +740,7 @@ void ResultIterator::IterateAndAppendUTF8TextlineText(std::string *text) { int words_appended = 0; do { - int numSpaces = preserve_interword_spaces_ ? it_->word()->word->space() : (words_appended > 0); + int numSpaces = tesseract_->preserve_interword_spaces ? it_->word()->word->space() : (words_appended > 0); for (int i = 0; i < numSpaces; ++i) { *text += " "; } From ad34a68fcd130059734d6d0e6bab4465e0df0757 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 24 Jul 2024 00:24:44 +0200 Subject: [PATCH 28/66] WS --- src/ccstruct/blobbox.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ccstruct/blobbox.h b/src/ccstruct/blobbox.h index 3dc5452a4e..118ee3aa99 100644 --- a/src/ccstruct/blobbox.h +++ b/src/ccstruct/blobbox.h @@ -72,7 +72,9 @@ enum TabType { DECL_FMT_FORMAT_TESSENUMTYPE(TabType); // The possible region types of a BLOBNBOX. +// // Note: keep all the text types > BRT_UNKNOWN and all the image types less. 
// // Keep in sync with kBlobTypes in colpartition.cpp and BoxColor, and the // *Type static functions below. enum BlobRegionType { @@ -90,6 +92,7 @@ enum BlobRegionType { DECL_FMT_FORMAT_TESSENUMTYPE(BlobRegionType); // enum for elements of arrays that refer to neighbours. +// // NOTE: keep in this order, so ^2 can be used to flip direction. enum BlobNeighbourDir { BND_LEFT, From ebfaab60c68d5d62689ae4b76bad5e5d440e5a9a Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 24 Jul 2024 00:25:33 +0200 Subject: [PATCH 29/66] functions defined in headerfiles should be `static inline` instead of just `inline`. --- src/ccutil/genericvector.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ccutil/genericvector.h b/src/ccutil/genericvector.h index 4dc9606b85..635045eac7 100644 --- a/src/ccutil/genericvector.h +++ b/src/ccutil/genericvector.h @@ -236,7 +236,7 @@ class GenericVector { // The default FileReader loads the whole file into the vector of char, // returning false on error. -inline bool LoadDataFromFile(const char *filename, GenericVector *data) { +static inline bool LoadDataFromFile(const char *filename, GenericVector *data) { bool result = false; FILE *fp = fopenUtf8(filename, "rb"); if (fp != nullptr) { @@ -257,7 +257,7 @@ inline bool LoadDataFromFile(const char *filename, GenericVector *data) { // The default FileWriter writes the vector of char to the filename file, // returning false on error. -inline bool SaveDataToFile(const GenericVector &data, const char *filename) { +static inline bool SaveDataToFile(const GenericVector &data, const char *filename) { FILE *fp = fopenUtf8(filename, "wb"); if (fp == nullptr) { return false; From 3e9a83d0c98a880d2542a3becbabab6cda7f6f5b Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 24 Jul 2024 00:26:52 +0200 Subject: [PATCH 30/66] FGets(): always deliver the line/string read from file with NUL sentinel guaranteed.
--- src/ccutil/serialis.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ccutil/serialis.cpp b/src/ccutil/serialis.cpp index 9f9f7e742c..c4f5b4d3b1 100644 --- a/src/ccutil/serialis.cpp +++ b/src/ccutil/serialis.cpp @@ -205,9 +205,9 @@ char *TFile::FGets(char *buffer, int buffer_size) { break; } } - if (size < buffer_size) { - buffer[size] = '\0'; - } + ASSERT0(size < buffer_size); + buffer[size] = '\0'; + return size > 0 ? buffer : nullptr; } From 327987deacb5b07a2e4cb14b000a11b394717a85 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 24 Jul 2024 00:29:10 +0200 Subject: [PATCH 31/66] fmt::format enum types: the out-of-range values should be clearly identifiable as 'illegal', so don't just name them 'unknown' but rather 'unknown/illegal': this also disambiguates expectations for the 'unknown' names as there are some *legal* enum values which carry the name 'unknown', which could be a little confusing otherwise. --- src/ccutil/tprintf_fmt_types_support.cpp | 313 +++-------------------- 1 file changed, 41 insertions(+), 272 deletions(-) diff --git a/src/ccutil/tprintf_fmt_types_support.cpp b/src/ccutil/tprintf_fmt_types_support.cpp index 0ec8578749..f1b85f977a 100644 --- a/src/ccutil/tprintf_fmt_types_support.cpp +++ b/src/ccutil/tprintf_fmt_types_support.cpp @@ -74,7 +74,7 @@ auto fmt::formatter::format(PITCH_TYPE c, format_context &ctx) const name = "corrected_proportional"; break; default: - name = "unknown_pitch"; + name = "unknown/illegal_pitch"; break; } auto id = fmt::format("{}({})", name, static_cast(c)); @@ -82,8 +82,7 @@ auto fmt::formatter::format(PITCH_TYPE c, format_context &ctx) const return formatter::format(id, ctx); } -auto fmt::formatter::format(PolyBlockType c, - format_context &ctx) const +auto fmt::formatter::format(PolyBlockType c, format_context &ctx) const -> decltype(ctx.out()) { const char *name; // enum PolyBlockType: @@ -134,7 +133,7 @@ auto fmt::formatter::format(PolyBlockType c, name = "PT_NOISE"; 
break; default: - name = "unknown_blocktype"; + name = "unknown/illegal_blocktype"; break; } auto id = fmt::format("{}({})", name, static_cast(c)); @@ -142,8 +141,7 @@ auto fmt::formatter::format(PolyBlockType c, return formatter::format(id, ctx); } -auto fmt::formatter::format(Orientation c, - format_context &ctx) const +auto fmt::formatter::format(Orientation c, format_context &ctx) const -> decltype(ctx.out()) { const char *name; // enum Orientation: @@ -161,7 +159,7 @@ auto fmt::formatter::format(Orientation c, name = "page_left"; break; default: - name = "unknown_orientation"; + name = "unknown/illegal_orientation"; break; } auto id = fmt::format("{}({})", name, static_cast(c)); @@ -169,8 +167,7 @@ auto fmt::formatter::format(Orientation c, return formatter::format(id, ctx); } -auto fmt::formatter::format(WritingDirection c, - format_context &ctx) const +auto fmt::formatter::format(WritingDirection c, format_context &ctx) const -> decltype(ctx.out()) { const char *name; // enum WritingDirection: @@ -185,7 +182,7 @@ auto fmt::formatter::format(WritingDirection c, name = "top_to_bottom"; break; default: - name = "unknown_direction"; + name = "unknown/illegal_direction"; break; } auto id = fmt::format("{}({})", name, static_cast(c)); @@ -193,8 +190,7 @@ auto fmt::formatter::format(WritingDirection c, return formatter::format(id, ctx); } -auto fmt::formatter::format(TextlineOrder c, - format_context &ctx) const +auto fmt::formatter::format(TextlineOrder c, format_context &ctx) const -> decltype(ctx.out()) { const char *name; // enum TextlineOrder: @@ -209,7 +205,7 @@ auto fmt::formatter::format(TextlineOrder c, name = "order_top_to_bottom"; break; default: - name = "order_unknown"; + name = "unknown/illegal_line_order"; break; } auto id = fmt::format("{}({})", name, static_cast(c)); @@ -217,8 +213,7 @@ auto fmt::formatter::format(TextlineOrder c, return formatter::format(id, ctx); } -auto fmt::formatter::format(PageSegMode c, - format_context &ctx) const +auto 
fmt::formatter::format(PageSegMode c, format_context &ctx) const -> decltype(ctx.out()) { const char *name; // enum PageSegMode: @@ -266,7 +261,7 @@ auto fmt::formatter::format(PageSegMode c, name = "Treat_as_a_single_text_line_bypassing_all_tesseract_hacks"; break; default: - name = "unknown_page_seg_mode"; + name = "unknown/illegal_page_seg_mode"; break; } auto id = fmt::format("{}({})", name, static_cast(c)); @@ -280,25 +275,25 @@ auto fmt::formatter::format(TabType c, format_context &ctx) const // enum TabType: switch (c) { case TabType::TT_NONE: - name = "not_a_tab"; + name = "not_a_TAB"; break; case TabType::TT_DELETED: - name = "deleted_not_a_tab_after_analysis"; + name = "deleted_not_a_TAB_after_analysis"; break; case TabType::TT_MAYBE_RAGGED: - name = "maybe_ragged"; + name = "maybe_ragged_TAB"; break; case TabType::TT_MAYBE_ALIGNED: - name = "maybe_aligned"; + name = "maybe_aligned_TAB"; break; case TabType::TT_CONFIRMED: - name = "aligned_with_neighbours"; + name = "confirmed_TAB_aligned_with_neighbours"; break; case TabType::TT_VLINE: - name = "vertical_line"; + name = "vertical_line_TAB"; break; default: - name = "unknown_tab"; + name = "unknown/illegal_TAB"; break; } auto id = fmt::format("{}({})", name, static_cast(c)); @@ -306,14 +301,13 @@ auto fmt::formatter::format(TabType c, format_context &ctx) const return formatter::format(id, ctx); } -auto fmt::formatter::format(BlobRegionType c, - format_context &ctx) const +auto fmt::formatter::format(BlobRegionType c, format_context &ctx) const -> decltype(ctx.out()) { const char *name; // enum BlobRegionType: switch (c) { case BlobRegionType::BRT_NOISE: - name = "neither_text_nor_image"; + name = "neither_text_nor_image_region"; break; case BlobRegionType::BRT_HLINE: name = "horizontal_separator_line"; @@ -328,7 +322,7 @@ auto fmt::formatter::format(BlobRegionType c, name = "nonrectangular_image"; break; case BlobRegionType::BRT_UNKNOWN: - name = "not_determined_yet"; + name = 
"region_type_not_determined_yet"; break; case BlobRegionType::BRT_VERT_TEXT: name = "vertical_aligned_text"; @@ -337,7 +331,7 @@ auto fmt::formatter::format(BlobRegionType c, name = "convincing_text"; break; default: - name = "unknown_blob_region"; + name = "unknown/illegal_blob_region"; break; } auto id = fmt::format("{}({})", name, static_cast(c)); @@ -345,8 +339,7 @@ auto fmt::formatter::format(BlobRegionType c, return formatter::format(id, ctx); } -auto fmt::formatter::format(BlobNeighbourDir c, - format_context &ctx) const +auto fmt::formatter::format(BlobNeighbourDir c, format_context &ctx) const -> decltype(ctx.out()) { const char *name; // enum BlobNeighbourDir: @@ -364,7 +357,7 @@ auto fmt::formatter::format(BlobNeighbourDir c, name = "above"; break; default: - name = "unknown_neighbour_dir"; + name = "unknown/illegal_neighbour_dir"; break; } auto id = fmt::format("{}({})", name, static_cast(c)); @@ -372,8 +365,7 @@ auto fmt::formatter::format(BlobNeighbourDir c, return formatter::format(id, ctx); } -auto fmt::formatter::format(BlobSpecialTextType c, - format_context &ctx) const +auto fmt::formatter::format(BlobSpecialTextType c, format_context &ctx) const -> decltype(ctx.out()) { const char *name; // enum BlobSpecialTextType: @@ -397,7 +389,7 @@ auto fmt::formatter::format(BlobSpecialTextType c, name = "BSTT_SKIP"; break; default: - name = "unknown_special_text_type"; + name = "unknown/illegal_special_text_type"; break; } auto id = fmt::format("{}({})", name, static_cast(c)); @@ -405,8 +397,7 @@ auto fmt::formatter::format(BlobSpecialTextType c, return formatter::format(id, ctx); } -auto fmt::formatter::format(BlobTextFlowType c, - format_context &ctx) const +auto fmt::formatter::format(BlobTextFlowType c, format_context &ctx) const -> decltype(ctx.out()) { const char *name; // enum BlobTextFlowType: @@ -433,7 +424,7 @@ auto fmt::formatter::format(BlobTextFlowType c, name = "BTFT_LEADER"; break; default: - name = "unknown_textflow"; + name = 
"unknown/illegal_textflow"; break; } auto id = fmt::format("{}({})", name, static_cast(c)); @@ -441,8 +432,7 @@ auto fmt::formatter::format(BlobTextFlowType c, return formatter::format(id, ctx); } -auto fmt::formatter::format(NetworkType c, - format_context &ctx) const +auto fmt::formatter::format(NetworkType c, format_context &ctx) const -> decltype(ctx.out()) { const char *name; // enum NetworkType: @@ -529,7 +519,7 @@ auto fmt::formatter::format(NetworkType c, name = "NT_TENSORFLOW"; break; default: - name = "unknown_networktype"; + name = "unknown/illegal_networktype"; break; } auto id = fmt::format("{}({})", name, static_cast(c)); @@ -556,7 +546,7 @@ auto fmt::formatter::format(LineType c, format_context &ctx) const name = "multiple"; break; default: - name = "unknown_linetype"; + name = "unknown/illegal_linetype"; break; } auto id = fmt::format("{}({})", name, static_cast(c)); @@ -564,8 +554,7 @@ auto fmt::formatter::format(LineType c, format_context &ctx) const return formatter::format(id, ctx); } -auto fmt::formatter::format(BlobChoiceClassifier c, - format_context &ctx) const +auto fmt::formatter::format(BlobChoiceClassifier c, format_context &ctx) const -> decltype(ctx.out()) { const char *name; // enum PITCH_TYPE: @@ -586,7 +575,7 @@ auto fmt::formatter::format(BlobChoiceClassifier c, name = "fake"; break; default: - name = "unknown_blobchoice"; + name = "unknown/illegal_blobchoice"; break; } auto id = fmt::format("{}({})", name, static_cast(c)); @@ -595,8 +584,7 @@ auto fmt::formatter::format(BlobChoiceClassifier c, } -auto fmt::formatter::format(PermuterType c, - format_context &ctx) const +auto fmt::formatter::format(PermuterType c, format_context &ctx) const -> decltype(ctx.out()) { const char *name; // enum PITCH_TYPE: @@ -641,7 +629,7 @@ auto fmt::formatter::format(PermuterType c, name = "compound"; break; default: - name = "unknown_permuter"; + name = "unknown/illegal_permuter"; break; } auto id = fmt::format("{}({})", name, static_cast(c)); @@ 
-653,7 +641,7 @@ auto fmt::formatter::format(PermuterType c, auto fmt::formatter::format(DawgType c, format_context &ctx) const -> decltype(ctx.out()) { const char *name; - // enum PITCH_TYPE: + // enum DawgType: switch (c) { case DawgType::DAWG_TYPE_PUNCTUATION: name = "Punctuation"; @@ -668,7 +656,7 @@ auto fmt::formatter::format(DawgType c, format_context &ctx) const name = "Pattern"; break; default: - name = "Unknown"; + name = "unknown/illegal-dawg"; break; } auto id = fmt::format("{}({})", name, static_cast(c)); @@ -680,7 +668,7 @@ auto fmt::formatter::format(DawgType c, format_context &ctx) const auto fmt::formatter::format(LossType c, format_context &ctx) const -> decltype(ctx.out()) { const char *name; - // enum PITCH_TYPE: + // enum LossType: switch (c) { case LossType::LT_NONE: name = "None/Undefined"; @@ -695,7 +683,7 @@ auto fmt::formatter::format(LossType c, format_context &ctx) const name = "Logistic"; break; default: - name = "Unknown"; + name = "unknown/illegal-loss"; break; } auto id = fmt::format("{}({})", name, static_cast(c)); @@ -731,7 +719,7 @@ auto fmt::formatter::format(ThresholdMethod c, format_context & name = "MaxThreshold"; break; default: - name = "unknown_threshold_method"; + name = "unknown/illegal_threshold_method"; break; } auto id = fmt::format("{}({})", name, static_cast(c)); @@ -819,7 +807,7 @@ auto fmt::formatter::format(UNICHARSET::Direction c, form break; #endif // U_HIDE_DEPRECATED_API default: - name = "unknown_threshold_method"; + name = "unknown/illegal_threshold_method"; break; } auto id = fmt::format("{}({})", name, static_cast(c)); @@ -828,223 +816,4 @@ auto fmt::formatter::format(UNICHARSET::Direction c, form } -#if 0 - -auto fmt::formatter::format(PITCH_TYPE c, format_context &ctx) const - -> decltype(ctx.out()) { - const char *name; - // enum PITCH_TYPE: - switch (c) { - case PITCH_TYPE::PITCH_DUNNO: - name = "insufficient_data"; - break; - case PITCH_TYPE::PITCH_DEF_FIXED: - name = "definitely_fixed"; - break; - 
case PITCH_TYPE::PITCH_MAYBE_FIXED: - name = "maybe_fixed"; - break; - case PITCH_TYPE::PITCH_DEF_PROP: - name = "definitely_proportional"; - break; - case PITCH_TYPE::PITCH_MAYBE_PROP: - name = "maybe_proportional"; - break; - case PITCH_TYPE::PITCH_CORR_FIXED: - name = "corrected_fixed"; - break; - case PITCH_TYPE::PITCH_CORR_PROP: - name = "corrected_proportional"; - break; - default: - name = "unknown"; - break; - } - auto id = fmt::format("{}({})", name, static_cast(c)); - - return formatter::format(id, ctx); -} - - -auto fmt::formatter::format(PITCH_TYPE c, format_context &ctx) const - -> decltype(ctx.out()) { - const char *name; - // enum PITCH_TYPE: - switch (c) { - case PITCH_TYPE::PITCH_DUNNO: - name = "insufficient_data"; - break; - case PITCH_TYPE::PITCH_DEF_FIXED: - name = "definitely_fixed"; - break; - case PITCH_TYPE::PITCH_MAYBE_FIXED: - name = "maybe_fixed"; - break; - case PITCH_TYPE::PITCH_DEF_PROP: - name = "definitely_proportional"; - break; - case PITCH_TYPE::PITCH_MAYBE_PROP: - name = "maybe_proportional"; - break; - case PITCH_TYPE::PITCH_CORR_FIXED: - name = "corrected_fixed"; - break; - case PITCH_TYPE::PITCH_CORR_PROP: - name = "corrected_proportional"; - break; - default: - name = "unknown"; - break; - } - auto id = fmt::format("{}({})", name, static_cast(c)); - - return formatter::format(id, ctx); -} - - -auto fmt::formatter::format(PITCH_TYPE c, format_context &ctx) const - -> decltype(ctx.out()) { - const char *name; - // enum PITCH_TYPE: - switch (c) { - case PITCH_TYPE::PITCH_DUNNO: - name = "insufficient_data"; - break; - case PITCH_TYPE::PITCH_DEF_FIXED: - name = "definitely_fixed"; - break; - case PITCH_TYPE::PITCH_MAYBE_FIXED: - name = "maybe_fixed"; - break; - case PITCH_TYPE::PITCH_DEF_PROP: - name = "definitely_proportional"; - break; - case PITCH_TYPE::PITCH_MAYBE_PROP: - name = "maybe_proportional"; - break; - case PITCH_TYPE::PITCH_CORR_FIXED: - name = "corrected_fixed"; - break; - case PITCH_TYPE::PITCH_CORR_PROP: - name 
= "corrected_proportional"; - break; - default: - name = "unknown"; - break; - } - auto id = fmt::format("{}({})", name, static_cast(c)); - - return formatter::format(id, ctx); -} - - -auto fmt::formatter::format(PITCH_TYPE c, format_context &ctx) const - -> decltype(ctx.out()) { - const char *name; - // enum PITCH_TYPE: - switch (c) { - case PITCH_TYPE::PITCH_DUNNO: - name = "insufficient_data"; - break; - case PITCH_TYPE::PITCH_DEF_FIXED: - name = "definitely_fixed"; - break; - case PITCH_TYPE::PITCH_MAYBE_FIXED: - name = "maybe_fixed"; - break; - case PITCH_TYPE::PITCH_DEF_PROP: - name = "definitely_proportional"; - break; - case PITCH_TYPE::PITCH_MAYBE_PROP: - name = "maybe_proportional"; - break; - case PITCH_TYPE::PITCH_CORR_FIXED: - name = "corrected_fixed"; - break; - case PITCH_TYPE::PITCH_CORR_PROP: - name = "corrected_proportional"; - break; - default: - name = "unknown"; - break; - } - auto id = fmt::format("{}({})", name, static_cast(c)); - - return formatter::format(id, ctx); -} - - -auto fmt::formatter::format(PITCH_TYPE c, format_context &ctx) const - -> decltype(ctx.out()) { - const char *name; - // enum PITCH_TYPE: - switch (c) { - case PITCH_TYPE::PITCH_DUNNO: - name = "insufficient_data"; - break; - case PITCH_TYPE::PITCH_DEF_FIXED: - name = "definitely_fixed"; - break; - case PITCH_TYPE::PITCH_MAYBE_FIXED: - name = "maybe_fixed"; - break; - case PITCH_TYPE::PITCH_DEF_PROP: - name = "definitely_proportional"; - break; - case PITCH_TYPE::PITCH_MAYBE_PROP: - name = "maybe_proportional"; - break; - case PITCH_TYPE::PITCH_CORR_FIXED: - name = "corrected_fixed"; - break; - case PITCH_TYPE::PITCH_CORR_PROP: - name = "corrected_proportional"; - break; - default: - name = "unknown"; - break; - } - auto id = fmt::format("{}({})", name, static_cast(c)); - - return formatter::format(id, ctx); -} - - -auto fmt::formatter::format(PITCH_TYPE c, format_context &ctx) const - -> decltype(ctx.out()) { - const char *name; - // enum PITCH_TYPE: - switch (c) { - 
case PITCH_TYPE::PITCH_DUNNO: - name = "insufficient_data"; - break; - case PITCH_TYPE::PITCH_DEF_FIXED: - name = "definitely_fixed"; - break; - case PITCH_TYPE::PITCH_MAYBE_FIXED: - name = "maybe_fixed"; - break; - case PITCH_TYPE::PITCH_DEF_PROP: - name = "definitely_proportional"; - break; - case PITCH_TYPE::PITCH_MAYBE_PROP: - name = "maybe_proportional"; - break; - case PITCH_TYPE::PITCH_CORR_FIXED: - name = "corrected_fixed"; - break; - case PITCH_TYPE::PITCH_CORR_PROP: - name = "corrected_proportional"; - break; - default: - name = "unknown"; - break; - } - auto id = fmt::format("{}({})", name, static_cast(c)); - - return formatter::format(id, ctx); -} - -#endif - } // namespace fmt From e4add8d12a0d6486906eba2c32d6bb3805981872 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 24 Jul 2024 00:30:31 +0200 Subject: [PATCH 32/66] disabled code in #if 0..#endif wrappers instead plonking extra comment markers around such blocks: the preprocessor tactic is safe vs. "nested comments" problems with the latter approach. --- src/classify/adaptmatch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/classify/adaptmatch.cpp b/src/classify/adaptmatch.cpp index 657c6fca2b..fcc8b2c17e 100644 --- a/src/classify/adaptmatch.cpp +++ b/src/classify/adaptmatch.cpp @@ -345,7 +345,7 @@ void Classify::LearnWord(const char *fontname, WERD_RES *word) { // TODO(rays): re-enable this part of the code when we switch to the // new classifier that needs to see examples of garbage. - /* +#if 0 if (word->best_state[ch] > 1) { // If the next blob is good, make junk with the rightmost fragment. 
if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) { @@ -368,7 +368,7 @@ if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) { word->best_state[ch] + word->best_state[ch + 1], threshold, CST_NGRAM, joined_text.c_str(), word); } -*/ +#endif } start_blob += word->best_state[ch]; } From d1a075cf673e909e323a0e2c2a2e1cef5aa901a6 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 24 Jul 2024 00:31:27 +0200 Subject: [PATCH 33/66] - UniformCertainties() returns bool, not int. - improved legibility of a few debug/diagnostics messages produced by tesseract. --- src/dict/dict.h | 2 +- src/dict/stopper.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dict/dict.h b/src/dict/dict.h index 8197243ee7..9345e08c15 100644 --- a/src/dict/dict.h +++ b/src/dict/dict.h @@ -309,7 +309,7 @@ class TESS_API Dict : public DictSettings { /// word (i.e. false will be returned in that case). The algorithm computes /// the mean and std deviation of the certainties in the word with the worst /// certainty thrown out. - int UniformCertainties(const WERD_CHOICE &word); + bool UniformCertainties(const WERD_CHOICE &word); /// Returns true if the given best_choice is good enough to stop. bool AcceptableChoice(const WERD_CHOICE &best_choice, XHeightConsistencyEnum xheight_consistency); /// Returns false if the best choice for the current word is questionable diff --git a/src/dict/stopper.cpp b/src/dict/stopper.cpp index d2585d4609..3b2f1d90c7 100644 --- a/src/dict/stopper.cpp +++ b/src/dict/stopper.cpp @@ -229,7 +229,7 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r UnicharIdArrayUtils::print(wrong_ngram, getUnicharset()); tprintDebug("Current ngram from spec: "); UnicharIdArrayUtils::print(ambig_spec->wrong_ngram, getUnicharset()); - tprintDebug("Comparison result: {}\n", compare); + tprintDebug("Ambiguity comparison result: {}{}\n", compare, (compare == 0 ? 
" (we found an ambiguity)" : "")); } if (compare == 0) { // Record the place where we found an ambiguity. @@ -239,7 +239,7 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r getUnicharset().get_isngram(ambig_spec->correct_ngram_id), leftmost_id)); if (stopper_debug_level > 1) { - tprintDebug("fixpt+=(blob_index:{} index+num_wrong_blobs:{} isngram:{} leftmost_id:`{}`)\n", blob_index, blob_index + num_wrong_blobs, + tprintDebug("fixpt+=(blob_index:{} index+num_wrong_blobs:{} replace:{} isngram:{} leftmost_id:`{}`)\n", blob_index, blob_index + num_wrong_blobs, replace, getUnicharset().get_isngram(ambig_spec->correct_ngram_id), getUnicharset().id_to_unichar(leftmost_id)); } @@ -466,7 +466,7 @@ int Dict::LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const { return shortest; } -int Dict::UniformCertainties(const WERD_CHOICE &word) { +bool Dict::UniformCertainties(const WERD_CHOICE &word) { float Certainty; float WorstCertainty = FLT_MAX; float CertaintyThreshold; From bc0199ee5e96a2c368ef5bbfdf49f1d0700f4b75 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 24 Jul 2024 00:32:21 +0200 Subject: [PATCH 34/66] safer implementation of the debug_level_offset-based debug level temporary bumping.
--- src/textord/baselinedetect.cpp | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/textord/baselinedetect.cpp b/src/textord/baselinedetect.cpp index cd695cf408..1f6d575fbf 100644 --- a/src/textord/baselinedetect.cpp +++ b/src/textord/baselinedetect.cpp @@ -194,9 +194,10 @@ bool BaselineRow::FitBaseline(bool use_box_bottoms) { } } int debug_level_offset = 0; + int old_level = debug_baseline_detector_level; if (debug_baseline_detector_level + is_within_enhanced_debug_y_coord_range(bounding_box_) > 1) { debug_level_offset = 2; - debug_baseline_detector_level = debug_baseline_detector_level + debug_level_offset; + debug_baseline_detector_level = old_level + debug_level_offset; } // Now we obtained a direction from that fit, see if we can improve the @@ -221,7 +222,7 @@ bool BaselineRow::FitBaseline(bool use_box_bottoms) { good_baseline_ = false; } - debug_baseline_detector_level = debug_baseline_detector_level - debug_level_offset; + debug_baseline_detector_level = old_level; return good_baseline_; } @@ -235,14 +236,15 @@ void BaselineRow::AdjustBaselineToParallel(const FCOORD &direction) { } int debug_level_offset = 0; + int old_level = debug_baseline_detector_level; if (debug_baseline_detector_level + is_within_enhanced_debug_y_coord_range(bounding_box_) > 1) { debug_level_offset = 2; - debug_baseline_detector_level = debug_baseline_detector_level + debug_level_offset; + debug_baseline_detector_level = old_level + debug_level_offset; } FitConstrainedIfBetter(direction, 0.0, displacement_modes_[0]); - debug_baseline_detector_level = debug_baseline_detector_level - debug_level_offset; + debug_baseline_detector_level = old_level; } // Modifies the baseline to snap to the textline grid if the existing @@ -315,26 +317,28 @@ void BaselineRow::SetupBlobDisplacements(const FCOORD &direction) { BLOBNBOX *blob = blob_it.data(); const TBOX &box = blob->bounding_box(); - int debug_level_offset = 0; - if 
(debug_baseline_detector_level + is_within_enhanced_debug_y_coord_range(box) > 1) { - debug_level_offset = 2; - debug_baseline_detector_level = debug_baseline_detector_level + debug_level_offset; - } + int debug_level_offset = 0; + int old_level = debug_baseline_detector_level; + if (debug_baseline_detector_level + is_within_enhanced_debug_y_coord_range(box) > 1) { + debug_level_offset = 2; + debug_baseline_detector_level = old_level + debug_level_offset; + } - FCOORD blob_pos((box.left() + box.right()) / 2.0f, + FCOORD blob_pos((box.left() + box.right()) / 2.0f, blob->baseline_position()); double offset = direction * blob_pos; perp_blob_dists.push_back(offset); - if (debug_baseline_detector_level > 0) { + if (debug_baseline_detector_level > 0) { tprintDebug("Displacement {} for blob at:", offset); box.print(); } - UpdateRange(offset, &min_dist, &max_dist); + UpdateRange(offset, &min_dist, &max_dist); - debug_baseline_detector_level = debug_baseline_detector_level - debug_level_offset; + debug_baseline_detector_level = old_level; } + // Set up a histogram using disp_quant_factor_ as the bucket size. STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_), IntCastRounded(max_dist / disp_quant_factor_)); @@ -347,7 +351,7 @@ void BaselineRow::SetupBlobDisplacements(const FCOORD &direction) { if (debug_baseline_detector_level > 0) { for (int i = 0; i < scaled_modes.size(); ++i) { tprintDebug("Top mode = {} * {}\n", scaled_modes[i].key() * disp_quant_factor_, - scaled_modes[i].data()); + scaled_modes[i].data()); } } From 9a777b587df75b7965b01168b3ea18c5efb91d8d Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 24 Jul 2024 00:33:44 +0200 Subject: [PATCH 35/66] fix iterator coding slip-up for the inner loop of ColPartitionSet::UnmatchedWidth(): surely the intent was to use the inner iterator as otherwise you'ld be chewing loop-invariant blocks every round. 
Picked up from dev/master branch; originally this was commit 0082dae6f1c767ecf5f148124e59233a2d13be8f (HEAD), Author: Balearica , Date: Sat Apr 20 12:06:55 2024 -0700 :: Fixed column set quality calculation. --- src/textord/colpartitionset.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/textord/colpartitionset.cpp b/src/textord/colpartitionset.cpp index b539ff5f58..5f7c6d8ca1 100644 --- a/src/textord/colpartitionset.cpp +++ b/src/textord/colpartitionset.cpp @@ -323,7 +323,7 @@ int ColPartitionSet::UnmatchedWidth(ColPartitionSet *part_set) { int y = part->MidY(); BLOBNBOX_C_IT box_it(part->boxes()); for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) { - const TBOX &box = it.data()->bounding_box(); + const TBOX &box = box_it.data()->bounding_box(); // Assume that the whole blob is outside any column iff its x-middle // is outside. int x = (box.left() + box.right()) / 2; From 123a6e86cac48d733ffbbb0a26a6a4c03aea5ffe Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 24 Jul 2024 01:30:37 +0200 Subject: [PATCH 36/66] Report timeout/cancel events triggered by the monitor as ERRORS instead of WARNINGS: after all, these events abort major parts of the process or the OCR process entirely.
--- src/api/baseapi.cpp | 4 ++-- src/ccmain/control.cpp | 6 +++--- src/ccmain/fixspace.cpp | 4 ++-- src/ccmain/tfacepp.cpp | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index f7ec2bf047..b29d322acb 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -601,7 +601,7 @@ int TessBaseAPI::InitFullWithReader(const char *data, int data_size, const char #endif // !DISABLED_LEGACY_ENGINE if (Monitor().kick_watchdog_and_check_for_cancel()) { - tprintWarn("Timeout/cancel: abort the tesseract initialization stage.\n"); + tprintError("Timeout/cancel: abort the tesseract initialization stage.\n"); return -1; } @@ -2729,7 +2729,7 @@ bool TessBaseAPI::Threshold(Pix **pix) { } if (Monitor().bump_progress().exec_progress_func().kick_watchdog_and_check_for_cancel()) { - tprintWarn("Timeout/cancel: abort the image threshold preprocessing stage.\n"); + tprintError("Timeout/cancel: abort the image threshold preprocessing stage.\n"); return false; } diff --git a/src/ccmain/control.cpp b/src/ccmain/control.cpp index 35f976435d..cc39f4758f 100644 --- a/src/ccmain/control.cpp +++ b/src/ccmain/control.cpp @@ -472,7 +472,7 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, PAGE_RES_IT *pr_it, std::vectorsize()) .exec_progress_func(pr_it->word()->word->bounding_box()) .kick_watchdog_and_check_for_cancel(words->size())) { - tprintWarn("Timeout/cancel: fake out the rest of the words. {}/{} words processed.\n", w, words->size()); + tprintError("Timeout/cancel: fake out the rest of the words. {}/{} words processed.\n", w, words->size()); for (; w < words->size(); ++w) { (*words)[w].word->SetupFake(unicharset_); } @@ -715,7 +715,7 @@ bool Tesseract::recog_all_words(PAGE_RES *page_res, } if (owner_.Monitor().bump_progress().exec_progress_func().kick_watchdog_and_check_for_cancel(stats_.word_count)) { - tprintWarn("Timeout/cancel: signaled but we're not doing anything as we're at the end of the session already anyway. 
{} words processed.\n", stats_.word_count); + tprintError("Timeout/cancel: signaled but we're not doing anything as we're at the end of the session already anyway. {} words processed.\n", stats_.word_count); //return false; } return true; @@ -877,7 +877,7 @@ void Tesseract::rejection_passes(PAGE_RES *page_res, const TBOX *target_word_box if (owner_.Monitor().bump_progress(word_index, stats_.word_count) .exec_progress_func(target_word_box != nullptr ? target_word_box : nullptr) .kick_watchdog_and_check_for_cancel(stats_.word_count)) { - tprintWarn("Timeout/cancel: aborting the rejection pass. {}/{} words processed.\n", word_index / stats_.word_count); + tprintError("Timeout/cancel: aborting the rejection pass. {}/{} words processed.\n", word_index / stats_.word_count); return; } if (word->rebuild_word == nullptr) { diff --git a/src/ccmain/fixspace.cpp b/src/ccmain/fixspace.cpp index 263c11bd80..b90963b597 100644 --- a/src/ccmain/fixspace.cpp +++ b/src/ccmain/fixspace.cpp @@ -111,7 +111,7 @@ void Tesseract::fix_fuzzy_spaces(int32_t word_count, PAGE_RES *page_res) { if (owner_.Monitor().bump_progress(word_index, word_count). exec_progress_func() .kick_watchdog_and_check_for_cancel(stats_.dict_words)) { - tprintWarn("Timeout/cancel: abort the fuzzy space cleanup action. {}/{} words processed.\n", word_index, word_count); + tprintError("Timeout/cancel: abort the fuzzy space cleanup action. {}/{} words processed.\n", word_index, word_count); return; } } @@ -134,7 +134,7 @@ void Tesseract::fix_fuzzy_spaces(int32_t word_count, PAGE_RES *page_res) { if (owner_.Monitor().bump_progress(word_index, word_count) .exec_progress_func() .kick_watchdog_and_check_for_cancel(stats_.dict_words)) { - tprintWarn("Timeout/cancel: abort the fuzzy space cleanup action. {}/{} words processed.\n", word_index, word_count); + tprintError("Timeout/cancel: abort the fuzzy space cleanup action. 
{}/{} words processed.\n", word_index, word_count); return; } while (!word_res_it_to.at_last() && diff --git a/src/ccmain/tfacepp.cpp b/src/ccmain/tfacepp.cpp index 363fab2193..62749450f4 100644 --- a/src/ccmain/tfacepp.cpp +++ b/src/ccmain/tfacepp.cpp @@ -134,7 +134,7 @@ void Tesseract::recog_word_recursive(WERD_RES *word, int call_depth) { // deadline that's expired, not just the entire session's -- this is us anticipating tesseract core readying for batch processing in a single session. recog_word_recursion_depth_limit.set_value(-recog_word_recursion_depth_limit.value()); - tprintInfo("recog_word_recursive call depth is restricted by CANCEL SIGNAL at level {} --> peak.EMA: {}, word length: {}\n", call_depth, depth_ema, word_length); + tprintError("Timeout/cancel: recog_word_recursive call depth is restricted by CANCEL SIGNAL at level {} --> peak.EMA: {}, word length: {}\n", call_depth, depth_ema, word_length); // set word as faked/failed and call it a day. word->SetupFake(*word->uch_set); From f11442cb2f7401cb92aba6124dd0875fd3bb7ea8 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 24 Jul 2024 00:24:26 +0200 Subject: [PATCH 37/66] TessBaseAPI::ClearResults : when clearing results, also make sure to clear the OSD (orientation & script detection) results.
--- src/api/baseapi.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index b29d322acb..c37f1074c9 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -2972,6 +2972,11 @@ void TessBaseAPI::ClearResults() { if (tesseract_ != nullptr) { tesseract_->Clear(); } + if (osd_tesseract_ != nullptr) { + if (osd_tesseract_ != tesseract_) { + osd_tesseract_->Clear(); + } + } delete page_res_; page_res_ = nullptr; recognition_done_ = false; From 20e445684b58b8f1a7365c40ab18490438ddb408 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Thu, 25 Jul 2024 23:30:01 +0200 Subject: [PATCH 38/66] - remove superfluous BidiDebug() method: everywhere else in the tesseract code the debug settings are checked directly so do the same here: consistent coding idiom. - removing obsoleted/superfluous methods. --- include/tesseract/baseapi.h | 27 --------------------------- include/tesseract/resultiterator.h | 3 --- src/ccmain/pageiterator.cpp | 1 + src/ccmain/resultiterator.cpp | 15 +++++---------- 4 files changed, 6 insertions(+), 40 deletions(-) diff --git a/include/tesseract/baseapi.h b/include/tesseract/baseapi.h index 3c74c531d2..5248890617 100644 --- a/include/tesseract/baseapi.h +++ b/include/tesseract/baseapi.h @@ -1032,33 +1032,6 @@ std::string HOcrEscape(const char *text); */ std::string mkUniqueOutputFilePath(const char *basepath, int page_number, const char *label, const char *filename_extension); -/** - * Helper function around leptonica's `pixWrite()` which writes the given `pic` image to file, in the `file_type` format. - * - * The `file_type` format is defined in leptonica's `imageio.h`. 
Here's an (possibly incomplete) extract: - * - * - IFF_BMP = 1 (Windows BMP) - * - IFF_JFIF_JPEG = 2 (regular JPEG, default quality 75%) - * - IFF_PNG = 3 (PNG, lossless) - * - IFF_TIFF = 4 (TIFF) - * - IFF_TIFF_PACKBITS = 5 (TIFF, lossless) - * - IFF_TIFF_RLE = 6 (TIFF, lossless) - * - IFF_TIFF_G3 = 7 (TIFF, lossless) - * - IFF_TIFF_G4 = 8 (TIFF, lossless) - * - IFF_TIFF_LZW = 9 (TIFF, lossless) - * - IFF_TIFF_ZIP = 10 (TIFF, lossless) - * - IFF_PNM = 11 (PNM) - * - IFF_PS = 12 (PS: PostScript) - * - IFF_GIF = 13 (GIF) - * - IFF_JP2 = 14 (JP2 - * - IFF_WEBP = 15 (WebP) - * - IFF_LPDF = 16 (LDPF) - * - IFF_TIFF_JPEG = 17 (JPEG embedded in TIFF) - * - IFF_DEFAULT = 18 (The IFF_DEFAULT flag is used to write the file out in the same (input) file format that the pix was read from. If the pix was not read from file, the input format field will be IFF_UNKNOWN and the output file format will be chosen to be compressed and lossless; namely: IFF_TIFF_G4 for depth = 1 bit and IFF_PNG for everything else.) - * - IFF_SPIX = 19 (SPIX: serialized PIX, a leptonica-specific file format) - */ -void WritePix(const std::string &filepath, Pix *pic, int file_type); - } // namespace tesseract #endif // TESSERACT_API_BASEAPI_H_ diff --git a/include/tesseract/resultiterator.h b/include/tesseract/resultiterator.h index a8b3c07752..a040703006 100644 --- a/include/tesseract/resultiterator.h +++ b/include/tesseract/resultiterator.h @@ -224,9 +224,6 @@ class TESS_API ResultIterator : public LTRResultIterator { */ void AppendUTF8ParagraphText(std::string *text) const; - /** Returns whether the bidi_debug flag is set to at least min_level. 
*/ - bool BidiDebug(int min_level) const; - bool current_paragraph_is_ltr_; /** diff --git a/src/ccmain/pageiterator.cpp b/src/ccmain/pageiterator.cpp index 62c99b1e37..8e40d6e92a 100644 --- a/src/ccmain/pageiterator.cpp +++ b/src/ccmain/pageiterator.cpp @@ -80,6 +80,7 @@ PageIterator::PageIterator(const PageIterator &src) } const PageIterator &PageIterator::operator=(const PageIterator &src) { + ASSERT_HOST_MSG(tesseract_ != src.tesseract_, "Software coding error: you are trying or assign/copy PageIterator instances which were created referencing different Tesseract instances.\n"); if (this != &src) { page_res_ = src.page_res_; tesseract_ = src.tesseract_; diff --git a/src/ccmain/resultiterator.cpp b/src/ccmain/resultiterator.cpp index 4aef0fd109..804d24372c 100644 --- a/src/ccmain/resultiterator.cpp +++ b/src/ccmain/resultiterator.cpp @@ -537,7 +537,7 @@ bool ResultIterator::Next(PageIteratorLevel level) { } at_beginning_of_minor_run_ = (word_indices[j - 1] == kMinorRunStart); // awesome, we move to word_indices[j] - if (BidiDebug(3)) { + if (tesseract_->bidi_debug >= 3) { tprintDebug("Next(RIL_WORD): {} -> {}\n", this_word_index, word_indices[j]); } PageIterator::RestartRow(); @@ -548,7 +548,7 @@ bool ResultIterator::Next(PageIteratorLevel level) { return true; } } - if (BidiDebug(3)) { + if (tesseract_->bidi_debug >= 3) { tprintDebug("Next(RIL_WORD): {} -> EOL\n", this_word_index); } // we're going off the end of the text line. 
@@ -721,7 +721,7 @@ void ResultIterator::IterateAndAppendUTF8TextlineText(std::string *text) { Next(RIL_WORD); return; } - if (BidiDebug(1)) { + if (tesseract_->bidi_debug >= 1) { std::vector textline_order; std::vector dirs; CalculateTextlineOrder(current_paragraph_is_ltr_, *this, &dirs, &textline_order); @@ -746,11 +746,11 @@ void ResultIterator::IterateAndAppendUTF8TextlineText(std::string *text) { } AppendUTF8WordText(text); words_appended++; - if (BidiDebug(2)) { + if (tesseract_->bidi_debug >= 2) { tprintDebug("Num spaces={}, text={}\n", numSpaces, *text); } } while (Next(RIL_WORD) && !IsAtBeginningOf(RIL_TEXTLINE)); - if (BidiDebug(1)) { + if (tesseract_->bidi_debug >= 1) { tprintDebug("{} words printed\n", words_appended); } *text += line_separator_; @@ -772,9 +772,4 @@ void ResultIterator::AppendUTF8ParagraphText(std::string *text) const { } while (it.it_->block() != nullptr && !it.IsAtBeginningOf(RIL_PARA)); } -bool ResultIterator::BidiDebug(int min_level) const { - const int debug_level = tesseract_->bidi_debug; - return debug_level >= min_level; -} - } // namespace tesseract. From 3add19dd29aebcbbf48b3c3bb38b0868271d54f8 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Fri, 26 Jul 2024 01:17:10 +0200 Subject: [PATCH 39/66] dealing with several PVS Studio reported issues: // V1003 The macro 'SetForProto' is a dangerous expression. The parameter 'P' must be surrounded by parentheses. intproto.h 146 // V522 Dereferencing of the null pointer 'osd_tess' might take place. baseapi.cpp 3017 // V522 There might be dereferencing of a potential null pointer 'current'. elst.h 579 // V522 There might be dereferencing of a potential null pointer 'current'. elst2.h 568 // V522 There might be dereferencing of a potential null pointer 'current->next'. elst2.h 522 // V522 There might be dereferencing of a potential null pointer 'it'. baseapi.cpp 2233 // V522 There might be dereferencing of a potential null pointer 'prev->next'. 
elst2.h 531 // V522 There might be dereferencing of a potential null pointer 'prev->next'. elst2.h 572 // V522 There might be dereferencing of a potential null pointer 'renderer'. baseapi.cpp 1530 // V522 There might be dereferencing of a potential null pointer 'truth_text'. baseapi.cpp 1039 // V550 An odd precise comparison: classify_rotation.y() != 0.0f. It's probably better to use a comparison with defined precision: fabs(A - B) > Epsilon. baseapi.cpp 3198 // V595 The 'renderer' pointer was utilized before it was verified against nullptr. Check lines: 1530, 1545. baseapi.cpp 1530 // V601 The 'false' value is implicitly cast to the integer type. dawg.h 200 // V636 The 'wi0[num_in] * 127i8' expression was implicitly cast from 'int' type to 'float' type. Consider utilizing an explicit type cast to avoid overflow. An example: double A = (double)(X) * Y;. intsimdmatrix.cpp 103 // V690 The 'REJ' class implements a copy constructor, but lacks the copy assignment operator. It is dangerous to use such a class. rejctmap.h 101 // V729 Function body contains the 'word_end' label that is not used by any 'goto' statements. hocrrenderer.cpp 454 // V730 Not all members of a class are initialized inside the constructor. Consider inspecting: prev, current, next, cycle_pt, ex_current_was_last, ex_current_was_cycle_pt, ... 
elst.h 204 # Conflicts: # src/api/baseapi.cpp --- include/tesseract/pageiterator.h | 2 +- src/api/baseapi.cpp | 60 +++++++++++++++++++------------- src/api/hocrrenderer.cpp | 2 +- src/arch/intsimdmatrix.cpp | 23 ++++++------ src/ccmain/control.cpp | 4 +-- src/ccmain/equationdetect.cpp | 4 +-- src/ccmain/tesseractclass.cpp | 2 +- src/ccstruct/matrix.h | 10 ++++-- src/ccstruct/pageres.h | 20 +++++------ src/ccstruct/rejctmap.h | 13 ++++--- src/ccutil/elst.h | 24 ++++++++----- src/ccutil/elst2.h | 24 +++++++++++++ src/classify/classify.h | 1 + src/classify/intproto.h | 11 +++--- src/cutil/bitvec.h | 6 ++-- src/dict/dawg.h | 3 +- src/textord/makerow.cpp | 2 +- 17 files changed, 135 insertions(+), 76 deletions(-) diff --git a/include/tesseract/pageiterator.h b/include/tesseract/pageiterator.h index 6873971505..7330861707 100644 --- a/include/tesseract/pageiterator.h +++ b/include/tesseract/pageiterator.h @@ -334,7 +334,7 @@ class TESS_API PageIterator { PAGE_RES_IT *it_; /** * The current input WERD being iterated. If there is an output from OCR, - * then word_ is nullptr. Owned by the API + * then word_ is nullptr. Owned by the API. */ WERD *word_; /** The length of the current word_. */ diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index c37f1074c9..9716ab3639 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -1022,7 +1022,8 @@ bool TessBaseAPI::WriteLSTMFLineData(const char *name, const char *path, return false; } // Check if truth_text exists - if ((truth_text != NULL) && (truth_text[0] == '\0') || + // V522 There might be dereferencing of a potential null pointer 'truth_text'. 
baseapi.cpp 1039 + if ((truth_text == NULL) || (truth_text[0] == '\0') || (truth_text[0] == '\n')) { tprintError("Ground truth text is empty or starts with newline.\n"); return false; @@ -1498,7 +1499,11 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, SetImage(newpix); r = r && !Recognize(); - renderer->AddImage(this); + // V522 There might be dereferencing of a potential null pointer 'renderer'. baseapi.cpp 1530 + // V595 The 'renderer' pointer was utilized before it was verified against nullptr. Check lines: 1530, 1545. baseapi.cpp 1530 + if (renderer) { + renderer->AddImage(this); + } boxaDestroy(&default_boxes); } @@ -2169,27 +2174,30 @@ char *TessBaseAPI::GetBoxText(int page_number) { result[0] = '\0'; int output_length = 0; LTRResultIterator *it = GetLTRIterator(); - do { - int left, top, right, bottom; - if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) { - const std::unique_ptr text(it->GetUTF8Text(RIL_SYMBOL)); - // Tesseract uses space for recognition failure. Fix to a reject - // character, kTesseractReject so we don't create illegal box files. - for (int i = 0; text[i] != '\0'; ++i) { - if (text[i] == ' ') { - text[i] = kTesseractReject; + // V522 There might be dereferencing of a potential null pointer 'it'. baseapi.cpp 2233 + if (it != nullptr) { + do { + int left, top, right, bottom; + if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) { + const std::unique_ptr text(it->GetUTF8Text(RIL_SYMBOL)); + // Tesseract uses space for recognition failure. Fix to a reject + // character, kTesseractReject so we don't create illegal box files. + for (int i = 0; text[i] != '\0'; ++i) { + if (text[i] == ' ') { + text[i] = kTesseractReject; + } + } + snprintf(result + output_length, total_length - output_length, "%s %d %d %d %d %d\n", + text.get(), left, image_height_ - bottom, right, image_height_ - top, page_number); + output_length += strlen(result + output_length); + // Just in case... 
+ if (output_length + kMaxBytesPerLine > total_length) { + break; } } - snprintf(result + output_length, total_length - output_length, "%s %d %d %d %d %d\n", - text.get(), left, image_height_ - bottom, right, image_height_ - top, page_number); - output_length += strlen(result + output_length); - // Just in case... - if (output_length + kMaxBytesPerLine > total_length) { - break; - } - } - } while (it->Next(RIL_SYMBOL)); - delete it; + } while (it->Next(RIL_SYMBOL)); + delete it; + } return result; } @@ -2935,10 +2943,13 @@ int TessBaseAPI::FindLines() { " but data path is undefined\n"); delete osd_tesseract_; osd_tesseract_ = nullptr; - } else if (osd_tesseract_->init_tesseract(datapath_, "", "osd", OEM_TESSERACT_ONLY, + // V522 Dereferencing of the null pointer 'osd_tess' might take place. baseapi.cpp 3017 + } else if (osd_tesseract_ != nullptr && osd_tesseract_->init_tesseract(datapath_, "", "osd", OEM_TESSERACT_ONLY, nullptr, 0, nullptr, nullptr, false, &mgr) == 0) { osd_tess = osd_tesseract_; - osd_tesseract_->set_source_resolution(thresholder_->GetSourceYResolution()); + ASSERT0(osd_tess != nullptr); + ASSERT0(thresholder_ != nullptr); + osd_tess->set_source_resolution(thresholder_->GetSourceYResolution()); } else { tprintWarn("Auto orientation and script detection requested," " but osd language failed to load\n"); @@ -3113,7 +3124,8 @@ void TessBaseAPI::GetBlockTextOrientations(int **block_orientation, bool **verti (*block_orientation)[i] = num_rotations; // The classify_rotation is non-zero only if the text has vertical // writing direction. - (*vertical_writing)[i] = (classify_rotation.y() != 0.0f); + // V550 An odd precise comparison: classify_rotation.y() != 0.0f. It's probably better to use a comparison with defined precision: fabs(A - B) > Epsilon. 
baseapi.cpp 3198 + (*vertical_writing)[i] = (fabs(classify_rotation.y()) > FLT_EPSILON); ++i; } } diff --git a/src/api/hocrrenderer.cpp b/src/api/hocrrenderer.cpp index 0d07098d24..cb11390292 100644 --- a/src/api/hocrrenderer.cpp +++ b/src/api/hocrrenderer.cpp @@ -451,7 +451,7 @@ char *TessBaseAPI::GetHOCRText(int page_number) { bcnt++; } } -word_end: + // V729 Function body contains the 'word_end' label that is not used by any 'goto' statements. hocrrenderer.cpp 454 hocr_str << " \n"; return copy_string(hocr_str.str()); diff --git a/src/arch/intsimdmatrix.cpp b/src/arch/intsimdmatrix.cpp index fb87bde5d0..32360fc988 100644 --- a/src/arch/intsimdmatrix.cpp +++ b/src/arch/intsimdmatrix.cpp @@ -94,16 +94,18 @@ void IntSimdMatrix::MatrixDotVector(const GENERIC_2D_ARRAY &w, int total2 = 0; int total3 = 0; for (int j = 0; j < num_in; ++j) { - total0 += wi0[j] * u[j]; - total1 += wi1[j] * u[j]; - total2 += wi2[j] * u[j]; - total3 += wi3[j] * u[j]; + int uj = u[j]; + total0 += wi0[j] * uj; + total1 += wi1[j] * uj; + total2 += wi2[j] * uj; + total3 += wi3[j] * uj; } // Add in the bias and correct for integer values. - v[i + 0] = (total0 + wi0[num_in] * INT8_MAX) * scales[i + 0]; - v[i + 1] = (total1 + wi1[num_in] * INT8_MAX) * scales[i + 1]; - v[i + 2] = (total2 + wi2[num_in] * INT8_MAX) * scales[i + 2]; - v[i + 3] = (total3 + wi3[num_in] * INT8_MAX) * scales[i + 3]; + // V636 The 'wi0[num_in] * 127i8' expression was implicitly cast from 'int' type to 'float' type. Consider utilizing an explicit type cast to avoid overflow. An example: double A = (double)(X) * Y;. 
intsimdmatrix.cpp 103 + v[i + 0] = (total0 + wi0[num_in] * int(INT8_MAX)) * scales[i + 0]; + v[i + 1] = (total1 + wi1[num_in] * int(INT8_MAX)) * scales[i + 1]; + v[i + 2] = (total2 + wi2[num_in] * int(INT8_MAX)) * scales[i + 2]; + v[i + 3] = (total3 + wi3[num_in] * int(INT8_MAX)) * scales[i + 3]; } // Capture the remainder mod four @@ -111,10 +113,11 @@ void IntSimdMatrix::MatrixDotVector(const GENERIC_2D_ARRAY &w, const int8_t *wi = w[i]; int total = 0; for (int j = 0; j < num_in; ++j) { - total += wi[j] * u[j]; + int uj = u[j]; + total += wi[j] * uj; } // Add in the bias and correct for integer values. - v[i] = (total + wi[num_in] * INT8_MAX) * scales[i]; + v[i] = (total + wi[num_in] * int(INT8_MAX)) * scales[i]; } } diff --git a/src/ccmain/control.cpp b/src/ccmain/control.cpp index cc39f4758f..ce00d9ff6c 100644 --- a/src/ccmain/control.cpp +++ b/src/ccmain/control.cpp @@ -1661,8 +1661,8 @@ void Tesseract::classify_word_pass1(const WordData &word_data, WERD_RES **in_wor PointerVector *out_words) { ROW *row = word_data.row; BLOCK *block = word_data.block; - prev_word_best_choice_ = - word_data.prev_word != nullptr ? word_data.prev_word->word->best_choice : nullptr; + ASSERT0(*in_word != nullptr); + prev_word_best_choice_ = (word_data.prev_word != nullptr ? 
word_data.prev_word->word->best_choice : nullptr); #if DISABLED_LEGACY_ENGINE if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) { #else diff --git a/src/ccmain/equationdetect.cpp b/src/ccmain/equationdetect.cpp index a7be77465a..f3a023dace 100644 --- a/src/ccmain/equationdetect.cpp +++ b/src/ccmain/equationdetect.cpp @@ -807,7 +807,7 @@ void EquationDetect::IdentifyInlinePartsHorizontal() { ColPartition *neighbor = nullptr; bool side_neighbor_found = false; while ((neighbor = search.NextSideSearch(right_to_left)) != nullptr) { - const TBOX &neighbor_box(neighbor->bounding_box()); + const TBOX &neighbor_box = neighbor->bounding_box(); if (!IsTextOrEquationType(neighbor->type()) || part_box.x_gap(neighbor_box) > kGapTh || !part_box.major_y_overlap(neighbor_box) || part_box.major_x_overlap(neighbor_box)) { continue; @@ -820,7 +820,7 @@ void EquationDetect::IdentifyInlinePartsHorizontal() { part->set_type(PT_INLINE_EQUATION); } else { // Check the geometric feature of neighbor. - const TBOX &neighbor_box(neighbor->bounding_box()); + const TBOX &neighbor_box = neighbor->bounding_box(); if (neighbor_box.width() > part_box.width() && neighbor->type() != PT_EQUATION) { // Mark as PT_INLINE_EQUATION. part->set_type(PT_INLINE_EQUATION); diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp index 9cc465c14b..aa555debb0 100644 --- a/src/ccmain/tesseractclass.cpp +++ b/src/ccmain/tesseractclass.cpp @@ -1067,7 +1067,7 @@ void Tesseract::ResyncVariablesInternally() { // init sub-languages: for (auto &sub_tess : sub_langs_) { if (sub_tess != nullptr) { - auto lvl = (bool)sub_tess->debug_display_page; + auto lvl = bool(sub_tess->debug_display_page); } } } diff --git a/src/ccstruct/matrix.h b/src/ccstruct/matrix.h index a7b428d7f7..1483e1b5c1 100644 --- a/src/ccstruct/matrix.h +++ b/src/ccstruct/matrix.h @@ -264,7 +264,9 @@ class GENERIC_2D_ARRAY { // stored COLUMN-major, so the left-most index is the most significant. 
// This allows [][] access to use indices in the same order as (,). virtual int index(int column, int row) const { - return (column * dim2_ + row); + auto rv = column * dim2_ + row; + ASSERT_HOST(rv >= 0); + return rv; } // Put a list element into the matrix at a specific location. @@ -634,7 +636,9 @@ class BandTriMatrix : public GENERIC_2D_ARRAY { int index(int column, int row) const override { ASSERT_HOST(row >= column); ASSERT_HOST(row - column < this->dim2_); - return column * this->dim2_ + row - column; + auto rv = column * this->dim2_ + row - column; + ASSERT_HOST(rv >= 0); + return rv; } // Appends array2 corner-to-corner to *this, making an array of dimension @@ -671,7 +675,7 @@ class MATRIX : public BandTriMatrix { MATRIX(int dimension, int bandwidth) : BandTriMatrix(dimension, bandwidth, NOT_CLASSIFIED) {} - ~MATRIX() override; + virtual ~MATRIX() override; // Returns true if there are any real classification results. bool Classified(int col, int row, int wildcard_id) const; diff --git a/src/ccstruct/pageres.h b/src/ccstruct/pageres.h index 0e17dae438..b9789cb72b 100644 --- a/src/ccstruct/pageres.h +++ b/src/ccstruct/pageres.h @@ -685,7 +685,7 @@ class TESS_API WERD_RES : public ELIST_LINK { class TESS_API PAGE_RES_IT { public: - PAGE_RES *page_res; // page being iterated + PAGE_RES *page_res = nullptr; // page being iterated PAGE_RES_IT() = default; @@ -788,17 +788,17 @@ class TESS_API PAGE_RES_IT { private: WERD_RES *internal_forward(bool new_block, bool empty_ok); - WERD_RES *prev_word_res; // previous word - ROW_RES *prev_row_res; // row of prev word - BLOCK_RES *prev_block_res; // block of prev word + WERD_RES *prev_word_res = nullptr; // previous word + ROW_RES *prev_row_res = nullptr; // row of prev word + BLOCK_RES *prev_block_res = nullptr; // block of prev word - WERD_RES *word_res; // current word - ROW_RES *row_res; // row of current word - BLOCK_RES *block_res; // block of cur. 
word + WERD_RES *word_res = nullptr; // current word + ROW_RES *row_res = nullptr; // row of current word + BLOCK_RES *block_res = nullptr; // block of cur. word - WERD_RES *next_word_res; // next word - ROW_RES *next_row_res; // row of next word - BLOCK_RES *next_block_res; // block of next word + WERD_RES *next_word_res = nullptr; // next word + ROW_RES *next_row_res = nullptr; // row of next word + BLOCK_RES *next_block_res = nullptr; // block of next word BLOCK_RES_IT block_res_it; // iterators ROW_RES_IT row_res_it; diff --git a/src/ccstruct/rejctmap.h b/src/ccstruct/rejctmap.h index edb06243e4..a3aa4332e6 100644 --- a/src/ccstruct/rejctmap.h +++ b/src/ccstruct/rejctmap.h @@ -108,13 +108,18 @@ class REJ { public: REJ() = default; - REJ( // classwise copy - const REJ &source) { + // copy constructor + REJ(const REJ &source) { flags = source.flags; } - REJ &operator=( // assign REJ - const REJ &source) = default; + // V690 The 'REJ' class implements a copy constructor, but lacks the copy assignment operator. It is dangerous to use such a class. rejctmap.h 101 + REJ &operator=(const REJ &source) { + if (this != &source) { + flags = source.flags; + } + return *this; + } bool flag(REJ_FLAGS rej_flag) const { return flags[rej_flag]; diff --git a/src/ccutil/elst.h b/src/ccutil/elst.h index 8c87dabbd7..dc21e76fb3 100644 --- a/src/ccutil/elst.h +++ b/src/ccutil/elst.h @@ -112,6 +112,7 @@ class TESS_API ELIST { friend class ELIST_ITERATOR; ELIST_LINK *last = nullptr; // End of list + //(Points to head) ELIST_LINK *First() { // return first return last ? last->next : nullptr; @@ -188,14 +189,15 @@ class TESS_API ELIST { class TESS_API ELIST_ITERATOR { friend void ELIST::assign_to_sublist(ELIST_ITERATOR *, ELIST_ITERATOR *); - ELIST *list; // List being iterated - ELIST_LINK *prev; // prev element - ELIST_LINK *current; // current element - ELIST_LINK *next; // next element - ELIST_LINK *cycle_pt; // point we are cycling the list to. 
- bool ex_current_was_last; // current extracted was end of list - bool ex_current_was_cycle_pt; // current extracted was cycle point - bool started_cycling; // Have we moved off the start? + // V730 Not all members of a class are initialized inside the constructor. Consider inspecting: prev, current, next, cycle_pt, ex_current_was_last, ex_current_was_cycle_pt, ... elst.h 204 + ELIST *list = nullptr; // List being iterated + ELIST_LINK *prev = nullptr; // prev element + ELIST_LINK *current = nullptr; // current element + ELIST_LINK *next = nullptr; // next element + ELIST_LINK *cycle_pt = nullptr; // point we are cycling the list to. + bool ex_current_was_last = false; // current extracted was end of list + bool ex_current_was_cycle_pt = false; // current extracted was cycle point + bool started_cycling = false; // Have we moved off the start? ELIST_LINK *extract_sublist( // from this current... ELIST_ITERATOR *other_it); // to other current @@ -576,6 +578,12 @@ inline void ELIST_ITERATOR::add_list_before(ELIST *list_to_add) { list->last = list_to_add->last; prev = list->last; current = list->First(); +#ifndef NDEBUG + // V522 There might be dereferencing of a potential null pointer 'current'. elst.h 579 + if (!current) { + BAD_PARAMETER.abort("ELIST_ITERATOR::add_list_before", "current is nullptr"); + } +#endif next = current->next; ex_current_was_last = false; } else { diff --git a/src/ccutil/elst2.h b/src/ccutil/elst2.h index 9afb27a003..1da1413e2f 100644 --- a/src/ccutil/elst2.h +++ b/src/ccutil/elst2.h @@ -519,6 +519,12 @@ inline void ELIST2_ITERATOR::add_list_after(ELIST2 *list_to_add) { } else { if (current) { // not extracted current->next = list_to_add->First(); +#ifndef NDEBUG + // V522 There might be dereferencing of a potential null pointer 'current->next'. 
elst2.h 522 + if (!current->next) { + BAD_PARAMETER.abort("ELIST2_ITERATOR::add_list_after", "current->next is nullptr"); + } +#endif current->next->prev = current; if (current == list->last) { list->last = list_to_add->last; @@ -528,6 +534,12 @@ inline void ELIST2_ITERATOR::add_list_after(ELIST2 *list_to_add) { next = current->next; } else { // current extracted prev->next = list_to_add->First(); +#ifndef NDEBUG + // V522 There might be dereferencing of a potential null pointer 'prev->next'. elst2.h 531 + if (!prev->next) { + BAD_PARAMETER.abort("ELIST2_ITERATOR::add_list_after", "prev->next is nullptr"); + } +#endif prev->next->prev = prev; if (ex_current_was_last) { list->last = list_to_add->last; @@ -565,10 +577,22 @@ inline void ELIST2_ITERATOR::add_list_before(ELIST2 *list_to_add) { list->last = list_to_add->last; prev = list->last; current = list->First(); +#ifndef NDEBUG + // V522 There might be dereferencing of a potential null pointer 'current'. elst2.h 568 + if (!current) { + BAD_PARAMETER.abort("ELIST2_ITERATOR::add_list_before", "current is nullptr"); + } +#endif next = current->next; ex_current_was_last = false; } else { prev->next = list_to_add->First(); +#ifndef NDEBUG + // V522 There might be dereferencing of a potential null pointer 'prev->next'. 
elst2.h 572 + if (!prev->next) { + BAD_PARAMETER.abort("ELIST2_ITERATOR::add_list_before", "prev->next is nullptr"); + } +#endif prev->next->prev = prev; if (current) { // not extracted diff --git a/src/classify/classify.h b/src/classify/classify.h index a6fd75e247..82bead4317 100644 --- a/src/classify/classify.h +++ b/src/classify/classify.h @@ -269,6 +269,7 @@ class TESS_API Classify : public CCStruct { return AdaptedTemplates->NumPermClasses == 0; } bool LooksLikeGarbage(TBLOB *blob); + #if !GRAPHICS_DISABLED void RefreshDebugWindow(ScrollViewReference &win, const char *msg, int y_offset, const TBOX &wbox); #endif diff --git a/src/classify/intproto.h b/src/classify/intproto.h index b38e095c13..7530284b35 100644 --- a/src/classify/intproto.h +++ b/src/classify/intproto.h @@ -142,15 +142,16 @@ enum IntmatcherDebugAction { IDA_ADAPTIVE, IDA_STATIC, IDA_SHAPE_INDEX, IDA_BOTH Macros ----------------------------------------------------------------------------**/ -#define MaxNumIntProtosIn(C) (C->NumProtoSets * PROTOS_PER_PROTO_SET) -#define SetForProto(P) (P / PROTOS_PER_PROTO_SET) -#define IndexForProto(P) (P % PROTOS_PER_PROTO_SET) -#define ProtoForProtoId(C, P) (&((C->ProtoSets[SetForProto(P)])->Protos[IndexForProto(P)])) +// V1003 The macro 'SetForProto' is a dangerous expression. The parameter 'P' must be surrounded by parentheses. 
intproto.h 146 +#define MaxNumIntProtosIn(C) ((C)->NumProtoSets * PROTOS_PER_PROTO_SET) +#define SetForProto(P) ((P) / PROTOS_PER_PROTO_SET) +#define IndexForProto(P) ((P) % PROTOS_PER_PROTO_SET) +#define ProtoForProtoId(C, P) (&(((C)->ProtoSets[SetForProto(P)])->Protos[IndexForProto(P)])) #define PPrunerWordIndexFor(I) (((I) % PROTOS_PER_PROTO_SET) / PROTOS_PER_PP_WERD) #define PPrunerBitIndexFor(I) ((I) % PROTOS_PER_PP_WERD) #define PPrunerMaskFor(I) (1 << PPrunerBitIndexFor(I)) -#define MaxNumClassesIn(T) (T->NumClassPruners * CLASSES_PER_CP) +#define MaxNumClassesIn(T) ((T)->NumClassPruners * CLASSES_PER_CP) #define LegalClassId(c) ((c) >= 0 && (c) < MAX_NUM_CLASSES) #define UnusedClassIdIn(T, c) ((T)->Class[c] == nullptr) #define ClassForClassId(T, c) ((T)->Class[c]) diff --git a/src/cutil/bitvec.h b/src/cutil/bitvec.h index 9178fb497e..7beaec3c9e 100644 --- a/src/cutil/bitvec.h +++ b/src/cutil/bitvec.h @@ -52,11 +52,11 @@ static inline void copy_all_bits(BIT_VECTOR source, BIT_VECTOR dest, size_t leng } } -#define SET_BIT(array, bit) (array[bit / BITSINLONG] |= 1 << (bit & (BITSINLONG - 1))) +#define SET_BIT(array, bit) (array[(bit) / BITSINLONG] |= 1 << ((bit) & (BITSINLONG - 1))) -#define reset_bit(array, bit) (array[bit / BITSINLONG] &= ~(1 << (bit & (BITSINLONG - 1)))) +#define reset_bit(array, bit) (array[(bit) / BITSINLONG] &= ~(1 << ((bit) & (BITSINLONG - 1)))) -#define test_bit(array, bit) (array[bit / BITSINLONG] & (1 << (bit & (BITSINLONG - 1)))) +#define test_bit(array, bit) (array[(bit) / BITSINLONG] & (1 << ((bit) & (BITSINLONG - 1)))) static inline size_t WordsInVectorOfSize(size_t NumBits) { return (NumBits + BITSINLONG - 1) / BITSINLONG; diff --git a/src/dict/dawg.h b/src/dict/dawg.h index 22bf9e037d..40755f5c32 100644 --- a/src/dict/dawg.h +++ b/src/dict/dawg.h @@ -197,7 +197,8 @@ class TESS_API Dawg { (void)edge_ref; (void)unichar_id; (void)word_end; - return false; + // V601 The 'false' value is implicitly cast to the integer type. 
dawg.h 200 + return 0; } protected: diff --git a/src/textord/makerow.cpp b/src/textord/makerow.cpp index 39639f79a8..d9aef8d32a 100644 --- a/src/textord/makerow.cpp +++ b/src/textord/makerow.cpp @@ -1605,7 +1605,7 @@ int32_t compute_row_descdrop(TO_ROW *row, float gradient, int xheight_blob_count if (static_cast(blob_count + num_potential_asc) < xheight_blob_count * total_fraction) { blob_count = 0; } - int descdrop = blob_count > 0 ? -blob_index : 0; + int descdrop = (blob_count > 0 ? -blob_index : 0); if (textord_debug_xheights) { tprintDebug("Descdrop: {} (potential ascenders {}, descenders {})\n", descdrop, num_potential_asc, blob_count); From 1cf409ef9f9827f754e0767ff1cf8abfb22bf120 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Thu, 25 Jul 2024 21:32:36 +0200 Subject: [PATCH 40/66] intentionally permanently ignoring a PVS Studio reported error: V1053 Calling the 'Clean' virtual function in the destructor may lead to unexpected result at runtime. plumbing.h 33 # Conflicts: # src/lstm/plumbing.h --- src/lstm/parallel.h | 1 + src/lstm/plumbing.h | 2 +- src/lstm/series.h | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/lstm/parallel.h b/src/lstm/parallel.h index 0d7ce094fb..e5f21d54c7 100644 --- a/src/lstm/parallel.h +++ b/src/lstm/parallel.h @@ -28,6 +28,7 @@ class Parallel : public Plumbing { // ni_ and no_ will be set by AddToStack. TESS_API Parallel(const std::string &name, NetworkType type); + virtual ~Parallel() override = default; // Returns the shape output from the network given an input shape (which may // be partially unknown ie zero). diff --git a/src/lstm/plumbing.h b/src/lstm/plumbing.h index 0b3442b661..b58a30bc00 100644 --- a/src/lstm/plumbing.h +++ b/src/lstm/plumbing.h @@ -29,7 +29,7 @@ class TESS_API Plumbing : public Network { public: // ni_ and no_ will be set by AddToStack. 
explicit Plumbing(const std::string &name); - ~Plumbing() override { + virtual ~Plumbing() override { for (auto data : stack_) { delete data; } diff --git a/src/lstm/series.h b/src/lstm/series.h index fc63f28414..7b65ac9443 100644 --- a/src/lstm/series.h +++ b/src/lstm/series.h @@ -28,7 +28,7 @@ class Series : public Plumbing { // ni_ and no_ will be set by AddToStack. TESS_API explicit Series(const std::string &name); - ~Series() override = default; + virtual ~Series() override = default; // Returns the shape output from the network given an input shape (which may // be partially unknown ie zero). From 4680e668495ae56fa6344d7e363668e81252fbe7 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Thu, 25 Jul 2024 21:34:02 +0200 Subject: [PATCH 41/66] fix PVS Studio error: V576 Incorrect format. Consider checking the fifth actual argument of the 'fprintf' function. Under certain conditions the pointer can be null. paramsd.cpp 345 --- src/ccmain/paramsd.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ccmain/paramsd.cpp b/src/ccmain/paramsd.cpp index 398e34b1bd..726f0f4c93 100644 --- a/src/ccmain/paramsd.cpp +++ b/src/ccmain/paramsd.cpp @@ -137,7 +137,8 @@ const char *ParamContent::GetDescription() const { } else if (param_type_ == VT_STRING) { return sIt->info_str(); } else { - return nullptr; + // V576 Incorrect format. Consider checking the fifth actual argument of the 'fprintf' function. Under certain conditions the pointer can be null. paramsd.cpp 345 + return "ERROR: ParamContent::GetDescription()"; } } From 0144b9cb0e71f0132e071347e48a77b844689970 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Tue, 23 Jul 2024 21:46:41 +0200 Subject: [PATCH 42/66] there does not exist any file_type ".bl" --> config line removed.
--- tessdata/configs/box.train | 1 - tessdata/configs/box.train.stderr | 1 - tessdata/configs/lstm.train | 1 - 3 files changed, 3 deletions(-) diff --git a/tessdata/configs/box.train b/tessdata/configs/box.train index d39f2687ef..9f9707a74a 100644 --- a/tessdata/configs/box.train +++ b/tessdata/configs/box.train @@ -1,5 +1,4 @@ disable_character_fragments T -file_type .bl textord_fast_pitch_test T tessedit_zero_rejection T tessedit_minimal_rejection F diff --git a/tessdata/configs/box.train.stderr b/tessdata/configs/box.train.stderr index 82754e9cc9..08b1eba358 100644 --- a/tessdata/configs/box.train.stderr +++ b/tessdata/configs/box.train.stderr @@ -1,4 +1,3 @@ -file_type .bl #tessedit_use_nn F textord_fast_pitch_test T tessedit_zero_rejection T diff --git a/tessdata/configs/lstm.train b/tessdata/configs/lstm.train index 5ff3772621..c33d106d5e 100644 --- a/tessdata/configs/lstm.train +++ b/tessdata/configs/lstm.train @@ -1,4 +1,3 @@ -file_type .bl textord_fast_pitch_test T tessedit_zero_rejection T tessedit_minimal_rejection F From 2dfcf05a8c53fb5ba3f0b2334544c13d326c0b8d Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 24 Jul 2024 00:20:22 +0200 Subject: [PATCH 43/66] remove local class cache preserve_interword_spaces_ and access tesseract parameter preserve_interword_spaces everywhere directly. # Conflicts: # include/tesseract/resultiterator.h # Conflicts: # src/ccmain/resultiterator.cpp --- include/tesseract/resultiterator.h | 6 ------ src/ccmain/resultiterator.cpp | 10 +--------- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/include/tesseract/resultiterator.h b/include/tesseract/resultiterator.h index a0f287a2fa..a8b3c07752 100644 --- a/include/tesseract/resultiterator.h +++ b/include/tesseract/resultiterator.h @@ -237,12 +237,6 @@ class TESS_API ResultIterator : public LTRResultIterator { /** Is the currently pointed-at character in a minor-direction sequence? 
*/ bool in_minor_direction_; - - /** - * Should detected inter-word spaces be preserved, or "compressed" to a single - * space character (default behavior). - */ - bool preserve_interword_spaces_; }; } // namespace tesseract. diff --git a/src/ccmain/resultiterator.cpp b/src/ccmain/resultiterator.cpp index f378b4b8f7..2531af8130 100644 --- a/src/ccmain/resultiterator.cpp +++ b/src/ccmain/resultiterator.cpp @@ -38,14 +38,6 @@ namespace tesseract { ResultIterator::ResultIterator(const LTRResultIterator &resit) : LTRResultIterator(resit) { in_minor_direction_ = false; at_beginning_of_minor_run_ = false; - preserve_interword_spaces_ = false; - - auto *p = ParamUtils::FindParam( - "preserve_interword_spaces", GlobalParams()->bool_params, tesseract_->params()->bool_params); - if (p != nullptr) { - preserve_interword_spaces_ = (bool)(*p); - } - current_paragraph_is_ltr_ = CurrentParagraphIsLtr(); MoveToLogicalStartOfTextline(); } @@ -745,7 +737,7 @@ void ResultIterator::IterateAndAppendUTF8TextlineText(std::string *text) { int words_appended = 0; do { - int numSpaces = preserve_interword_spaces_ ? it_->word()->word->space() : (words_appended > 0); + int numSpaces = tesseract_->preserve_interword_spaces ? it_->word()->word->space() : (words_appended > 0); for (int i = 0; i < numSpaces; ++i) { *text += " "; } From 4fede411bc40ec9c098ed21f46738dd2b4de0bc4 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 24 Jul 2024 00:24:44 +0200 Subject: [PATCH 44/66] WS --- src/ccstruct/blobbox.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ccstruct/blobbox.h b/src/ccstruct/blobbox.h index 0d0f694ec6..09d9566fbb 100644 --- a/src/ccstruct/blobbox.h +++ b/src/ccstruct/blobbox.h @@ -68,7 +68,9 @@ enum TabType { }; // The possible region types of a BLOBNBOX. +// // Note: keep all the text types > BRT_UNKNOWN and all the image types less. +// // Keep in sync with kBlobTypes in colpartition.cpp and BoxColor, and the // *Type static functions below. 
enum BlobRegionType { @@ -85,6 +87,7 @@ enum BlobRegionType { }; // enum for elements of arrays that refer to neighbours. +// // NOTE: keep in this order, so ^2 can be used to flip direction. enum BlobNeighbourDir { BND_LEFT, BND_BELOW, BND_RIGHT, BND_ABOVE, BND_COUNT }; From 9b616706e73e8765bbe9f3fa8067741615c2edd6 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 24 Jul 2024 00:25:33 +0200 Subject: [PATCH 45/66] functions defined in headerfiles should be `static inline` instead of just `inline`. # Conflicts: # src/ccutil/genericvector.h --- src/ccutil/genericvector.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ccutil/genericvector.h b/src/ccutil/genericvector.h index 4a5bbe12d6..94dc941a8e 100644 --- a/src/ccutil/genericvector.h +++ b/src/ccutil/genericvector.h @@ -230,7 +230,7 @@ class GenericVector { // The default FileReader loads the whole file into the vector of char, // returning false on error. -inline bool LoadDataFromFile(const char *filename, GenericVector *data) { +static inline bool LoadDataFromFile(const char *filename, GenericVector *data) { bool result = false; FILE *fp = fopen(filename, "rb"); if (fp != nullptr) { @@ -251,7 +251,7 @@ inline bool LoadDataFromFile(const char *filename, GenericVector *data) { // The default FileWriter writes the vector of char to the filename file, // returning false on error. -inline bool SaveDataToFile(const GenericVector &data, const char *filename) { +static inline bool SaveDataToFile(const GenericVector &data, const char *filename) { FILE *fp = fopen(filename, "wb"); if (fp == nullptr) { return false; From b02646459bd9ae8d7cc6ca2e89bbcd402228c4c4 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 24 Jul 2024 00:26:52 +0200 Subject: [PATCH 46/66] FGets(): always deliver the line/string read from file with NUL sentinel guaranteed.
--- src/ccutil/serialis.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ccutil/serialis.cpp b/src/ccutil/serialis.cpp index d9c9a8d418..21c64c110e 100644 --- a/src/ccutil/serialis.cpp +++ b/src/ccutil/serialis.cpp @@ -201,9 +201,9 @@ char *TFile::FGets(char *buffer, int buffer_size) { break; } } - if (size < buffer_size) { - buffer[size] = '\0'; - } + ASSERT0(size < buffer_size); + buffer[size] = '\0'; + return size > 0 ? buffer : nullptr; } From 8410ccf4ba507b5d1d2278bc2c99c749b2343b74 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 24 Jul 2024 00:30:31 +0200 Subject: [PATCH 47/66] disabled code in #if 0..#endif wrappers instead of plonking extra comment markers around such blocks: the preprocessor tactic is safe vs. "nested comments" problems with the latter approach. --- src/classify/adaptmatch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/classify/adaptmatch.cpp b/src/classify/adaptmatch.cpp index 5c02c6f363..1f56c2b920 100644 --- a/src/classify/adaptmatch.cpp +++ b/src/classify/adaptmatch.cpp @@ -343,7 +343,7 @@ void Classify::LearnWord(const char *fontname, WERD_RES *word) { // TODO(rays): re-enable this part of the code when we switch to the // new classifier that needs to see examples of garbage. - /* +#if 0 if (word->best_state[ch] > 1) { // If the next blob is good, make junk with the rightmost fragment. if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) { @@ -366,7 +366,7 @@ if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) { word->best_state[ch] + word->best_state[ch + 1], threshold, CST_NGRAM, joined_text.c_str(), word); } -*/ +#endif } start_blob += word->best_state[ch]; } From c061720f13d5d6bf547b7812dd196e182f2892c5 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 24 Jul 2024 00:31:27 +0200 Subject: [PATCH 48/66] - UniformCertainties() returns bool, not int. - improved legibility of a few debug/diagnostics messages produced by tesseract.
# Conflicts: # src/dict/stopper.cpp --- src/dict/dict.h | 2 +- src/dict/stopper.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dict/dict.h b/src/dict/dict.h index 78e5712ee0..6889a2c0c9 100644 --- a/src/dict/dict.h +++ b/src/dict/dict.h @@ -254,7 +254,7 @@ class TESS_API Dict { /// word (i.e. false will be returned in that case). The algorithm computes /// the mean and std deviation of the certainties in the word with the worst /// certainty thrown out. - int UniformCertainties(const WERD_CHOICE &word); + bool UniformCertainties(const WERD_CHOICE &word); /// Returns true if the given best_choice is good enough to stop. bool AcceptableChoice(const WERD_CHOICE &best_choice, XHeightConsistencyEnum xheight_consistency); /// Returns false if the best choice for the current word is questionable diff --git a/src/dict/stopper.cpp b/src/dict/stopper.cpp index ccafa714c5..d4b6a2d066 100644 --- a/src/dict/stopper.cpp +++ b/src/dict/stopper.cpp @@ -225,7 +225,7 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r UnicharIdArrayUtils::print(wrong_ngram, getUnicharset()); tprintf("current ngram from spec: "); UnicharIdArrayUtils::print(ambig_spec->wrong_ngram, getUnicharset()); - tprintf("comparison result: %d\n", compare); + tprintf("Ambiguity comparison result: %d%s\n", compare, (compare == 0 ? " (we found an ambiguity)" : "")); } if (compare == 0) { // Record the place where we found an ambiguity. 
@@ -235,7 +235,7 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r getUnicharset().get_isngram(ambig_spec->correct_ngram_id), leftmost_id)); if (stopper_debug_level > 1) { - tprintf("fixpt+=(%d %d %d %d %s)\n", blob_index, blob_index + num_wrong_blobs, false, + tprintf("fixpt+=(blob_index:%d index+num_wrong_blobs:%d replace:%d isngram:%d leftmost_id:`%s`)\n", blob_index, blob_index + num_wrong_blobs, replace, getUnicharset().get_isngram(ambig_spec->correct_ngram_id), getUnicharset().id_to_unichar(leftmost_id)); } @@ -462,7 +462,7 @@ int Dict::LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const { return shortest; } -int Dict::UniformCertainties(const WERD_CHOICE &word) { +bool Dict::UniformCertainties(const WERD_CHOICE &word) { float Certainty; float WorstCertainty = FLT_MAX; float CertaintyThreshold; From a24c0060f939dc09c18809c94bf805481bd3736b Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 24 Jul 2024 00:33:44 +0200 Subject: [PATCH 49/66] fix iterator coding slip-up for the inner loop of ColPartitionSet::UnmatchedWidth(): surely the intent was to use the inner iterator as otherwise you'd be chewing loop-invariant blocks every round. Picked up from dev/master branch; originally this was commit 0082dae6f1c767ecf5f148124e59233a2d13be8f (HEAD), Author: Balearica , Date: Sat Apr 20 12:06:55 2024 -0700 :: Fixed column set quality calculation.
--- src/textord/colpartitionset.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/textord/colpartitionset.cpp b/src/textord/colpartitionset.cpp index a1dc82203b..e7647a30a2 100644 --- a/src/textord/colpartitionset.cpp +++ b/src/textord/colpartitionset.cpp @@ -325,7 +325,7 @@ int ColPartitionSet::UnmatchedWidth(ColPartitionSet *part_set) { int y = part->MidY(); BLOBNBOX_C_IT box_it(part->boxes()); for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) { - const TBOX &box = it.data()->bounding_box(); + const TBOX &box = box_it.data()->bounding_box(); // Assume that the whole blob is outside any column iff its x-middle // is outside. int x = (box.left() + box.right()) / 2; From 768d2adbc838437c909703c6cb9103dae3308d24 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Wed, 24 Jul 2024 00:24:26 +0200 Subject: [PATCH 50/66] TessBaseAPI::ClearResults : when clearing results, also make sure to clear the OSD (orientation & scale detect) results. --- src/api/baseapi.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 90c6847d4a..00e5e1baff 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -2209,6 +2209,11 @@ void TessBaseAPI::ClearResults() { if (tesseract_ != nullptr) { tesseract_->Clear(); } + if (osd_tesseract_ != nullptr) { + if (osd_tesseract_ != tesseract_) { + osd_tesseract_->Clear(); + } + } delete page_res_; page_res_ = nullptr; recognition_done_ = false; From 4480143efb145f1841fa23f81bf97bba8b8d6534 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Thu, 25 Jul 2024 23:30:01 +0200 Subject: [PATCH 51/66] - remove superfluous BidiDebug() method: everywhere else in the tesseract code the debug settings are checked directly so do the same here: consistent coding idiom. - removing obsoleted/superfluous methods. 
# Conflicts: # include/tesseract/baseapi.h # src/ccmain/resultiterator.cpp --- include/tesseract/resultiterator.h | 3 --- src/ccmain/pageiterator.cpp | 1 + src/ccmain/resultiterator.cpp | 20 +++++--------------- 3 files changed, 6 insertions(+), 18 deletions(-) diff --git a/include/tesseract/resultiterator.h b/include/tesseract/resultiterator.h index a8b3c07752..a040703006 100644 --- a/include/tesseract/resultiterator.h +++ b/include/tesseract/resultiterator.h @@ -224,9 +224,6 @@ class TESS_API ResultIterator : public LTRResultIterator { */ void AppendUTF8ParagraphText(std::string *text) const; - /** Returns whether the bidi_debug flag is set to at least min_level. */ - bool BidiDebug(int min_level) const; - bool current_paragraph_is_ltr_; /** diff --git a/src/ccmain/pageiterator.cpp b/src/ccmain/pageiterator.cpp index ae7d9340ff..c5d580e7ff 100644 --- a/src/ccmain/pageiterator.cpp +++ b/src/ccmain/pageiterator.cpp @@ -78,6 +78,7 @@ PageIterator::PageIterator(const PageIterator &src) } const PageIterator &PageIterator::operator=(const PageIterator &src) { + ASSERT_HOST_MSG(tesseract_ != src.tesseract_, "Software coding error: you are trying or assign/copy PageIterator instances which were created referencing different Tesseract instances.\n"); if (this != &src) { page_res_ = src.page_res_; tesseract_ = src.tesseract_; diff --git a/src/ccmain/resultiterator.cpp b/src/ccmain/resultiterator.cpp index 2531af8130..d9503c3412 100644 --- a/src/ccmain/resultiterator.cpp +++ b/src/ccmain/resultiterator.cpp @@ -534,7 +534,7 @@ bool ResultIterator::Next(PageIteratorLevel level) { } at_beginning_of_minor_run_ = (word_indices[j - 1] == kMinorRunStart); // awesome, we move to word_indices[j] - if (BidiDebug(3)) { + if (tesseract_->bidi_debug >= 3) { tprintf("Next(RIL_WORD): %d -> %d\n", this_word_index, word_indices[j]); } PageIterator::RestartRow(); @@ -545,7 +545,7 @@ bool ResultIterator::Next(PageIteratorLevel level) { return true; } } - if (BidiDebug(3)) { + if 
(tesseract_->bidi_debug >= 3) { tprintf("Next(RIL_WORD): %d -> EOL\n", this_word_index); } // we're going off the end of the text line. @@ -718,7 +718,7 @@ void ResultIterator::IterateAndAppendUTF8TextlineText(std::string *text) { Next(RIL_WORD); return; } - if (BidiDebug(1)) { + if (tesseract_->bidi_debug >= 1) { std::vector textline_order; std::vector dirs; CalculateTextlineOrder(current_paragraph_is_ltr_, *this, &dirs, &textline_order); @@ -743,11 +743,11 @@ void ResultIterator::IterateAndAppendUTF8TextlineText(std::string *text) { } AppendUTF8WordText(text); words_appended++; - if (BidiDebug(2)) { + if (tesseract_->bidi_debug >= 2) { tprintf("Num spaces=%d, text=%s\n", numSpaces, text->c_str()); } } while (Next(RIL_WORD) && !IsAtBeginningOf(RIL_TEXTLINE)); - if (BidiDebug(1)) { + if (tesseract_->bidi_debug >= 1) { tprintf("%d words printed\n", words_appended); } *text += line_separator_; @@ -769,14 +769,4 @@ void ResultIterator::AppendUTF8ParagraphText(std::string *text) const { } while (it.it_->block() != nullptr && !it.IsAtBeginningOf(RIL_PARA)); } -bool ResultIterator::BidiDebug(int min_level) const { - int debug_level = 1; - auto *p = ParamUtils::FindParam("bidi_debug", GlobalParams()->int_params, - tesseract_->params()->int_params); - if (p != nullptr) { - debug_level = (int32_t)(*p); - } - return debug_level >= min_level; -} - } // namespace tesseract. From f87bc28a8646a282762bbe51382772cc34b979b9 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Fri, 26 Jul 2024 01:17:10 +0200 Subject: [PATCH 52/66] dealing with several PVS Studio reported issues: // V1003 The macro 'SetForProto' is a dangerous expression. The parameter 'P' must be surrounded by parentheses. intproto.h 146 // V522 Dereferencing of the null pointer 'osd_tess' might take place. baseapi.cpp 3017 // V522 There might be dereferencing of a potential null pointer 'current'. elst.h 579 // V522 There might be dereferencing of a potential null pointer 'current'. 
elst2.h 568 // V522 There might be dereferencing of a potential null pointer 'current->next'. elst2.h 522 // V522 There might be dereferencing of a potential null pointer 'it'. baseapi.cpp 2233 // V522 There might be dereferencing of a potential null pointer 'prev->next'. elst2.h 531 // V522 There might be dereferencing of a potential null pointer 'prev->next'. elst2.h 572 // V522 There might be dereferencing of a potential null pointer 'renderer'. baseapi.cpp 1530 // V522 There might be dereferencing of a potential null pointer 'truth_text'. baseapi.cpp 1039 // V550 An odd precise comparison: classify_rotation.y() != 0.0f. It's probably better to use a comparison with defined precision: fabs(A - B) > Epsilon. baseapi.cpp 3198 // V595 The 'renderer' pointer was utilized before it was verified against nullptr. Check lines: 1530, 1545. baseapi.cpp 1530 // V601 The 'false' value is implicitly cast to the integer type. dawg.h 200 // V636 The 'wi0[num_in] * 127i8' expression was implicitly cast from 'int' type to 'float' type. Consider utilizing an explicit type cast to avoid overflow. An example: double A = (double)(X) * Y;. intsimdmatrix.cpp 103 // V690 The 'REJ' class implements a copy constructor, but lacks the copy assignment operator. It is dangerous to use such a class. rejctmap.h 101 // V729 Function body contains the 'word_end' label that is not used by any 'goto' statements. hocrrenderer.cpp 454 // V730 Not all members of a class are initialized inside the constructor. Consider inspecting: prev, current, next, cycle_pt, ex_current_was_last, ex_current_was_cycle_pt, ... 
elst.h 204 # Conflicts: # src/api/baseapi.cpp # Conflicts: # src/api/baseapi.cpp # src/api/hocrrenderer.cpp # src/ccmain/control.cpp # src/ccmain/tesseractclass.cpp # src/classify/classify.h --- include/tesseract/pageiterator.h | 2 +- src/api/baseapi.cpp | 51 ++++++++++++++++++-------------- src/arch/intsimdmatrix.cpp | 23 +++++++------- src/ccmain/control.cpp | 4 +-- src/ccmain/equationdetect.cpp | 4 +-- src/ccstruct/matrix.h | 10 +++++-- src/ccstruct/pageres.h | 20 ++++++------- src/ccstruct/rejctmap.h | 13 +++++--- src/ccutil/elst.h | 24 ++++++++++----- src/ccutil/elst2.h | 24 +++++++++++++++ src/classify/classify.h | 1 + src/classify/intproto.h | 11 +++---- src/cutil/bitvec.h | 6 ++-- src/dict/dawg.h | 3 +- src/textord/makerow.cpp | 2 +- 15 files changed, 126 insertions(+), 72 deletions(-) diff --git a/include/tesseract/pageiterator.h b/include/tesseract/pageiterator.h index 6873971505..7330861707 100644 --- a/include/tesseract/pageiterator.h +++ b/include/tesseract/pageiterator.h @@ -334,7 +334,7 @@ class TESS_API PageIterator { PAGE_RES_IT *it_; /** * The current input WERD being iterated. If there is an output from OCR, - * then word_ is nullptr. Owned by the API + * then word_ is nullptr. Owned by the API. */ WERD *word_; /** The length of the current word_. */ diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 00e5e1baff..982ccf9c57 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -1559,27 +1559,30 @@ char *TessBaseAPI::GetBoxText(int page_number) { result[0] = '\0'; int output_length = 0; LTRResultIterator *it = GetLTRIterator(); - do { - int left, top, right, bottom; - if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) { - const std::unique_ptr text(it->GetUTF8Text(RIL_SYMBOL)); - // Tesseract uses space for recognition failure. Fix to a reject - // character, kTesseractReject so we don't create illegal box files. 
- for (int i = 0; text[i] != '\0'; ++i) { - if (text[i] == ' ') { - text[i] = kTesseractReject; + // V522 There might be dereferencing of a potential null pointer 'it'. baseapi.cpp 2233 + if (it != nullptr) { + do { + int left, top, right, bottom; + if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) { + const std::unique_ptr text(it->GetUTF8Text(RIL_SYMBOL)); + // Tesseract uses space for recognition failure. Fix to a reject + // character, kTesseractReject so we don't create illegal box files. + for (int i = 0; text[i] != '\0'; ++i) { + if (text[i] == ' ') { + text[i] = kTesseractReject; + } + } + snprintf(result + output_length, total_length - output_length, "%s %d %d %d %d %d\n", + text.get(), left, image_height_ - bottom, right, image_height_ - top, page_number); + output_length += strlen(result + output_length); + // Just in case... + if (output_length + kMaxBytesPerLine > total_length) { + break; } } - snprintf(result + output_length, total_length - output_length, "%s %d %d %d %d %d\n", - text.get(), left, image_height_ - bottom, right, image_height_ - top, page_number); - output_length += strlen(result + output_length); - // Just in case... - if (output_length + kMaxBytesPerLine > total_length) { - break; - } - } - } while (it->Next(RIL_SYMBOL)); - delete it; + } while (it->Next(RIL_SYMBOL)); + delete it; + } return result; } @@ -2172,10 +2175,13 @@ int TessBaseAPI::FindLines() { " but data path is undefined\n"); delete osd_tesseract_; osd_tesseract_ = nullptr; - } else if (osd_tesseract_->init_tesseract(datapath_, "", "osd", OEM_TESSERACT_ONLY, + // V522 Dereferencing of the null pointer 'osd_tess' might take place. 
baseapi.cpp 3017 + } else if (osd_tesseract_ != nullptr && osd_tesseract_->init_tesseract(datapath_, "", "osd", OEM_TESSERACT_ONLY, nullptr, 0, nullptr, nullptr, false, &mgr) == 0) { osd_tess = osd_tesseract_; - osd_tesseract_->set_source_resolution(thresholder_->GetSourceYResolution()); + ASSERT0(osd_tess != nullptr); + ASSERT0(thresholder_ != nullptr); + osd_tess->set_source_resolution(thresholder_->GetSourceYResolution()); } else { tprintf( "Warning: Auto orientation and script detection requested," @@ -2344,7 +2350,8 @@ void TessBaseAPI::GetBlockTextOrientations(int **block_orientation, bool **verti (*block_orientation)[i] = num_rotations; // The classify_rotation is non-zero only if the text has vertical // writing direction. - (*vertical_writing)[i] = classify_rotation.y() != 0.0f; + // V550 An odd precise comparison: classify_rotation.y() != 0.0f. It's probably better to use a comparison with defined precision: fabs(A - B) > Epsilon. baseapi.cpp 3198 + (*vertical_writing)[i] = (fabs(classify_rotation.y()) > FLT_EPSILON); ++i; } } diff --git a/src/arch/intsimdmatrix.cpp b/src/arch/intsimdmatrix.cpp index 9929e8cc32..af46a68f78 100644 --- a/src/arch/intsimdmatrix.cpp +++ b/src/arch/intsimdmatrix.cpp @@ -92,16 +92,18 @@ void IntSimdMatrix::MatrixDotVector(const GENERIC_2D_ARRAY &w, int total2 = 0; int total3 = 0; for (int j = 0; j < num_in; ++j) { - total0 += wi0[j] * u[j]; - total1 += wi1[j] * u[j]; - total2 += wi2[j] * u[j]; - total3 += wi3[j] * u[j]; + int uj = u[j]; + total0 += wi0[j] * uj; + total1 += wi1[j] * uj; + total2 += wi2[j] * uj; + total3 += wi3[j] * uj; } // Add in the bias and correct for integer values. 
- v[i + 0] = (total0 + wi0[num_in] * INT8_MAX) * scales[i + 0]; - v[i + 1] = (total1 + wi1[num_in] * INT8_MAX) * scales[i + 1]; - v[i + 2] = (total2 + wi2[num_in] * INT8_MAX) * scales[i + 2]; - v[i + 3] = (total3 + wi3[num_in] * INT8_MAX) * scales[i + 3]; + // V636 The 'wi0[num_in] * 127i8' expression was implicitly cast from 'int' type to 'float' type. Consider utilizing an explicit type cast to avoid overflow. An example: double A = (double)(X) * Y;. intsimdmatrix.cpp 103 + v[i + 0] = (total0 + wi0[num_in] * int(INT8_MAX)) * scales[i + 0]; + v[i + 1] = (total1 + wi1[num_in] * int(INT8_MAX)) * scales[i + 1]; + v[i + 2] = (total2 + wi2[num_in] * int(INT8_MAX)) * scales[i + 2]; + v[i + 3] = (total3 + wi3[num_in] * int(INT8_MAX)) * scales[i + 3]; } // Capture the remainder mod four @@ -109,10 +111,11 @@ void IntSimdMatrix::MatrixDotVector(const GENERIC_2D_ARRAY &w, const int8_t *wi = w[i]; int total = 0; for (int j = 0; j < num_in; ++j) { - total += wi[j] * u[j]; + int uj = u[j]; + total += wi[j] * uj; } // Add in the bias and correct for integer values. - v[i] = (total + wi[num_in] * INT8_MAX) * scales[i]; + v[i] = (total + wi[num_in] * int(INT8_MAX)) * scales[i]; } } diff --git a/src/ccmain/control.cpp b/src/ccmain/control.cpp index c9f0f38ccc..6ef0705787 100644 --- a/src/ccmain/control.cpp +++ b/src/ccmain/control.cpp @@ -1400,8 +1400,8 @@ void Tesseract::classify_word_pass1(const WordData &word_data, WERD_RES **in_wor PointerVector *out_words) { ROW *row = word_data.row; BLOCK *block = word_data.block; - prev_word_best_choice_ = - word_data.prev_word != nullptr ? word_data.prev_word->word->best_choice : nullptr; + ASSERT0(*in_word != nullptr); + prev_word_best_choice_ = (word_data.prev_word != nullptr ? 
word_data.prev_word->word->best_choice : nullptr); #ifdef DISABLED_LEGACY_ENGINE if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) { #else diff --git a/src/ccmain/equationdetect.cpp b/src/ccmain/equationdetect.cpp index 7e4bbe6d33..e2304ea0df 100644 --- a/src/ccmain/equationdetect.cpp +++ b/src/ccmain/equationdetect.cpp @@ -799,7 +799,7 @@ void EquationDetect::IdentifyInlinePartsHorizontal() { ColPartition *neighbor = nullptr; bool side_neighbor_found = false; while ((neighbor = search.NextSideSearch(right_to_left)) != nullptr) { - const TBOX &neighbor_box(neighbor->bounding_box()); + const TBOX &neighbor_box = neighbor->bounding_box(); if (!IsTextOrEquationType(neighbor->type()) || part_box.x_gap(neighbor_box) > kGapTh || !part_box.major_y_overlap(neighbor_box) || part_box.major_x_overlap(neighbor_box)) { continue; @@ -812,7 +812,7 @@ void EquationDetect::IdentifyInlinePartsHorizontal() { part->set_type(PT_INLINE_EQUATION); } else { // Check the geometric feature of neighbor. - const TBOX &neighbor_box(neighbor->bounding_box()); + const TBOX &neighbor_box = neighbor->bounding_box(); if (neighbor_box.width() > part_box.width() && neighbor->type() != PT_EQUATION) { // Mark as PT_INLINE_EQUATION. part->set_type(PT_INLINE_EQUATION); diff --git a/src/ccstruct/matrix.h b/src/ccstruct/matrix.h index f2437182de..4edafd988d 100644 --- a/src/ccstruct/matrix.h +++ b/src/ccstruct/matrix.h @@ -253,7 +253,9 @@ class GENERIC_2D_ARRAY { // stored COLUMN-major, so the left-most index is the most significant. // This allows [][] access to use indices in the same order as (,). virtual int index(int column, int row) const { - return (column * dim2_ + row); + auto rv = column * dim2_ + row; + ASSERT_HOST(rv >= 0); + return rv; } // Put a list element into the matrix at a specific location. 
@@ -623,7 +625,9 @@ class BandTriMatrix : public GENERIC_2D_ARRAY { int index(int column, int row) const override { ASSERT_HOST(row >= column); ASSERT_HOST(row - column < this->dim2_); - return column * this->dim2_ + row - column; + auto rv = column * this->dim2_ + row - column; + ASSERT_HOST(rv >= 0); + return rv; } // Appends array2 corner-to-corner to *this, making an array of dimension @@ -659,7 +663,7 @@ class MATRIX : public BandTriMatrix { MATRIX(int dimension, int bandwidth) : BandTriMatrix(dimension, bandwidth, NOT_CLASSIFIED) {} - ~MATRIX() override; + virtual ~MATRIX() override; // Returns true if there are any real classification results. bool Classified(int col, int row, int wildcard_id) const; diff --git a/src/ccstruct/pageres.h b/src/ccstruct/pageres.h index 9901f66d1e..62ba323346 100644 --- a/src/ccstruct/pageres.h +++ b/src/ccstruct/pageres.h @@ -683,7 +683,7 @@ class TESS_API WERD_RES : public ELIST_LINK { class TESS_API PAGE_RES_IT { public: - PAGE_RES *page_res; // page being iterated + PAGE_RES *page_res = nullptr; // page being iterated PAGE_RES_IT() = default; @@ -786,17 +786,17 @@ class TESS_API PAGE_RES_IT { private: WERD_RES *internal_forward(bool new_block, bool empty_ok); - WERD_RES *prev_word_res; // previous word - ROW_RES *prev_row_res; // row of prev word - BLOCK_RES *prev_block_res; // block of prev word + WERD_RES *prev_word_res = nullptr; // previous word + ROW_RES *prev_row_res = nullptr; // row of prev word + BLOCK_RES *prev_block_res = nullptr; // block of prev word - WERD_RES *word_res; // current word - ROW_RES *row_res; // row of current word - BLOCK_RES *block_res; // block of cur. word + WERD_RES *word_res = nullptr; // current word + ROW_RES *row_res = nullptr; // row of current word + BLOCK_RES *block_res = nullptr; // block of cur. 
word - WERD_RES *next_word_res; // next word - ROW_RES *next_row_res; // row of next word - BLOCK_RES *next_block_res; // block of next word + WERD_RES *next_word_res = nullptr; // next word + ROW_RES *next_row_res = nullptr; // row of next word + BLOCK_RES *next_block_res = nullptr; // block of next word BLOCK_RES_IT block_res_it; // iterators ROW_RES_IT row_res_it; diff --git a/src/ccstruct/rejctmap.h b/src/ccstruct/rejctmap.h index beeb537379..f66de92c92 100644 --- a/src/ccstruct/rejctmap.h +++ b/src/ccstruct/rejctmap.h @@ -106,13 +106,18 @@ class REJ { public: REJ() = default; - REJ( // classwise copy - const REJ &source) { + // copy constructor + REJ(const REJ &source) { flags = source.flags; } - REJ &operator=( // assign REJ - const REJ &source) = default; + // V690 The 'REJ' class implements a copy constructor, but lacks the copy assignment operator. It is dangerous to use such a class. rejctmap.h 101 + REJ &operator=(const REJ &source) { + if (this != &source) { + flags = source.flags; + } + return *this; + } bool flag(REJ_FLAGS rej_flag) const { return flags[rej_flag]; diff --git a/src/ccutil/elst.h b/src/ccutil/elst.h index 040ce2a488..a41d7ac215 100644 --- a/src/ccutil/elst.h +++ b/src/ccutil/elst.h @@ -112,6 +112,7 @@ class TESS_API ELIST { friend class ELIST_ITERATOR; ELIST_LINK *last = nullptr; // End of list + //(Points to head) ELIST_LINK *First() { // return first return last ? last->next : nullptr; @@ -188,14 +189,15 @@ class TESS_API ELIST { class TESS_API ELIST_ITERATOR { friend void ELIST::assign_to_sublist(ELIST_ITERATOR *, ELIST_ITERATOR *); - ELIST *list; // List being iterated - ELIST_LINK *prev; // prev element - ELIST_LINK *current; // current element - ELIST_LINK *next; // next element - ELIST_LINK *cycle_pt; // point we are cycling the list to. - bool ex_current_was_last; // current extracted was end of list - bool ex_current_was_cycle_pt; // current extracted was cycle point - bool started_cycling; // Have we moved off the start? 
+ // V730 Not all members of a class are initialized inside the constructor. Consider inspecting: prev, current, next, cycle_pt, ex_current_was_last, ex_current_was_cycle_pt, ... elst.h 204 + ELIST *list = nullptr; // List being iterated + ELIST_LINK *prev = nullptr; // prev element + ELIST_LINK *current = nullptr; // current element + ELIST_LINK *next = nullptr; // next element + ELIST_LINK *cycle_pt = nullptr; // point we are cycling the list to. + bool ex_current_was_last = false; // current extracted was end of list + bool ex_current_was_cycle_pt = false; // current extracted was cycle point + bool started_cycling = false; // Have we moved off the start? ELIST_LINK *extract_sublist( // from this current... ELIST_ITERATOR *other_it); // to other current @@ -576,6 +578,12 @@ inline void ELIST_ITERATOR::add_list_before(ELIST *list_to_add) { list->last = list_to_add->last; prev = list->last; current = list->First(); +#ifndef NDEBUG + // V522 There might be dereferencing of a potential null pointer 'current'. elst.h 579 + if (!current) { + BAD_PARAMETER.abort("ELIST_ITERATOR::add_list_before", "current is nullptr"); + } +#endif next = current->next; ex_current_was_last = false; } else { diff --git a/src/ccutil/elst2.h b/src/ccutil/elst2.h index c75105f4c9..b998449efb 100644 --- a/src/ccutil/elst2.h +++ b/src/ccutil/elst2.h @@ -519,6 +519,12 @@ inline void ELIST2_ITERATOR::add_list_after(ELIST2 *list_to_add) { } else { if (current) { // not extracted current->next = list_to_add->First(); +#ifndef NDEBUG + // V522 There might be dereferencing of a potential null pointer 'current->next'. 
elst2.h 522 + if (!current->next) { + BAD_PARAMETER.abort("ELIST2_ITERATOR::add_list_after", "current->next is nullptr"); + } +#endif current->next->prev = current; if (current == list->last) { list->last = list_to_add->last; @@ -528,6 +534,12 @@ inline void ELIST2_ITERATOR::add_list_after(ELIST2 *list_to_add) { next = current->next; } else { // current extracted prev->next = list_to_add->First(); +#ifndef NDEBUG + // V522 There might be dereferencing of a potential null pointer 'prev->next'. elst2.h 531 + if (!prev->next) { + BAD_PARAMETER.abort("ELIST2_ITERATOR::add_list_after", "prev->next is nullptr"); + } +#endif prev->next->prev = prev; if (ex_current_was_last) { list->last = list_to_add->last; @@ -565,10 +577,22 @@ inline void ELIST2_ITERATOR::add_list_before(ELIST2 *list_to_add) { list->last = list_to_add->last; prev = list->last; current = list->First(); +#ifndef NDEBUG + // V522 There might be dereferencing of a potential null pointer 'current'. elst2.h 568 + if (!current) { + BAD_PARAMETER.abort("ELIST2_ITERATOR::add_list_before", "current is nullptr"); + } +#endif next = current->next; ex_current_was_last = false; } else { prev->next = list_to_add->First(); +#ifndef NDEBUG + // V522 There might be dereferencing of a potential null pointer 'prev->next'. 
elst2.h 572 + if (!prev->next) { + BAD_PARAMETER.abort("ELIST2_ITERATOR::add_list_before", "prev->next is nullptr"); + } +#endif prev->next->prev = prev; if (current) { // not extracted diff --git a/src/classify/classify.h b/src/classify/classify.h index 2225e5feab..4791176126 100644 --- a/src/classify/classify.h +++ b/src/classify/classify.h @@ -269,6 +269,7 @@ class TESS_API Classify : public CCStruct { return AdaptedTemplates->NumPermClasses == 0; } bool LooksLikeGarbage(TBLOB *blob); + #ifndef GRAPHICS_DISABLED void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox); #endif diff --git a/src/classify/intproto.h b/src/classify/intproto.h index 35a1c75e36..c48cc4ead4 100644 --- a/src/classify/intproto.h +++ b/src/classify/intproto.h @@ -142,15 +142,16 @@ enum IntmatcherDebugAction { IDA_ADAPTIVE, IDA_STATIC, IDA_SHAPE_INDEX, IDA_BOTH Macros ----------------------------------------------------------------------------**/ -#define MaxNumIntProtosIn(C) (C->NumProtoSets * PROTOS_PER_PROTO_SET) -#define SetForProto(P) (P / PROTOS_PER_PROTO_SET) -#define IndexForProto(P) (P % PROTOS_PER_PROTO_SET) -#define ProtoForProtoId(C, P) (&((C->ProtoSets[SetForProto(P)])->Protos[IndexForProto(P)])) +// V1003 The macro 'SetForProto' is a dangerous expression. The parameter 'P' must be surrounded by parentheses. 
intproto.h 146 +#define MaxNumIntProtosIn(C) ((C)->NumProtoSets * PROTOS_PER_PROTO_SET) +#define SetForProto(P) ((P) / PROTOS_PER_PROTO_SET) +#define IndexForProto(P) ((P) % PROTOS_PER_PROTO_SET) +#define ProtoForProtoId(C, P) (&(((C)->ProtoSets[SetForProto(P)])->Protos[IndexForProto(P)])) #define PPrunerWordIndexFor(I) (((I) % PROTOS_PER_PROTO_SET) / PROTOS_PER_PP_WERD) #define PPrunerBitIndexFor(I) ((I) % PROTOS_PER_PP_WERD) #define PPrunerMaskFor(I) (1 << PPrunerBitIndexFor(I)) -#define MaxNumClassesIn(T) (T->NumClassPruners * CLASSES_PER_CP) +#define MaxNumClassesIn(T) ((T)->NumClassPruners * CLASSES_PER_CP) #define LegalClassId(c) ((c) >= 0 && (c) < MAX_NUM_CLASSES) #define UnusedClassIdIn(T, c) ((T)->Class[c] == nullptr) #define ClassForClassId(T, c) ((T)->Class[c]) diff --git a/src/cutil/bitvec.h b/src/cutil/bitvec.h index 9178fb497e..7beaec3c9e 100644 --- a/src/cutil/bitvec.h +++ b/src/cutil/bitvec.h @@ -52,11 +52,11 @@ static inline void copy_all_bits(BIT_VECTOR source, BIT_VECTOR dest, size_t leng } } -#define SET_BIT(array, bit) (array[bit / BITSINLONG] |= 1 << (bit & (BITSINLONG - 1))) +#define SET_BIT(array, bit) (array[(bit) / BITSINLONG] |= 1 << ((bit) & (BITSINLONG - 1))) -#define reset_bit(array, bit) (array[bit / BITSINLONG] &= ~(1 << (bit & (BITSINLONG - 1)))) +#define reset_bit(array, bit) (array[(bit) / BITSINLONG] &= ~(1 << ((bit) & (BITSINLONG - 1)))) -#define test_bit(array, bit) (array[bit / BITSINLONG] & (1 << (bit & (BITSINLONG - 1)))) +#define test_bit(array, bit) (array[(bit) / BITSINLONG] & (1 << ((bit) & (BITSINLONG - 1)))) static inline size_t WordsInVectorOfSize(size_t NumBits) { return (NumBits + BITSINLONG - 1) / BITSINLONG; diff --git a/src/dict/dawg.h b/src/dict/dawg.h index 1d6de23a87..70fd7cbcd6 100644 --- a/src/dict/dawg.h +++ b/src/dict/dawg.h @@ -194,7 +194,8 @@ class TESS_API Dawg { (void)edge_ref; (void)unichar_id; (void)word_end; - return false; + // V601 The 'false' value is implicitly cast to the integer type. 
dawg.h 200 + return 0; } protected: diff --git a/src/textord/makerow.cpp b/src/textord/makerow.cpp index 60a76a5206..de77cabda9 100644 --- a/src/textord/makerow.cpp +++ b/src/textord/makerow.cpp @@ -1613,7 +1613,7 @@ int32_t compute_row_descdrop(TO_ROW *row, float gradient, int xheight_blob_count if (static_cast(blob_count + num_potential_asc) < xheight_blob_count * total_fraction) { blob_count = 0; } - int descdrop = blob_count > 0 ? -blob_index : 0; + int descdrop = (blob_count > 0 ? -blob_index : 0); if (textord_debug_xheights) { tprintf("Descdrop: %d (potential ascenders %d, descenders %d)\n", descdrop, num_potential_asc, blob_count); From d89af16a8aa6015b353da1fb7e29100a6ab108da Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Fri, 26 Jul 2024 02:15:10 +0200 Subject: [PATCH 53/66] - remove incorrect assertion assumption - remove lingering dead/obsoleted code. --- src/api/baseapi.cpp | 12 ------------ src/ccmain/pageiterator.cpp | 3 ++- src/ccmain/paragraphs.cpp | 2 +- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 9716ab3639..7231a3cf78 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -3291,16 +3291,4 @@ std::string mkUniqueOutputFilePath(const char* basepath, int page_number, const return std::move(f); } -void WritePix(const std::string &file_path, Pix *pic, int file_type) -{ - tprintInfo("Saving image file {}\n", file_path); -#if defined(HAVE_MUPDF) - fz_mkdir_for_file(fz_get_global_context(), file_path.c_str()); -#endif - if (pixWrite(file_path.c_str(), pic, file_type)) - { - tprintError("Writing image file {} failed\n", file_path); - } -} - } // namespace tesseract diff --git a/src/ccmain/pageiterator.cpp b/src/ccmain/pageiterator.cpp index 8e40d6e92a..4cf6b821e4 100644 --- a/src/ccmain/pageiterator.cpp +++ b/src/ccmain/pageiterator.cpp @@ -80,7 +80,8 @@ PageIterator::PageIterator(const PageIterator &src) } const PageIterator &PageIterator::operator=(const PageIterator &src) { - 
ASSERT_HOST_MSG(tesseract_ != src.tesseract_, "Software coding error: you are trying or assign/copy PageIterator instances which were created referencing different Tesseract instances.\n"); + //ASSERT_HOST_MSG(tesseract_ != src.tesseract_, "Software coding error: you are trying or assign/copy PageIterator instances which were created referencing different Tesseract instances.\n"); + // ^-------- this one triggers in paragraphs.cpp@2646, hence this iterator class hierarchy MUST track the related Tesseract instance as a C++ pointer rather than a C++ &reference! if (this != &src) { page_res_ = src.page_res_; tesseract_ = src.tesseract_; diff --git a/src/ccmain/paragraphs.cpp b/src/ccmain/paragraphs.cpp index e83d049a82..fbba78aee9 100644 --- a/src/ccmain/paragraphs.cpp +++ b/src/ccmain/paragraphs.cpp @@ -2643,7 +2643,7 @@ void Tesseract::DetectParagraphs(bool after_text_recognition, } // Now stitch in the row_owners into the rows. - row = *block_start; + row = *block_start; // <-- Warning/Note: this statement MAY also alter the tesseract instance reference tracked by the iterator; see also PageIterator::operator=() for (auto &row_owner : row_owners) { while (!row.PageResIt()->row()) { row.Next(RIL_TEXTLINE); From 482a5b1b60219a7e130d0bace2571bef07f95d72 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Fri, 26 Jul 2024 12:28:55 +0200 Subject: [PATCH 54/66] guaranteed init all members of the class: assign the default/start values in the class declaration code chunk. # Conflicts: # include/tesseract/baseapi.h --- include/tesseract/baseapi.h | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/include/tesseract/baseapi.h b/include/tesseract/baseapi.h index 5248890617..a88b5bd090 100644 --- a/include/tesseract/baseapi.h +++ b/include/tesseract/baseapi.h @@ -975,40 +975,41 @@ class TESS_API TessBaseAPI { } protected: - Tesseract *tesseract_; ///< The underlying data object. 
+ mutable Tesseract *tesseract_ = nullptr; ///< The underlying data object. #if !DISABLED_LEGACY_ENGINE - Tesseract *osd_tesseract_; ///< For orientation & script detection. - EquationDetect *equ_detect_; ///< The equation detector. + Tesseract *osd_tesseract_ = nullptr; ///< For orientation & script detection. + EquationDetect *equ_detect_ = nullptr; ///< The equation detector. #endif ETEXT_DESC *monitor_ = nullptr; ETEXT_DESC default_minimal_monitor_; FileReader reader_; ///< Reads files from any filesystem. - ImageThresholder *thresholder_; ///< Image thresholding module. - std::vector *paragraph_models_; - BLOCK_LIST *block_list_; ///< The page layout. - PAGE_RES *page_res_; ///< The page-level data. + ImageThresholder *thresholder_ = nullptr; ///< Image thresholding module. + std::vector *paragraph_models_ = nullptr; + BLOCK_LIST *block_list_ = nullptr; ///< The page layout. + PAGE_RES *page_res_ = nullptr; ///< The page-level data. std::string visible_image_file_; Image pix_visible_image_; ///< Image used in output PDF std::string output_file_; ///< Name used by debug code. std::string datapath_; ///< Current location of tessdata. std::string language_; ///< Last initialized language. - OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested. - bool recognition_done_; ///< page_res_ contains recognition data. + OcrEngineMode last_oem_requested_ = OEM_DEFAULT; ///< Last ocr language mode requested. + bool recognition_done_ = false; ///< page_res_ contains recognition data. /** * @defgroup ThresholderParams Thresholder Parameters * Parameters saved from the Thresholder. Needed to rebuild coordinates. 
*/ /* @{ */ - int rect_left_; - int rect_top_; - int rect_width_; - int rect_height_; - int image_width_; - int image_height_; + int rect_left_ = 0; + int rect_top_ = 0; + int rect_width_ = 0; + int rect_height_ = 0; + + int image_width_ = 0; + int image_height_ = 0; /* @} */ -private: +protected: // A list of image filenames gets special consideration // // If global parameter `tessedit_page_number` is non-negative, will only process that From 3bd99e17c55fa89e6be8fd1297c29fe4b4b7c69e Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Fri, 26 Jul 2024 12:58:40 +0200 Subject: [PATCH 55/66] part 1/N of the refactoring of API::tesseract() usage: calling this one now guarantees a live Tesseract object exists, i.e. `api.tesseract_ != nullptr` and all API methods, when previously using `api.tesseract_`, will now use that one through `api.tesseract()`. Later on this will be further migrated to using a more appropriate `Tesseract &` C++ reference type, but we ran into issues during the initial refactor and these commits are the stages of the original refactor action with the errors removed. 
--- Entire refactor commit message: bit of a brutalist `const_cast(...)` hack at a few spots (which will be refactored at a later date anyway): moving towards dev/master branch codebase: transitioned to using `Tesseract &` reference instead of `Tesseract *` pointer reference base in internal classes and iterators: their lifetimes are always supposed to be shorter than the Tesseract+TessBaseAPI class instance lifetimes, the `&` reference type communicates better that these expect these lifetime behaviours and we were already busy replacing the nasty "delete Tesseract instance + allocate new Tesseract instance" reset-to-defaults behaviour in TessBaseAPI's Init methods as we want to arrive at a Tesseract which can be fully controlled through *parameters*, i.e simplest, no-config-at-all call interfaces while same configuration power for both tesseract CLI and Tesseract C/C++ usage in larger applications (mupdf et al), where transporting complex configurations through deep call chains is often no sinecure. --- include/tesseract/baseapi.h | 21 ++- src/api/baseapi.cpp | 291 ++++++++++++++++++++++++------------ src/tesseract.cpp | 37 ++--- 3 files changed, 232 insertions(+), 117 deletions(-) diff --git a/include/tesseract/baseapi.h b/include/tesseract/baseapi.h index a88b5bd090..33302c71f2 100644 --- a/include/tesseract/baseapi.h +++ b/include/tesseract/baseapi.h @@ -917,9 +917,24 @@ class TESS_API TessBaseAPI { /** Return the number of dawgs loaded into tesseract_ object. */ int NumDawgs() const; - Tesseract *tesseract() const { - return tesseract_; - } + /// Returns a reference to the internal instance of the Tesseract class; + /// the presence of which is guaranteed, i.e. the returned pointer + /// WILL NOT be `nullptr`. + /// + /// Note that the reference's lifetime ends once the TessBaseAPI's instance + /// is deleted or its End() API is invoked, whichever comes first. 
+ /// + /// \sa End() + /// \sa WipeSqueakyCleanForReUse() + /// + /// @{ + const Tesseract *tesseract() const; + Tesseract *tesseract(); + // https://stackoverflow.com/questions/856542/elegant-solution-to-duplicate-const-and-non-const-getters + //inline Tesseract &tesseract() { + // return const_cast(this->tesseract()); + //} + /// @} OcrEngineMode oem() const { return last_oem_requested_; diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 7231a3cf78..591734e47f 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -562,9 +562,7 @@ int TessBaseAPI::InitFullWithReader(const char *data, int data_size, const char tesseract_->WipeSqueakyCleanForReUse(); #endif } - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract(*this, nullptr); - } + ASSERT_HOST(tesseract_ != nullptr); if (reader != nullptr) { reader_ = reader; } @@ -728,7 +726,10 @@ PageSegMode TessBaseAPI::GetPageSegMode() const { */ char *TessBaseAPI::TesseractRect(const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height) { - if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + if (width < kMinRectSize || height < kMinRectSize) { return nullptr; // Nothing worth doing. 
} @@ -765,6 +766,9 @@ void TessBaseAPI::ClearAdaptiveClassifier() { */ void TessBaseAPI::SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line, float angle) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); if (InternalResetImage()) { thresholder_->SetImage(imagedata, width, height, bytes_per_pixel, bytes_per_line, angle); SetInputImage(thresholder_->GetPixRect()); @@ -772,11 +776,13 @@ void TessBaseAPI::SetImage(const unsigned char *imagedata, int width, int height } void TessBaseAPI::SetSourceResolution(int ppi) { - if (thresholder_) { - thresholder_->SetSourceYResolution(ppi); - } else { - tprintError("Please call SetImage before SetSourceResolution.\n"); - } + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init and/or SetImage before attempting this method.\n", + __func__); + ASSERT_HOST_MSG(thresholder_ != nullptr, + "{} was invoked without a live tesseract thresholder instance: please call SetImage before attempting this method.\n", + __func__); + thresholder_->SetSourceYResolution(ppi); } /** @@ -788,6 +794,9 @@ void TessBaseAPI::SetSourceResolution(int ppi) { * and it is therefore more efficient to provide a Pix directly. */ void TessBaseAPI::SetImage(Pix *pix, float angle) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); if (InternalResetImage()) { // as Image will take ownership and `pix` is not owned by us, we must clone it: Image img(false, pix); @@ -814,9 +823,13 @@ void TessBaseAPI::SetImage(Pix *pix, float angle) { * can be recognized with the same image. 
*/ void TessBaseAPI::SetRectangle(int left, int top, int width, int height) { - if (thresholder_ == nullptr) { - return; - } + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init and/or SetImage before attempting this method.\n", + __func__); + ASSERT_HOST_MSG(thresholder_ != nullptr, + "{} was invoked without a live tesseract thresholder instance: please call SetImage before attempting this method.\n", + __func__); + // TODO: this ClearResults prematurely nukes the page image and pushes for the diagnostics log to be written to output file, // while this SetRectangle() very well may be meant to OCR a *second* rectangle in the existing page image, which will fail // today as the page image will be lost, thanks to ClearResults. @@ -832,30 +845,35 @@ void TessBaseAPI::SetRectangle(int left, int top, int width, int height) { * Get a copy of the internal thresholded image from Tesseract. */ Pix *TessBaseAPI::GetThresholdedImage() { - if (tesseract_ == nullptr || thresholder_ == nullptr) { - return nullptr; - } - if (tesseract_->pix_binary() == nullptr) { - if (verbose_process) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init and/or SetImage before attempting this method.\n", + __func__); + ASSERT_HOST_MSG(thresholder_ != nullptr, + "{} was invoked without a live tesseract thresholder instance: please call SetImage before attempting this method.\n", + __func__); + + Tesseract* tess = tesseract(); + if (tess->pix_binary() == nullptr) { + if (verbose_process) { tprintInfo("PROCESS: the source image is not a binary image, hence we apply a thresholding algo/subprocess to obtain a binarized image.\n"); - } + } Image pix; if (!Threshold(pix.obtains())) { return nullptr; } - tesseract_->set_pix_binary(pix); // candidate for move semantics + tess->set_pix_binary(pix); // candidate for move semantics - if (tesseract_->tessedit_dump_pageseg_images) { - 
tesseract_->AddPixDebugPage(tesseract_->pix_binary(), "Thresholded Image result (because it wasn't thresholded yet)"); + if (tess->tessedit_dump_pageseg_images) { + tess->AddPixDebugPage(tess->pix_binary(), "Thresholded Image result (because it wasn't thresholded yet)"); } } - // Image p1 = pixRotate(tesseract_->pix_binary(), 0.15, L_ROTATE_SHEAR, L_BRING_IN_WHITE, 0, 0); + // Image p1 = pixRotate(tess->pix_binary(), 0.15, L_ROTATE_SHEAR, L_BRING_IN_WHITE, 0, 0); // because we want to keep the public API as-is for now, instead of migrating it to using Image type directly, // we downgrade to `PIX *` at the exit point, hence the reponsibility to CLONE is ours: - return tesseract_->pix_binary().clone2pix(); + return tess->pix_binary().clone2pix(); } /** @@ -1124,65 +1142,68 @@ PageIterator *TessBaseAPI::AnalyseLayout(bool merge_similar_words) { * internal structures. */ int TessBaseAPI::Recognize() { - if (tesseract_ == nullptr) { - return -1; - } + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init and/or SetImage before attempting this method.\n", + __func__); + + Tesseract *tess = tesseract(); + if (FindLines() != 0) { return -1; } - AutoPopDebugSectionLevel section_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Recognize (OCR)")); + AutoPopDebugSectionLevel section_handle(tess, tess->PushSubordinatePixDebugSection("Recognize (OCR)")); delete page_res_; if (block_list_->empty()) { - page_res_ = new PAGE_RES(false, block_list_, &tesseract_->prev_word_best_choice_); + page_res_ = new PAGE_RES(false, block_list_, &tess->prev_word_best_choice_); return 0; // Empty page. 
} - tesseract_->SetBlackAndWhitelist(); + tess->SetBlackAndWhitelist(); recognition_done_ = true; #if !DISABLED_LEGACY_ENGINE - if (tesseract_->tessedit_resegment_from_line_boxes) { + if (tess->tessedit_resegment_from_line_boxes) { if (verbose_process) tprintInfo("PROCESS: Re-segment from line boxes.\n"); - page_res_ = tesseract_->ApplyBoxes(tesseract_->input_file_path_.c_str(), true, block_list_); - } else if (tesseract_->tessedit_resegment_from_boxes) { + page_res_ = tess->ApplyBoxes(tess->input_file_path_.c_str(), true, block_list_); + } else if (tess->tessedit_resegment_from_boxes) { if (verbose_process) tprintInfo("PROCESS: Re-segment from page boxes.\n"); - page_res_ = tesseract_->ApplyBoxes(tesseract_->input_file_path_.c_str(), false, block_list_); + page_res_ = tess->ApplyBoxes(tess->input_file_path_.c_str(), false, block_list_); } else #endif // !DISABLED_LEGACY_ENGINE { if (verbose_process) tprintInfo("PROCESS: Re-segment from LSTM / previous word best choice.\n"); - page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(), block_list_, &tesseract_->prev_word_best_choice_); + page_res_ = new PAGE_RES(tess->AnyLSTMLang(), block_list_, &tess->prev_word_best_choice_); } if (page_res_ == nullptr) { return -1; } - if (tesseract_->tessedit_train_line_recognizer) { - AutoPopDebugSectionLevel subsection_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Train Line Recognizer: Correct Classify Words")); - if (!tesseract_->TrainLineRecognizer(tesseract_->input_file_path_.c_str(), output_file_, block_list_)) { + if (tess->tessedit_train_line_recognizer) { + AutoPopDebugSectionLevel subsection_handle(tess, tess->PushSubordinatePixDebugSection("Train Line Recognizer: Correct Classify Words")); + if (!tess->TrainLineRecognizer(tess->input_file_path_.c_str(), output_file_, block_list_)) { return -1; } - tesseract_->CorrectClassifyWords(page_res_); + tess->CorrectClassifyWords(page_res_); return 0; } #if !DISABLED_LEGACY_ENGINE - if 
(tesseract_->tessedit_make_boxes_from_boxes) { - AutoPopDebugSectionLevel subsection_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Make Boxes From Boxes: Correct Classify Words")); - tesseract_->CorrectClassifyWords(page_res_); + if (tess->tessedit_make_boxes_from_boxes) { + AutoPopDebugSectionLevel subsection_handle(tess, tess->PushSubordinatePixDebugSection("Make Boxes From Boxes: Correct Classify Words")); + tess->CorrectClassifyWords(page_res_); return 0; } #endif // !DISABLED_LEGACY_ENGINE int result = 0; - if (tesseract_->SupportsInteractiveScrollView()) { - AutoPopDebugSectionLevel subsection_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("PGEditor: Interactive Session")); + if (tess->SupportsInteractiveScrollView()) { + AutoPopDebugSectionLevel subsection_handle(tess, tess->PushSubordinatePixDebugSection("PGEditor: Interactive Session")); #if !GRAPHICS_DISABLED - tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); + tess->pgeditor_main(rect_width_, rect_height_, page_res_); #endif // !GRAPHICS_DISABLED // The page_res is invalid after an interactive session, so cleanup // in a way that lets us continue to the next page without crashing. 
@@ -1190,45 +1211,45 @@ int TessBaseAPI::Recognize() { page_res_ = nullptr; return -1; #if !DISABLED_LEGACY_ENGINE - } else if (tesseract_->tessedit_train_from_boxes) { - AutoPopDebugSectionLevel subsection_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Train From Boxes")); + } else if (tess->tessedit_train_from_boxes) { + AutoPopDebugSectionLevel subsection_handle(tess, tess->PushSubordinatePixDebugSection("Train From Boxes")); std::string fontname; ExtractFontName(output_file_.c_str(), &fontname); - tesseract_->ApplyBoxTraining(fontname, page_res_); - } else if (tesseract_->tessedit_ambigs_training) { - AutoPopDebugSectionLevel subsection_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Train Ambigs")); - FILE *training_output_file = tesseract_->init_recog_training(tesseract_->input_file_path_.c_str()); + tess->ApplyBoxTraining(fontname, page_res_); + } else if (tess->tessedit_ambigs_training) { + AutoPopDebugSectionLevel subsection_handle(tess, tess->PushSubordinatePixDebugSection("Train Ambigs")); + FILE *training_output_file = tess->init_recog_training(tess->input_file_path_.c_str()); // OCR the page segmented into words by tesseract. - tesseract_->recog_training_segmented(tesseract_->input_file_path_.c_str(), page_res_, training_output_file); + tess->recog_training_segmented(tess->input_file_path_.c_str(), page_res_, training_output_file); fclose(training_output_file); #endif // !DISABLED_LEGACY_ENGINE } else { - AutoPopDebugSectionLevel subsection_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("The Main Recognition Phase")); + AutoPopDebugSectionLevel subsection_handle(tess, tess->PushSubordinatePixDebugSection("The Main Recognition Phase")); if (scrollview_support) { - tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); + tess->pgeditor_main(rect_width_, rect_height_, page_res_); } // Now run the main recognition. 
- if (!tesseract_->paragraph_text_based) { - AutoPopDebugSectionLevel subsection_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Detect Paragraphs (Before Recognition)")); + if (!tess->paragraph_text_based) { + AutoPopDebugSectionLevel subsection_handle(tess, tess->PushSubordinatePixDebugSection("Detect Paragraphs (Before Recognition)")); DetectParagraphs(false); if (scrollview_support) { - tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); + tess->pgeditor_main(rect_width_, rect_height_, page_res_); } } - AutoPopDebugSectionLevel subsection_handle2(tesseract_, tesseract_->PushSubordinatePixDebugSection("Recognize All Words")); - if (tesseract_->recog_all_words(page_res_, nullptr, nullptr, 0)) { + AutoPopDebugSectionLevel subsection_handle2(tess, tess->PushSubordinatePixDebugSection("Recognize All Words")); + if (tess->recog_all_words(page_res_, nullptr, nullptr, 0)) { if (scrollview_support) { - tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); + tess->pgeditor_main(rect_width_, rect_height_, page_res_); } subsection_handle2.pop(); - if (tesseract_->paragraph_text_based) { - AutoPopDebugSectionLevel subsection_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Detect Paragraphs (After Recognition)")); + if (tess->paragraph_text_based) { + AutoPopDebugSectionLevel subsection_handle(tess, tess->PushSubordinatePixDebugSection("Detect Paragraphs (After Recognition)")); DetectParagraphs(true); if (scrollview_support) { - tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); + tess->pgeditor_main(rect_width_, rect_height_, page_res_); } } } else { @@ -1391,8 +1412,13 @@ Pix* TessBaseAPI::GetVisibleImage() { } const char *TessBaseAPI::GetInputName() { - if (tesseract_ != nullptr && !tesseract_->input_file_path_.empty()) { - return tesseract_->input_file_path_.c_str(); + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this 
method.\n", + __func__); + + Tesseract *tess = tesseract(); + if (!tess->input_file_path_.empty()) { + return tess->input_file_path_.c_str(); } return nullptr; } @@ -1826,7 +1852,11 @@ bool TessBaseAPI::ProcessPage(Pix *pix, const char *filename, * Recognize. The returned iterator must be deleted after use. */ LTRResultIterator *TessBaseAPI::GetLTRIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + + if (page_res_ == nullptr) { return nullptr; } return new LTRResultIterator(page_res_, tesseract_, thresholder_->GetScaleFactor(), @@ -1843,7 +1873,10 @@ LTRResultIterator *TessBaseAPI::GetLTRIterator() { * DetectOS, or anything else that changes the internal PAGE_RES. */ ResultIterator *TessBaseAPI::GetIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + if (page_res_ == nullptr) { return nullptr; } return ResultIterator::StartOfParagraph(LTRResultIterator( @@ -1860,7 +1893,10 @@ ResultIterator *TessBaseAPI::GetIterator() { * DetectOS, or anything else that changes the internal PAGE_RES. */ MutableIterator *TessBaseAPI::GetMutableIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + if (page_res_ == nullptr) { return nullptr; } return new MutableIterator(page_res_, tesseract_, thresholder_->GetScaleFactor(), @@ -1870,7 +1906,11 @@ MutableIterator *TessBaseAPI::GetMutableIterator() { /** Make a text string from the internal data structures. 
*/ char *TessBaseAPI::GetUTF8Text() { - if (tesseract_ == nullptr || (!recognition_done_ && Recognize() < 0)) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + + if (!recognition_done_ && Recognize() < 0) { return nullptr; } std::string text(""); @@ -1978,7 +2018,11 @@ static void AddBoxToTSV(const PageIterator *it, PageIteratorLevel level, std::st * Returned string must be freed with the delete [] operator. */ char *TessBaseAPI::GetTSVText(int page_number, bool lang_info) { - if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize() < 0)) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + + if (page_res_ == nullptr && Recognize() < 0) { return nullptr; } @@ -2164,7 +2208,11 @@ const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + * Returned string must be freed with the delete [] operator. */ char *TessBaseAPI::GetBoxText(int page_number) { - if (tesseract_ == nullptr || (!recognition_done_ && Recognize() < 0)) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + + if (!recognition_done_ && Recognize() < 0) { return nullptr; } int blob_count; @@ -2216,7 +2264,11 @@ const int kLatinChs[] = {0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, * Returned string must be freed with the delete [] operator. 
*/ char *TessBaseAPI::GetUNLVText() { - if (tesseract_ == nullptr || (!recognition_done_ && Recognize() < 0)) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + + if (!recognition_done_ && Recognize() < 0) { return nullptr; } bool tilde_crunch_written = false; @@ -2407,7 +2459,11 @@ int TessBaseAPI::MeanTextConf() { /** Returns an array of all word confidences, terminated by -1. */ int *TessBaseAPI::AllWordConfidences() { - if (tesseract_ == nullptr || (!recognition_done_ && Recognize() < 0)) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + + if (!recognition_done_ && Recognize() < 0) { return nullptr; } int n_word = 0; @@ -2587,6 +2643,12 @@ void TessBaseAPI::WipeSqueakyCleanForReUse() { * destructing and reconstructing your TessBaseAPI. * Once End() has been used, none of the other API functions may be used * other than Init and anything declared above it in the class definition. + * + * All `Tesseract*` reference pointers produced by the tesseract() API are invalid + * after this call. If you don't want that, i.e. wish to use tesseract + * some more, than consider using the new WipeSqueakyCleanForReUse() API + * instead: that one DOES NOT invalidate the active Tesseract instance + * nor the references to it obtained previously. */ void TessBaseAPI::End() { WipeSqueakyCleanForReUse(); @@ -2677,6 +2739,10 @@ void TessBaseAPI::SetDictFunc(DictFunc f) { * utf-8 string. */ void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + if (tesseract_ != nullptr) { tesseract_->getDict().probability_in_context_ = f; // Set it for the sublangs too. 
@@ -2689,15 +2755,14 @@ void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) { /** Common code for setting the image. */ bool TessBaseAPI::InternalResetImage() { - if (tesseract_ == nullptr) { - tprintError("Please call Init before attempting to set an image.\n"); - return false; - } + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + if (thresholder_ != nullptr) { thresholder_->Clear(); - } - if (thresholder_ == nullptr) { - thresholder_ = new ImageThresholder(tesseract_); + } else { + thresholder_ = new ImageThresholder(tesseract()); } ClearResults(); return true; @@ -2871,7 +2936,13 @@ bool TessBaseAPI::Threshold(Pix **pix) { /** Find lines from the image making the BLOCK_LIST. */ int TessBaseAPI::FindLines() { - if (thresholder_ == nullptr || thresholder_->IsEmpty()) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init and/or SetImage before attempting this method.\n", + __func__); + ASSERT_HOST_MSG(thresholder_ != nullptr, + "{} was invoked without a live tesseract thresholder instance: please call SetImage before attempting this method.\n", + __func__); + if (thresholder_->IsEmpty()) { tprintError("Please call SetImage before attempting recognition.\n"); return -1; } @@ -2881,6 +2952,7 @@ int TessBaseAPI::FindLines() { if (!block_list_->empty()) { return 0; } + ASSERT0(tesseract_ != nullptr); if (tesseract_ == nullptr) { tesseract_ = new Tesseract(*this, nullptr); #if !DISABLED_LEGACY_ENGINE @@ -3015,7 +3087,10 @@ void TessBaseAPI::ClearResults() { * Also return the number of recognized blobs in blob_count. 
*/ int TessBaseAPI::TextLength(int *blob_count) const { - if (tesseract_ == nullptr || page_res_ == nullptr) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + if (page_res_ == nullptr) { return 0; } @@ -3048,9 +3123,9 @@ int TessBaseAPI::TextLength(int *blob_count) const { * Returns true if the image was processed successfully. */ bool TessBaseAPI::DetectOS(OSResults *osr) { - if (tesseract_ == nullptr) { - return false; - } + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); ClearResults(); if (tesseract_->pix_binary() == nullptr) { Image pix; @@ -3150,7 +3225,11 @@ const char *TessBaseAPI::GetUnichar(int unichar_id) const { /** Return the pointer to the i-th dawg loaded into tesseract_ object. */ const Dawg *TessBaseAPI::GetDawg(int i) const { - if (tesseract_ == nullptr || i >= NumDawgs()) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init and/or SetImage before attempting this method.\n", + __func__); + + if (i >= NumDawgs()) { return nullptr; } return tesseract_->getDict().GetDawg(i); @@ -3158,25 +3237,28 @@ const Dawg *TessBaseAPI::GetDawg(int i) const { /** Return the number of dawgs loaded into tesseract_ object. */ int TessBaseAPI::NumDawgs() const { - return tesseract_ == nullptr ? 
0 : tesseract_->getDict().NumDawgs(); + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + return tesseract_->getDict().NumDawgs(); } void TessBaseAPI::ReportDebugInfo() { - if (tesseract_ == nullptr) { - return; - } + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a " + "live tesseract instance: you may have a bug that looses a " + "lot of tesseract diagnostics info + reporting for you.\n", + __func__); tesseract_->ReportDebugInfo(); } void TessBaseAPI::FinalizeAndWriteDiagnosticsReport() { - if (tesseract_ == nullptr) { - ASSERT_HOST_MSG(false, - "FinalizeAndWriteDiagnosticsReport was invoked without a " + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a " "live tesseract instance: you may have a bug that looses a " - "lot of tesseract diagnostics info + reporting for you.\n"); - return; - }; + "lot of tesseract diagnostics info + reporting for you.\n", + __func__); tesseract_->ReportDebugInfo(); } @@ -3208,8 +3290,23 @@ std::string HOcrEscape(const char *text) { return ret; } -std::string mkUniqueOutputFilePath(const char* basepath, int page_number, const char* label, const char* filename_extension) -{ +const Tesseract *TessBaseAPI::tesseract() const { + if (tesseract_ == nullptr) { + TessBaseAPI &owner = const_cast(*this); + tesseract_ = new tesseract::Tesseract(owner, nullptr); + } + return tesseract_; +} + +Tesseract *TessBaseAPI::tesseract() { + if (tesseract_ == nullptr) { + TessBaseAPI &owner = *this; + tesseract_ = new tesseract::Tesseract(owner, nullptr); + } + return tesseract_; +} + +std::string mkUniqueOutputFilePath(const char *basepath, int page_number, const char *label, const char *filename_extension) { size_t pos = strcspn(basepath, ":\\/"); const char* filename = basepath; const char* p = basepath + pos; diff --git a/src/tesseract.cpp b/src/tesseract.cpp index 20f56bd932..171cb2bae9 100644 --- 
a/src/tesseract.cpp +++ b/src/tesseract.cpp @@ -974,7 +974,8 @@ extern "C" int tesseract_main(int argc, const char **argv) } // TODO: set during init phase and/or when this parameter is edited. - monitor.set_deadline_msecs(api.tesseract()->activity_timeout_millisec); + Tesseract *tess = api.tesseract(); + monitor.set_deadline_msecs(tess->activity_timeout_millisec); // repeat the `-c var=val` load as debug_all MAY have overwritten some of these user-specified settings in the call above. if (!SetVariablesFromCLArgs(api, argc, argv)) { @@ -1055,14 +1056,15 @@ extern "C" int tesseract_main(int argc, const char **argv) // ambigs.train, box.train, box.train.stderr, linebox, rebox, lstm.train. // In this mode no other OCR result files are written. bool b = false; + ASSERT_HOST(api.tesseract() == tess); bool in_training_mode = - (bool(api.tesseract()->tessedit_ambigs_training)) || - (bool(api.tesseract()->tessedit_resegment_from_boxes)) || - (bool(api.tesseract()->tessedit_make_boxes_from_boxes)) || - (bool(api.tesseract()->tessedit_train_line_recognizer)); + (bool(tess->tessedit_ambigs_training)) || + (bool(tess->tessedit_resegment_from_boxes)) || + (bool(tess->tessedit_make_boxes_from_boxes)) || + (bool(tess->tessedit_train_line_recognizer)); if (api.GetPageSegMode() == tesseract::PSM_OSD_ONLY) { - if (!api.tesseract()->AnyTessLang()) { + if (!tess->AnyTessLang()) { fprintf(stderr, "Error, OSD requires a model for the legacy engine\n"); return EXIT_FAILURE; } @@ -1101,17 +1103,18 @@ extern "C" int tesseract_main(int argc, const char **argv) succeed &= !PreloadRenderers(api, renderers, pagesegmode, outputbase); if (succeed && renderers.empty()) { // default: TXT + HOCR renderer - api.tesseract()->tessedit_create_hocr.set_value(true); - api.tesseract()->tessedit_create_alto.set_value(true); - api.tesseract()->tessedit_create_page_xml.set_value(true); - api.tesseract()->tessedit_create_tsv.set_value(true); - api.tesseract()->tessedit_create_pdf.set_value(true); - 
api.tesseract()->textonly_pdf.set_value(true); - api.tesseract()->tessedit_write_unlv.set_value(true); - api.tesseract()->tessedit_create_lstmbox.set_value(true); - api.tesseract()->tessedit_create_boxfile.set_value(true); - api.tesseract()->tessedit_create_wordstrbox.set_value(true); - api.tesseract()->tessedit_create_txt.set_value(true); + ASSERT_HOST(api.tesseract() == tess); + tess->tessedit_create_hocr.set_value(true); + tess->tessedit_create_alto.set_value(true); + tess->tessedit_create_page_xml.set_value(true); + tess->tessedit_create_tsv.set_value(true); + tess->tessedit_create_pdf.set_value(true); + tess->textonly_pdf.set_value(true); + tess->tessedit_write_unlv.set_value(true); + tess->tessedit_create_lstmbox.set_value(true); + tess->tessedit_create_boxfile.set_value(true); + tess->tessedit_create_wordstrbox.set_value(true); + tess->tessedit_create_txt.set_value(true); succeed &= !PreloadRenderers(api, renderers, pagesegmode, outputbase); } From 43fbf2f032c3dc5e7983ec58409f166f7b0402a3 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Fri, 26 Jul 2024 13:25:08 +0200 Subject: [PATCH 56/66] part 2/N of the refactoring of API::tesseract() usage: calling this one now guarantees a live Tesseract object exists, i.e. `api.tesseract_ != nullptr` and all API methods, when previously using `api.tesseract_`, will now use that one through `api.tesseract()`. Later on this will be further migrated to using a more appropriate `Tesseract &` C++ reference type, but we ran into issues during the initial refactor and these commits are the stages of the original refactor action with the errors removed. 
--- Entire refactor commit message: bit of a brutalist `const_cast(...)` hack at a few spots (which will be refactored at a later date anyway): moving towards dev/master branch codebase: transitioned to using `Tesseract &` reference instead of `Tesseract *` pointer reference base in internal classes and iterators: their lifetimes are always supposed to be shorter than the Tesseract+TessBaseAPI class instance lifetimes, the `&` reference type communicates better that these expect these lifetime behaviours and we were already busy replacing the nasty "delete Tesseract instance + allocate new Tesseract instance" reset-to-defaults behaviour in TessBaseAPI's Init methods as we want to arrive at a Tesseract which can be fully controlled through *parameters*, i.e simplest, no-config-at-all call interfaces while same configuration power for both tesseract CLI and Tesseract C/C++ usage in larger applications (mupdf et al), where transporting complex configurations through deep call chains is often no sinecure. --- src/api/baseapi.cpp | 289 ++++++++++++++++++++++++-------------------- 1 file changed, 159 insertions(+), 130 deletions(-) diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 591734e47f..1557c8028e 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -276,10 +276,8 @@ const char *TessBaseAPI::Version() { * loading a UNLV zone file. */ void TessBaseAPI::SetInputName(const char *name) { - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract(*this, nullptr); - } - tesseract_->input_file_path_ = name ? name : ""; + Tesseract *tess = tesseract(); + tess->input_file_path_ = name ? name : ""; } /** Set the name of the visible image files. Needed only for PDF output. */ @@ -332,7 +330,8 @@ ImageCostEstimate TessBaseAPI::EstimateImageMemoryCost(const Pix* pix, float all * and reports the cost estimate for the current instance/image. 
*/ ImageCostEstimate TessBaseAPI::EstimateImageMemoryCost() const { - return tesseract_->EstimateImageMemoryCost(); + const Tesseract *tess = tesseract(); + return tess->EstimateImageMemoryCost(); } /** @@ -344,7 +343,8 @@ ImageCostEstimate TessBaseAPI::EstimateImageMemoryCost() const { * this same check as part of their startup routine. */ bool TessBaseAPI::CheckAndReportIfImageTooLarge(const Pix* pix) const { - return tesseract_->CheckAndReportIfImageTooLarge(pix); + const Tesseract *tess = tesseract(); + return tess->CheckAndReportIfImageTooLarge(pix); } /** Set the name of the output files. Needed only for debugging. */ @@ -357,29 +357,24 @@ const std::string &TessBaseAPI::GetOutputName() { } bool TessBaseAPI::SetVariable(const char *name, const char *value) { - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract(*this, nullptr); - } - return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY, tesseract_->params()); + Tesseract *tess = tesseract(); + return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY, tess->params()); } bool TessBaseAPI::SetVariable(const char *name, int value) { - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract(*this, nullptr); - } + Tesseract *tess = tesseract(); std::string v = fmt::format("{}", value); - return ParamUtils::SetParam(name, v.c_str(), SET_PARAM_CONSTRAINT_NON_INIT_ONLY, tesseract_->params()); + return ParamUtils::SetParam(name, v.c_str(), SET_PARAM_CONSTRAINT_NON_INIT_ONLY, tess->params()); } bool TessBaseAPI::SetDebugVariable(const char *name, const char *value) { - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract(*this, nullptr); - } - return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY, tesseract_->params()); + Tesseract *tess = tesseract(); + return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY, tess->params()); } bool TessBaseAPI::GetIntVariable(const char *name, int *value) const { + Tesseract *tess = 
const_cast(tesseract()); auto *p = ParamUtils::FindParam(name, GlobalParams()->int_params(), - tesseract_->params()->int_params()); + tess->params()->int_params()); if (p == nullptr) { return false; } @@ -388,8 +383,9 @@ bool TessBaseAPI::GetIntVariable(const char *name, int *value) const { } bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const { + Tesseract *tess = const_cast(tesseract()); auto *p = ParamUtils::FindParam(name, GlobalParams()->bool_params(), - tesseract_->params()->bool_params()); + tess->params()->bool_params()); if (p == nullptr) { return false; } @@ -398,8 +394,9 @@ bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const { } const char *TessBaseAPI::GetStringVariable(const char *name) const { + Tesseract *tess = const_cast(tesseract()); auto *p = ParamUtils::FindParam(name, GlobalParams()->string_params(), - tesseract_->params()->string_params()); + tess->params()->string_params()); if (p == nullptr) { return nullptr; } @@ -407,8 +404,9 @@ const char *TessBaseAPI::GetStringVariable(const char *name) const { } bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const { + Tesseract *tess = const_cast(tesseract()); auto *p = ParamUtils::FindParam(name, GlobalParams()->double_params(), - tesseract_->params()->double_params()); + tess->params()->double_params()); if (p == nullptr) { return false; } @@ -418,7 +416,8 @@ bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const { /** Get value of named variable as a string, if it exists. 
*/ bool TessBaseAPI::GetVariableAsString(const char *name, std::string *val) const { - return ParamUtils::GetParamAsString(name, tesseract_->params(), val); + Tesseract *tess = const_cast(tesseract()); + return ParamUtils::GetParamAsString(name, tess->params(), val); } #if !DISABLED_LEGACY_ENGINE @@ -428,9 +427,10 @@ void TessBaseAPI::PrintFontsTable(FILE *fp) const { if (!fp) fp = stdout; bool print_info = (fp == stdout || fp == stderr); - const int fontinfo_size = tesseract_->get_fontinfo_table().size(); + const Tesseract *tess = tesseract(); + const int fontinfo_size = tess->get_fontinfo_table().size(); for (int font_index = 1; font_index < fontinfo_size; ++font_index) { - FontInfo font = tesseract_->get_fontinfo_table().at(font_index); + FontInfo font = tess->get_fontinfo_table().at(font_index); if (print_info) { tprintInfo( "ID={}: {} is_italic={} is_bold={} is_fixed_pitch={} is_serif={} is_fraktur={}\n", @@ -462,7 +462,8 @@ void TessBaseAPI::PrintFontsTable(FILE *fp) const { * (use DumpVariables instead to create config files). */ void TessBaseAPI::PrintVariables(FILE *fp) const { - ParamUtils::PrintParams(fp, tesseract_->params(), true); + Tesseract *tess = const_cast(tesseract()); + ParamUtils::PrintParams(fp, tess->params(), true); } /** @@ -470,7 +471,8 @@ void TessBaseAPI::PrintVariables(FILE *fp) const { * Can be used as Tesseract configuration file. */ void TessBaseAPI::DumpVariables(FILE *fp) const { - ParamUtils::PrintParams(fp, tesseract_->params(), false); + Tesseract *tess = const_cast(tesseract()); + ParamUtils::PrintParams(fp, tess->params(), false); } // Report parameters' usage statistics, i.e. report which params have been @@ -481,10 +483,11 @@ void TessBaseAPI::DumpVariables(FILE *fp) const { // answering the question: // "Which of all those parameters are actually *relevant* to my use case today?" 
void TessBaseAPI::ReportParamsUsageStatistics() const { - tesseract::ParamsVectors *vec = tesseract_->params(); + Tesseract *tess = const_cast(tesseract()); + const tesseract::ParamsVectors *vec = tess->params(); std::string fpath = tesseract::vars_report_file; FILE *f = ParamUtils::OpenReportFile(fpath.c_str()); - int section_level = tesseract_->GetPixDebugSectionLevel(); + int section_level = tess->GetPixDebugSectionLevel(); ParamUtils::ReportParamsUsageStatistics(f, vec, section_level, nullptr); if (f) { if (f != stdout && f != stderr) { @@ -545,6 +548,8 @@ int TessBaseAPI::InitFullWithReader(const char *data, int data_size, const char data = ""; } std::string datapath = data_size == 0 ? data : language; + ASSERT_HOST(tesseract_ != nullptr); + // If the datapath, OcrEngineMode or the language have changed - start again. // Note that the language_ field stores the last requested language that was // initialized successfully, while tesseract_->lang stores the language @@ -623,10 +628,8 @@ const ETEXT_DESC &TessBaseAPI::Monitor() const { void TessBaseAPI::DebugAddCommandline(const std::vector& argv) { - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract(*this, nullptr); - } - tesseract_->DebugAddCommandline(argv); + Tesseract *tess = tesseract(); + tess->DebugAddCommandline(argv); } @@ -649,12 +652,12 @@ const char *TessBaseAPI::GetInitLanguagesAsString() const { */ void TessBaseAPI::GetLoadedLanguagesAsVector(std::vector *langs) const { langs->clear(); - if (tesseract_ != nullptr) { - langs->push_back(tesseract_->lang_); - int num_subs = tesseract_->num_sub_langs(); - for (int i = 0; i < num_subs; ++i) { - langs->push_back(tesseract_->get_sub_lang(i)->lang_); - } + ASSERT_HOST(tesseract_ != nullptr); + const Tesseract *tess = tesseract(); + langs->push_back(tess->lang_); + int num_subs = tess->num_sub_langs(); + for (int i = 0; i < num_subs; ++i) { + langs->push_back(tess->get_sub_lang(i)->lang_); } } @@ -663,10 +666,10 @@ void 
TessBaseAPI::GetLoadedLanguagesAsVector(std::vector *langs) co */ void TessBaseAPI::GetAvailableLanguagesAsVector(std::vector *langs) const { langs->clear(); - if (tesseract_ != nullptr) { - addAvailableLanguages(tesseract_->datadir_, "", langs); - std::sort(langs->begin(), langs->end()); - } + ASSERT_HOST(tesseract_ != nullptr); + const Tesseract *tess = tesseract(); + addAvailableLanguages(tess->datadir_, "", langs); + std::sort(langs->begin(), langs->end()); } /** @@ -674,12 +677,11 @@ void TessBaseAPI::GetAvailableLanguagesAsVector(std::vector *langs) * AnalysePage. Calls that attempt recognition will generate an error. */ void TessBaseAPI::InitForAnalysePage() { - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract(*this, nullptr); + ASSERT_HOST(tesseract_ != nullptr); + Tesseract *tess = tesseract(); #if !DISABLED_LEGACY_ENGINE - tesseract_->InitAdaptiveClassifier(nullptr); + tess->InitAdaptiveClassifier(nullptr); #endif - } } /** @@ -688,7 +690,8 @@ void TessBaseAPI::InitForAnalysePage() { * and also accepts a relative or absolute path name. */ void TessBaseAPI::ReadConfigFile(const char *filename) { - tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY); + Tesseract *tess = tesseract(); + tess->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY); } /** @@ -697,10 +700,8 @@ void TessBaseAPI::ReadConfigFile(const char *filename) { * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string). */ void TessBaseAPI::SetPageSegMode(PageSegMode mode) { - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract(*this, nullptr); - } - tesseract_->tessedit_pageseg_mode.set_value(mode); + Tesseract *tess = tesseract(); + tess->tessedit_pageseg_mode.set_value(mode); } /** Return the current page segmentation mode. 
*/ @@ -708,7 +709,8 @@ PageSegMode TessBaseAPI::GetPageSegMode() const { if (tesseract_ == nullptr) { return PSM_SINGLE_BLOCK; } - return static_cast(tesseract_->tessedit_pageseg_mode.value()); + const Tesseract *tess = tesseract(); + return static_cast(tess->tessedit_pageseg_mode.value()); } /** @@ -749,6 +751,7 @@ char *TessBaseAPI::TesseractRect(const unsigned char *imagedata, int bytes_per_p * adaptive data. */ void TessBaseAPI::ClearAdaptiveClassifier() { + ASSERT_HOST(tesseract_ != nullptr); if (tesseract_ == nullptr) { return; } @@ -1123,7 +1126,8 @@ int TessBaseAPI::GetThresholdedImageScaleFactor() const { */ PageIterator *TessBaseAPI::AnalyseLayout(bool merge_similar_words) { if (FindLines() == 0) { - AutoPopDebugSectionLevel section_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Analyse Layout")); + Tesseract *tess = tesseract(); + AutoPopDebugSectionLevel section_handle(tess, tess->PushSubordinatePixDebugSection("Analyse Layout")); if (block_list_->empty()) { return nullptr; // The page was empty. @@ -1263,34 +1267,38 @@ int TessBaseAPI::Recognize() { void TessBaseAPI::SetInputImage(Pix *pix) { Image img(false, pix); img = img.copy(); - tesseract_->set_pix_original(img); + Tesseract *tess = tesseract(); + tess->set_pix_original(img); } // Takes ownership of the input pix. 
void TessBaseAPI::SetInputImage(Image &&pix) { - tesseract_->set_pix_original(pix); + Tesseract *tess = tesseract(); + tess->set_pix_original(pix); } void TessBaseAPI::SetInputImage(const Image &pix) { - tesseract_->set_pix_original(pix); + Tesseract *tess = tesseract(); + tess->set_pix_original(pix); } void TessBaseAPI::SetVisibleImage(Pix *pix) { pix_visible_image_ = pixCopy(NULL, pix); - // tesseract_->set_pix_visible_image(pix); + // tess->set_pix_visible_image(pix); } void TessBaseAPI::SetVisibleImage(Image &&pix) { pix_visible_image_ = pix; - // tesseract_->set_pix_visible_image(pix); + // tess->set_pix_visible_image(pix); } void TessBaseAPI::SetVisibleImage(const Image &pix) { pix_visible_image_ = pix; //.clone(); - //tesseract_->set_pix_visible_image(pix); + //tess->set_pix_visible_image(pix); } Pix *TessBaseAPI::GetInputImage() const { - return tesseract_->pix_original().clone2pix(); + const Tesseract *tess = tesseract(); + return tess->pix_original().clone2pix(); } static const char* NormalizationProcessModeName(int mode) { @@ -1327,7 +1335,8 @@ static const char *NormalizationTargetModeName(int mode) { // Grayscale normalization (preprocessing) bool TessBaseAPI::NormalizeImage(int mode) { - AutoPopDebugSectionLevel section_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Normalize Image")); + Tesseract *tess = tesseract(); + AutoPopDebugSectionLevel section_handle(tess, tess->PushSubordinatePixDebugSection("Normalize Image")); // Get a clone/copy of the source image rectangle, reduced to normalized greyscale, // and at the same resolution as the output binary. @@ -1346,10 +1355,10 @@ bool TessBaseAPI::NormalizeImage(int mode) { // ... and feed the result into the designated target(s): thresholder and/or tesseract source image (which is used as LSTM v4/v5 engine input). 
int targets = (mode & 0x03); - bool debug = (tesseract_->debug_image_normalization || tesseract_->tessedit_write_images); + bool debug = (tess->debug_image_normalization || tess->tessedit_write_images); if (false && debug) { - tesseract_->AddPixDebugPage(pix, fmt::format("Grayscale normalization mode = {} ({} ({}) + {} ({}))", mode, NormalizationProcessModeName(process), process, NormalizationTargetModeName(targets), targets)); + tess->AddPixDebugPage(pix, fmt::format("Grayscale normalization mode = {} ({} ({}) + {} ({}))", mode, NormalizationProcessModeName(process), process, NormalizationTargetModeName(targets), targets)); } switch (process) { @@ -1376,7 +1385,7 @@ bool TessBaseAPI::NormalizeImage(int mode) { } if (debug) { - tesseract_->AddPixDebugPage(result_pix, fmt::format("Grayscale normalization mode = {} ({} ({}) + {} ({}))", mode, NormalizationProcessModeName(process), process, NormalizationTargetModeName(targets), targets)); + tess->AddPixDebugPage(result_pix, fmt::format("Grayscale normalization mode = {} ({} ({}) + {} ({}))", mode, NormalizationProcessModeName(process), process, NormalizationTargetModeName(targets), targets)); } switch (targets) { @@ -1431,7 +1440,8 @@ const char * TessBaseAPI::GetVisibleImageFilename() { } const char *TessBaseAPI::GetDatapath() { - return tesseract_->datadir_.c_str(); + Tesseract *tess = tesseract(); + return tess->datadir_.c_str(); } int TessBaseAPI::GetSourceYResolution() { @@ -1454,7 +1464,8 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, if (!flist && !buf) { return false; } - int page_number = (tesseract_->tessedit_page_number >= 0) ? tesseract_->tessedit_page_number : 0; + Tesseract *tess = tesseract(); + int page_number = (tess->tessedit_page_number >= 0) ? 
tess->tessedit_page_number : 0; char pagename[MAX_PATH]; std::vector lines; @@ -1504,7 +1515,7 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, return false; } tprintInfo("Processing page #{} : {}\n", page_number + 1, pagename); - tesseract_->applybox_page.set_value(page_number); + tess->applybox_page.set_value(page_number); bool r = ProcessPage(pix, pagename, renderer); bool two_pass = false; @@ -1519,7 +1530,7 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, SetPageSegMode(PSM_SINGLE_BLOCK); // Set thresholding method to 0 for second pass regardless - tesseract_->thresholding_method = (int)ThresholdMethod::Otsu; + tess->thresholding_method = (int)ThresholdMethod::Otsu; // SetPageSegMode(PSM_SPARSE_TEXT); SetImage(newpix); @@ -1537,7 +1548,7 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, if (!r) { return false; } - if (tesseract_->tessedit_page_number >= 0) { + if (tess->tessedit_page_number >= 0) { break; } ++page_number; @@ -1555,11 +1566,12 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, size_t size, const char *filename, TessResultRenderer *renderer) { Image pix; - int page_number = (tesseract_->tessedit_page_number >= 0) ? tesseract_->tessedit_page_number : 0; + Tesseract *tess = tesseract(); + int page_number = (tess->tessedit_page_number >= 0) ? tess->tessedit_page_number : 0; size_t offset = 0; for (;; ++page_number) { - if (tesseract_->tessedit_page_number >= 0) { - page_number = tesseract_->tessedit_page_number; + if (tess->tessedit_page_number >= 0) { + page_number = tess->tessedit_page_number; pix = (data) ? pixReadMemTiff(data, size, page_number) : pixReadTiff(filename, page_number); } else { pix = (data) ? 
pixReadMemFromMultipageTiff(data, size, &offset) @@ -1569,12 +1581,12 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, size_t size, co break; } tprintInfo("Processing page #{} of multipage TIFF {}\n", page_number + 1, filename ? filename : "(from internal storage)"); - tesseract_->applybox_page.set_value(page_number); + tess->applybox_page.set_value(page_number); bool r = ProcessPage(pix, filename, renderer); if (!r) { return false; } - if (tesseract_->tessedit_page_number >= 0) { + if (tess->tessedit_page_number >= 0) { break; } if (!offset) { @@ -1588,12 +1600,13 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, size_t size, co // processing required due to being in a training mode. bool TessBaseAPI::ProcessPages(const char *filename, TessResultRenderer *renderer) { - AutoPopDebugSectionLevel section_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Process pages")); + Tesseract *tess = tesseract(); + AutoPopDebugSectionLevel section_handle(tess, tess->PushSubordinatePixDebugSection("Process pages")); bool result = ProcessPagesInternal(filename, renderer); #if !DISABLED_LEGACY_ENGINE if (result) { - if (tesseract_->tessedit_train_from_boxes && !tesseract_->WriteTRFile(output_file_.c_str())) { + if (tess->tessedit_train_from_boxes && !tess->WriteTRFile(output_file_.c_str())) { tprintError("Write of TR file failed: {}\n", output_file_.c_str()); return false; } @@ -1624,6 +1637,7 @@ static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, voi // stdin. We'll still do our best if the user likes pipes. 
bool TessBaseAPI::ProcessPagesInternal(const char *filename, TessResultRenderer *renderer) { + Tesseract *tess = tesseract(); bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "/dev/stdin") || !strcmp(filename, "-"); if (stdInput) { #if defined(WIN32) || defined(_WIN32) || defined(_WIN64) @@ -1771,7 +1785,7 @@ bool TessBaseAPI::ProcessPagesInternal(const char *filename, r = ProcessPagesMultipageTiff(data, buf.size(), filename, renderer); } else { - tesseract_->applybox_page.set_value(-1 /* all pages */); + tess->applybox_page.set_value(-1 /* all pages */); r = ProcessPage(pix, filename, renderer); } @@ -1786,7 +1800,8 @@ bool TessBaseAPI::ProcessPagesInternal(const char *filename, bool TessBaseAPI::ProcessPage(Pix *pix, const char *filename, TessResultRenderer *renderer) { - AutoPopDebugSectionLevel page_level_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection(fmt::format("Process a single page: page #{}", 1 + tesseract_->tessedit_page_number))); + Tesseract *tess = tesseract(); + AutoPopDebugSectionLevel page_level_handle(tess, tess->PushSubordinatePixDebugSection(fmt::format("Process a single page: page #{}", 1 + tess->tessedit_page_number))); //page_level_handle.SetAsRootLevelForParamUsageReporting(); SetInputName(filename); @@ -1812,7 +1827,7 @@ bool TessBaseAPI::ProcessPage(Pix *pix, const char *filename, // Image preprocessing on image // Grayscale normalization - int graynorm_mode = tesseract_->preprocess_graynorm_mode; + int graynorm_mode = tess->preprocess_graynorm_mode; { bool rc = NormalizeImage(graynorm_mode); if (!rc) @@ -1823,21 +1838,21 @@ bool TessBaseAPI::ProcessPage(Pix *pix, const char *filename, bool failed = false; - if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) { + if (tess->tessedit_pageseg_mode == PSM_AUTO_ONLY) { // Disabled character recognition if (! 
std::unique_ptr(AnalyseLayout())) { failed = true; } - } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) { + } else if (tess->tessedit_pageseg_mode == PSM_OSD_ONLY) { failed = (FindLines() != 0); } else { // Normal layout and character recognition. failed = (Recognize() < 0); } - if (tesseract_->tessedit_write_images) { + if (tess->tessedit_write_images) { Image page_pix = GetThresholdedImage(); - tesseract_->AddPixDebugPage(page_pix, fmt::format("processed page #{} : text recog done", 1 + tesseract_->tessedit_page_number)); + tess->AddPixDebugPage(page_pix, fmt::format("processed page #{} : text recog done", 1 + tess->tessedit_page_number)); } if (renderer && !failed) { @@ -1954,7 +1969,8 @@ std::tuple TessBaseAPI::GetTableBoundingBox(unsigned i) return std::tuple(0, 0, 0, 0); } - const int height = tesseract_->ImageHeight(); + Tesseract *tess = tesseract(); + const int height = tess->ImageHeight(); return std::make_tuple( t[i].box.left(), height - t[i].box.top(), @@ -1969,8 +1985,9 @@ std::vector> TessBaseAPI::GetTableRows(unsigned i) return std::vector>(); } - std::vector> rows(t[i].rows.size()); - const int height = tesseract_->ImageHeight(); + Tesseract *tess = tesseract(); + std::vector> rows(t[i].rows.size()); + const int height = tess->ImageHeight(); for (unsigned j = 0; j < t[i].rows.size(); ++j) { rows[j] = @@ -1989,8 +2006,9 @@ std::vector> TessBaseAPI::GetTableCols(unsigned i) return std::vector>(); } - std::vector> cols(t[i].cols.size()); - const int height = tesseract_->ImageHeight(); + Tesseract *tess = tesseract(); + std::vector> cols(t[i].cols.size()); + const int height = tess->ImageHeight(); for (unsigned j = 0; j < t[i].cols.size(); ++j) { cols[j] = @@ -2680,11 +2698,13 @@ void TessBaseAPI::ClearPersistentCache() { * returns 0 if the word is invalid, non-zero if valid */ int TessBaseAPI::IsValidWord(const char *word) const { - return tesseract_->getDict().valid_word(word); + Tesseract *tess = const_cast(tesseract()); + return 
tess->getDict().valid_word(word); } // Returns true if utf8_character is defined in the UniCharset. bool TessBaseAPI::IsValidCharacter(const char *utf8_character) const { - return tesseract_->unicharset_.contains_unichar(utf8_character); + const Tesseract *tess = tesseract(); + return tess->unicharset_.contains_unichar(utf8_character); } // TODO(rays) Obsolete this function and replace with a more aptly named @@ -2726,7 +2746,8 @@ bool TessBaseAPI::GetTextDirection(int *out_offset, float *out_slope) { /** Sets Dict::letter_is_okay_ function to point to the given function. */ void TessBaseAPI::SetDictFunc(DictFunc f) { if (tesseract_ != nullptr) { - tesseract_->getDict().letter_is_okay_ = f; + Tesseract *tess = tesseract(); + tess->getDict().letter_is_okay_ = f; } } @@ -2743,14 +2764,13 @@ void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) { "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", __func__); - if (tesseract_ != nullptr) { - tesseract_->getDict().probability_in_context_ = f; + Tesseract *tess = tesseract(); + tess->getDict().probability_in_context_ = f; // Set it for the sublangs too. - int num_subs = tesseract_->num_sub_langs(); + int num_subs = tess->num_sub_langs(); for (int i = 0; i < num_subs; ++i) { - tesseract_->get_sub_lang(i)->getDict().probability_in_context_ = f; + tess->get_sub_lang(i)->getDict().probability_in_context_ = f; } - } } /** Common code for setting the image. 
*/ @@ -2953,46 +2973,46 @@ int TessBaseAPI::FindLines() { return 0; } ASSERT0(tesseract_ != nullptr); - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract(*this, nullptr); + Tesseract *tess = tesseract(); +#if 0 #if !DISABLED_LEGACY_ENGINE tesseract_->InitAdaptiveClassifier(nullptr); #endif - } - if (tesseract_->pix_binary() == nullptr) { - if (verbose_process) { +#endif + if (tess->pix_binary() == nullptr) { + if (verbose_process) { tprintInfo("PROCESS: the source image is not a binary image, hence we apply a thresholding algo/subprocess to obtain a binarized image.\n"); - } + } Image pix; if (!Threshold(pix.obtains())) { - return -1; - } - tesseract_->set_pix_binary(pix); + return -1; + } + tess->set_pix_binary(pix); } - if (tesseract_->tessedit_dump_pageseg_images) { - tesseract_->AddPixDebugPage(tesseract_->pix_binary(), "FindLines :: Thresholded Image -- this image is now set as the page Master Source Image for this activity"); + if (tess->tessedit_dump_pageseg_images) { + tess->AddPixDebugPage(tess->pix_binary(), "FindLines :: Thresholded Image -- this image is now set as the page Master Source Image for this activity"); } if (verbose_process) { tprintInfo("PROCESS: prepare the image for page segmentation, i.e. discovery of all text areas + bounding boxes & image/text orientation and script{} detection.\n", - (tesseract_->textord_equation_detect ? " + equations" : "")); + (tess->textord_equation_detect ? 
" + equations" : "")); } - AutoPopDebugSectionLevel section_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Prepare for Page Segmentation")); + AutoPopDebugSectionLevel section_handle(tess, tess->PushSubordinatePixDebugSection("Prepare for Page Segmentation")); - tesseract_->PrepareForPageseg(); + tess->PrepareForPageseg(); #if !DISABLED_LEGACY_ENGINE - if (tesseract_->textord_equation_detect) { + if (tess->textord_equation_detect) { if (equ_detect_ == nullptr && !datapath_.empty()) { equ_detect_ = new EquationDetect(*this, datapath_.c_str()); } if (equ_detect_ == nullptr) { tprintWarn("Could not set equation detector\n"); } else { - tesseract_->SetEquationDetect(equ_detect_); + tess->SetEquationDetect(equ_detect_); } } #endif // !DISABLED_LEGACY_ENGINE @@ -3004,7 +3024,7 @@ int TessBaseAPI::FindLines() { #endif OSResults osr; #if !DISABLED_LEGACY_ENGINE - if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && osd_tess == nullptr) { + if (PSM_OSD_ENABLED(tess->tessedit_pageseg_mode) && osd_tess == nullptr) { if (strcmp(language_.c_str(), "osd") == 0) { osd_tess = tesseract_; } else { @@ -3032,13 +3052,13 @@ int TessBaseAPI::FindLines() { } #endif // !DISABLED_LEGACY_ENGINE - if (tesseract_->SegmentPage(tesseract_->input_file_path_.c_str(), block_list_, osd_tess, &osr) < 0) { + if (tess->SegmentPage(tess->input_file_path_.c_str(), block_list_, osd_tess, &osr) < 0) { return -1; } // If Devanagari is being recognized, we use different images for page seg // and for OCR. - tesseract_->PrepareForTessOCR(block_list_, &osr); + tess->PrepareForTessOCR(block_list_, &osr); return 0; } @@ -3047,7 +3067,8 @@ int TessBaseAPI::FindLines() { * Return average gradient of lines on page. */ float TessBaseAPI::GetGradient() { - return tesseract_->gradient(); + Tesseract *tess = tesseract(); + return tess->gradient(); } /** Delete the pageres and clear the block list ready for a new page. 
*/ @@ -3127,23 +3148,25 @@ bool TessBaseAPI::DetectOS(OSResults *osr) { "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", __func__); ClearResults(); - if (tesseract_->pix_binary() == nullptr) { + Tesseract *tess = tesseract(); + if (tess->pix_binary() == nullptr) { Image pix; if (!Threshold(pix.obtains())) { return false; } - tesseract_->set_pix_binary(pix); // candidate for move semantics + tess->set_pix_binary(pix); // candidate for move semantics - if (tesseract_->tessedit_write_images) - tesseract_->AddPixDebugPage(tesseract_->pix_binary(), "DetectOS (Orientation And Script) : Thresholded Image"); + if (tess->tessedit_write_images) + tess->AddPixDebugPage(tess->pix_binary(), "DetectOS (Orientation And Script) : Thresholded Image"); } - return tesseract_->orientation_and_script_detection(tesseract_->input_file_path_.c_str(), osr) > 0; + return tess->orientation_and_script_detection(tess->input_file_path_.c_str(), osr) > 0; } #endif // !DISABLED_LEGACY_ENGINE void TessBaseAPI::set_min_orientation_margin(double margin) { - tesseract_->min_orientation_margin.set_value(margin); + Tesseract *tess = tesseract(); + tess->min_orientation_margin.set_value(margin); } /** @@ -3206,13 +3229,14 @@ void TessBaseAPI::GetBlockTextOrientations(int **block_orientation, bool **verti } void TessBaseAPI::DetectParagraphs(bool after_text_recognition) { + Tesseract *tess = tesseract(); if (paragraph_models_ == nullptr) { - paragraph_models_ = new std::vector; + paragraph_models_ = new std::vector; } MutableIterator *result_it = GetMutableIterator(); do { // Detect paragraphs for this block std::vector models; - tesseract_->DetectParagraphs(after_text_recognition, result_it, &models); + tess->DetectParagraphs(after_text_recognition, result_it, &models); paragraph_models_->insert(paragraph_models_->end(), models.begin(), models.end()); } while (result_it->Next(RIL_BLOCK)); delete result_it; @@ -3220,7 +3244,8 @@ void 
TessBaseAPI::DetectParagraphs(bool after_text_recognition) { /** This method returns the string form of the specified unichar. */ const char *TessBaseAPI::GetUnichar(int unichar_id) const { - return tesseract_->unicharset_.id_to_unichar(unichar_id); + const Tesseract *tess = tesseract(); + return tess->unicharset_.id_to_unichar(unichar_id); } /** Return the pointer to the i-th dawg loaded into tesseract_ object. */ @@ -3232,7 +3257,8 @@ const Dawg *TessBaseAPI::GetDawg(int i) const { if (i >= NumDawgs()) { return nullptr; } - return tesseract_->getDict().GetDawg(i); + Tesseract *tess = const_cast(tesseract()); + return tess->getDict().GetDawg(i); } /** Return the number of dawgs loaded into tesseract_ object. */ @@ -3240,7 +3266,8 @@ int TessBaseAPI::NumDawgs() const { ASSERT_HOST_MSG(tesseract_ != nullptr, "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", __func__); - return tesseract_->getDict().NumDawgs(); + Tesseract *tess = const_cast(tesseract()); + return tess->getDict().NumDawgs(); } @@ -3250,7 +3277,8 @@ void TessBaseAPI::ReportDebugInfo() { "live tesseract instance: you may have a bug that looses a " "lot of tesseract diagnostics info + reporting for you.\n", __func__); - tesseract_->ReportDebugInfo(); + Tesseract *tess = tesseract(); + tess->ReportDebugInfo(); } void TessBaseAPI::FinalizeAndWriteDiagnosticsReport() { @@ -3259,7 +3287,8 @@ void TessBaseAPI::FinalizeAndWriteDiagnosticsReport() { "live tesseract instance: you may have a bug that looses a " "lot of tesseract diagnostics info + reporting for you.\n", __func__); - tesseract_->ReportDebugInfo(); + Tesseract *tess = tesseract(); + tess->ReportDebugInfo(); } /** Escape a char string - replace <>&"' with HTML codes. 
*/ From 4aba140daf62903eebd46e750c59c62ff76bf020 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Fri, 26 Jul 2024 13:26:58 +0200 Subject: [PATCH 57/66] part 3/N of the refactoring of API::tesseract() usage: calling this one now guarantees a live Tesseract object exists, i.e. `api.tesseract_ != nullptr` and all API methods, when previously using `api.tesseract_`, will now use that one through `api.tesseract()`. Later on this will be further migrated to using a more appropriate `Tesseract &` C++ reference type, but we ran into issues during the initial refactor and these commits are the stages of the original refactor action with the errors removed. --- Entire refactor commit message: bit of a brutalist `const_cast(...)` hack at a few spots (which will be refactored at a later date anyway): moving towards dev/master branch codebase: transitioned to using `Tesseract &` reference instead of `Tesseract *` pointer reference base in internal classes and iterators: their lifetimes are always supposed to be shorter than the Tesseract+TessBaseAPI class instance lifetimes, the `&` reference type communicates better that these expect these lifetime behaviours and we were already busy replacing the nasty "delete Tesseract instance + allocate new Tesseract instance" reset-to-defaults behaviour in TessBaseAPI's Init methods as we want to arrive at a Tesseract which can be fully controlled through *parameters*, i.e simplest, no-config-at-all call interfaces while same configuration power for both tesseract CLI and Tesseract C/C++ usage in larger applications (mupdf et al), where transporting complex configurations through deep call chains is often no sinecure. 
--- src/api/baseapi.cpp | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 1557c8028e..ed9b994e28 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -549,22 +549,22 @@ int TessBaseAPI::InitFullWithReader(const char *data, int data_size, const char } std::string datapath = data_size == 0 ? data : language; ASSERT_HOST(tesseract_ != nullptr); + Tesseract *tess = tesseract(); // If the datapath, OcrEngineMode or the language have changed - start again. // Note that the language_ field stores the last requested language that was - // initialized successfully, while tesseract_->lang stores the language + // initialized successfully, while tess->lang stores the language // actually used. They differ only if the requested language was nullptr, in - // which case tesseract_->lang is set to the Tesseract default ("eng"). - if (tesseract_ != nullptr && - tesseract_->RequiresWipeBeforeIndependentReUse() && + // which case tess->lang is set to the Tesseract default ("eng"). + if (tess->RequiresWipeBeforeIndependentReUse() && (datapath_.empty() || language_.empty() || datapath_ != datapath || - last_oem_requested_ != oem || (language_ != language && tesseract_->lang_ != language))) { + last_oem_requested_ != oem || (language_ != language && tess->lang_ != language))) { #if 0 delete tesseract_; tesseract_ = nullptr; #else // try not to throw away tesseract instances. Clean them out rigorously, instead. 
- tesseract_->WipeSqueakyCleanForReUse(); + tess->WipeSqueakyCleanForReUse(); #endif } ASSERT_HOST(tesseract_ != nullptr); @@ -578,7 +578,7 @@ int TessBaseAPI::InitFullWithReader(const char *data, int data_size, const char (void)Monitor().set_progress(0.0).exec_progress_func(); - if (tesseract_->init_tesseract(datapath, output_file_, language, oem, configs, + if (tess->init_tesseract(datapath, output_file_, language, oem, configs, configs_size, vars_vec, vars_values, set_only_non_debug_params, &mgr) != 0) { return -1; @@ -586,8 +586,8 @@ int TessBaseAPI::InitFullWithReader(const char *data, int data_size, const char // Update datapath and language requested for the last valid initialization. datapath_ = std::move(datapath); - if (datapath_.empty() && !tesseract_->datadir_.empty()) { - datapath_ = tesseract_->datadir_; + if (datapath_.empty() && !tess->datadir_.empty()) { + datapath_ = tess->datadir_; } language_ = language; @@ -600,7 +600,7 @@ int TessBaseAPI::InitFullWithReader(const char *data, int data_size, const char // can come through here after a previous failed/aborted/successful // initialization and we still would need to set up the Tesseract // instance to a definitely known state here anyway. - tesseract_->ResetAdaptiveClassifier(); + tess->ResetAdaptiveClassifier(); #endif // !DISABLED_LEGACY_ENGINE if (Monitor().kick_watchdog_and_check_for_cancel()) { @@ -755,8 +755,9 @@ void TessBaseAPI::ClearAdaptiveClassifier() { if (tesseract_ == nullptr) { return; } - tesseract_->ResetAdaptiveClassifier(); - tesseract_->ResetDocumentDictionary(); + Tesseract *tess = tesseract(); + tess->ResetAdaptiveClassifier(); + tess->ResetDocumentDictionary(); } #endif // !DISABLED_LEGACY_ENGINE From 62bf9ec8ed47c0da632e4bc5b41a82f3e340776d Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Fri, 26 Jul 2024 13:28:54 +0200 Subject: [PATCH 58/66] part 4/N of the refactoring of API::tesseract() usage: calling this one now guarantees a live Tesseract object exists, i.e. 
`api.tesseract_ != nullptr` and all API methods that previously used `api.tesseract_` will now access it through `api.tesseract()`. Later on this will be further migrated to using a more appropriate `Tesseract &` C++ reference type, but we ran into issues during the initial refactor and these commits are the stages of the original refactor action with the errors removed. --- Entire refactor commit message: bit of a brutalist `const_cast(...)` hack at a few spots (which will be refactored at a later date anyway): moving towards dev/master branch codebase: transitioned to using a `Tesseract &` reference instead of a `Tesseract *` pointer as the reference base in internal classes and iterators: their lifetimes are always supposed to be shorter than the Tesseract+TessBaseAPI class instance lifetimes; the `&` reference type communicates better that they expect this lifetime behaviour, and we were already busy replacing the nasty "delete Tesseract instance + allocate new Tesseract instance" reset-to-defaults behaviour in TessBaseAPI's Init methods as we want to arrive at a Tesseract which can be fully controlled through *parameters*, i.e. simplest, no-config-at-all call interfaces while offering the same configuration power for both tesseract CLI and Tesseract C/C++ usage in larger applications (mupdf et al), where transporting complex configurations through deep call chains is often no sinecure.
--- src/api/baseapi.cpp | 78 ++++++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index ed9b994e28..35f440146c 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -2290,6 +2290,8 @@ char *TessBaseAPI::GetUNLVText() { if (!recognition_done_ && Recognize() < 0) { return nullptr; } + Tesseract *tess = tesseract(); + bool tilde_crunch_written = false; bool last_char_was_newline = true; bool last_char_was_tilde = false; @@ -2323,7 +2325,7 @@ char *TessBaseAPI::GetUNLVText() { } else { // NORMAL PROCESSING of non tilde crunched words. tilde_crunch_written = false; - tesseract_->set_unlv_suspects(word); + tess->set_unlv_suspects(word); const char *wordstr = word->best_choice->unichar_string().c_str(); const auto &lengths = word->best_choice->unichar_lengths(); int length = lengths.length(); @@ -2523,13 +2525,14 @@ int *TessBaseAPI::AllWordConfidences() { */ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char *wordstr) { bool success = true; + Tesseract *tess = tesseract(); PageSegMode current_psm = GetPageSegMode(); SetPageSegMode(mode); - tesseract_->classify_enable_learning = false; + tess->classify_enable_learning = false; const std::unique_ptr text(GetUTF8Text()); - if (tesseract_->applybox_debug) { + if (tess->applybox_debug) { tprintDebug("Trying to adapt \"{}\" to \"{}\"\n", text.get(), wordstr); } if (text != nullptr) { @@ -2556,9 +2559,9 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char *wordstr) { // No match. 
delete page_res_; std::vector boxes; - page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_); - tesseract_->ReSegmentByClassification(page_res_); - tesseract_->TidyUp(page_res_); + page_res_ = tess->SetupApplyBoxes(boxes, block_list_); + tess->ReSegmentByClassification(page_res_); + tess->TidyUp(page_res_); PAGE_RES_IT pr_it(page_res_); if (pr_it.word() == nullptr) { success = false; @@ -2569,8 +2572,8 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char *wordstr) { word_res->BestChoiceToCorrectText(); } if (success) { - tesseract_->EnableLearning = true; - tesseract_->LearnWord(nullptr, word_res); + tess->EnableLearning = true; + tess->LearnWord(nullptr, word_res); } } else { success = false; @@ -2797,12 +2800,13 @@ bool TessBaseAPI::InternalResetImage() { * The usual argument to Threshold is Tesseract::mutable_pix_binary(). */ bool TessBaseAPI::Threshold(Pix **pix) { + Tesseract *tess = tesseract(); ASSERT_HOST(pix != nullptr); if (*pix != nullptr) { pixDestroy(pix); } // Zero resolution messes up the algorithms, so make sure it is credible. 
- int user_dpi = tesseract_->user_defined_dpi; + int user_dpi = tess->user_defined_dpi; int y_res = thresholder_->GetScaledYResolution(); if (user_dpi && (user_dpi < kMinCredibleResolution || user_dpi > kMaxCredibleResolution)) { tprintWarn( @@ -2827,11 +2831,11 @@ bool TessBaseAPI::Threshold(Pix **pix) { return false; } - auto selected_thresholding_method = static_cast(static_cast(tesseract_->thresholding_method)); + auto selected_thresholding_method = static_cast(static_cast(tess->thresholding_method)); Image pix_binary; std::string caption = ThresholdMethodName(selected_thresholding_method); - AutoPopDebugSectionLevel subsec_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection(fmt::format("Applying the threshold method chosen for this run: {}: {}", selected_thresholding_method, caption))); + AutoPopDebugSectionLevel subsec_handle(tess, tess->PushSubordinatePixDebugSection(fmt::format("Applying the threshold method chosen for this run: {}: {}", selected_thresholding_method, caption))); if (selected_thresholding_method == ThresholdMethod::Otsu) { pix_binary = pix; @@ -2842,11 +2846,11 @@ bool TessBaseAPI::Threshold(Pix **pix) { *pix = pix_binary.clone2pix(); if (!thresholder_->IsBinary()) { - tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds()); - tesseract_->set_pix_grey(thresholder_->GetPixRectGrey()); + tess->set_pix_thresholds(thresholder_->GetPixRectThresholds()); + tess->set_pix_grey(thresholder_->GetPixRectGrey()); } else { - tesseract_->set_pix_thresholds(nullptr); - tesseract_->set_pix_grey(nullptr); + tess->set_pix_thresholds(nullptr); + tess->set_pix_grey(nullptr); } } else { auto [ok, pix_grey, pix_binary2, pix_thresholds] = thresholder_->Threshold(selected_thresholding_method); @@ -2858,19 +2862,19 @@ bool TessBaseAPI::Threshold(Pix **pix) { pix_binary = pix_binary2; *pix = pix_binary.clone2pix(); - tesseract_->set_pix_thresholds(pix_thresholds); // candidates for move semantics - tesseract_->set_pix_grey(pix_grey); + 
tess->set_pix_thresholds(pix_thresholds); // candidates for move semantics + tess->set_pix_grey(pix_grey); // pix_thresholds.destroy(); // pix_grey.destroy(); } - if (tesseract_->tessedit_dump_pageseg_images) { - tesseract_->AddPixDebugPage(tesseract_->pix_grey(), fmt::format("{} : Grey = pre-image", caption)); - tesseract_->AddPixDebugPage(tesseract_->pix_thresholds(), fmt::format("{} : Thresholds", caption)); + if (tess->tessedit_dump_pageseg_images) { + tess->AddPixDebugPage(tess->pix_grey(), fmt::format("{} : Grey = pre-image", caption)); + tess->AddPixDebugPage(tess->pix_thresholds(), fmt::format("{} : Thresholds", caption)); if (verbose_process) { tprintInfo("PROCESS: The 'Thresholds' image displays the per-pixel grey level which will be used to decide which pixels are *foreground* (text, probably) and which pixels are *background* (i.e. the *paper* the text was printed on); you'll note that each pixel in the original (greyscale!) image which is darker than its corresponding threshold level is *binarized* to black (foreground in tesseract) while any lighter pixel is *binarized* to white (background in tesseract).\n"); } - tesseract_->AddPixDebugPage(pix_binary, fmt::format("{} : Binary = post-image", caption)); + tess->AddPixDebugPage(pix_binary, fmt::format("{} : Binary = post-image", caption)); } // demo a bit of pre-postprocessing @@ -2878,32 +2882,32 @@ bool TessBaseAPI::Threshold(Pix **pix) { const char *sequence = "c1.1 + d3.3"; const int dispsep = 0; Image pix_post = pixMorphSequence(pix_binary, sequence, dispsep); - tesseract_->AddPixCompedOverOrigDebugPage(pix_post, fmt::format("{} : post-processed: {} -- just an example to showcase what leptonica can do for us!", caption, sequence)); + tess->AddPixCompedOverOrigDebugPage(pix_post, fmt::format("{} : post-processed: {} -- just an example to showcase what leptonica can do for us!", caption, sequence)); l_int32 w, h, d; - Image composite = tesseract_->pix_grey().copy(); + Image composite = 
tess->pix_grey().copy(); pixGetDimensions(composite, &w, &h, &d); Image mask = pixConvert1To8(nullptr, pix_post, 255, 0); pixRasterop(composite, 0, 0, w, h, PIX_PAINT, mask, 0, 0); - tesseract_->AddPixCompedOverOrigDebugPage(composite, fmt::format("{} : post-processed & masked with: {} -- this should remove all image noise that's not very close to the text, i.e. is considered *not part of the text to OCR*.", caption, sequence)); + tess->AddPixCompedOverOrigDebugPage(composite, fmt::format("{} : post-processed & masked with: {} -- this should remove all image noise that's not very close to the text, i.e. is considered *not part of the text to OCR*.", caption, sequence)); - Image noise1 = pixEmphasizeImageNoise(tesseract_->pix_original().ptr()); - Image noise2 = pixEmphasizeImageNoise(tesseract_->pix_grey().ptr()); + Image noise1 = pixEmphasizeImageNoise(tess->pix_original().ptr()); + Image noise2 = pixEmphasizeImageNoise(tess->pix_grey().ptr()); Image noise3 = pixEmphasizeImageNoise(composite.ptr()); Image noise4 = pixEmphasizeImageNoise(pix_post.ptr()); - tesseract_->AddPixCompedOverOrigDebugPage(noise1, fmt::format("{} : post-processed :: noise emphasis A: emphasized the noise inherent in the source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - tesseract_->AddPixCompedOverOrigDebugPage(noise2, fmt::format("{} : post-processed :: noise emphasis B: emphasized the noise inherent in the greyscaled / normalized source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - tesseract_->AddPixCompedOverOrigDebugPage(noise3, fmt::format("{} : post-processed :: noise emphasis C: emphasized the noise inherent in the composited image. 
Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - tesseract_->AddPixCompedOverOrigDebugPage(noise4, fmt::format("{} : post-processed :: noise emphasis D: emphasized the noise inherent in the closed & binarized / thresholded source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess->AddPixCompedOverOrigDebugPage(noise1, fmt::format("{} : post-processed :: noise emphasis A: emphasized the noise inherent in the source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess->AddPixCompedOverOrigDebugPage(noise2, fmt::format("{} : post-processed :: noise emphasis B: emphasized the noise inherent in the greyscaled / normalized source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess->AddPixCompedOverOrigDebugPage(noise3, fmt::format("{} : post-processed :: noise emphasis C: emphasized the noise inherent in the composited image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess->AddPixCompedOverOrigDebugPage(noise4, fmt::format("{} : post-processed :: noise emphasis D: emphasized the noise inherent in the closed & binarized / thresholded source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - noise1 = pixEmphasizeImageNoise2(tesseract_->pix_original().ptr()); - noise2 = pixEmphasizeImageNoise2(tesseract_->pix_grey().ptr()); + noise1 = pixEmphasizeImageNoise2(tess->pix_original().ptr()); + noise2 = pixEmphasizeImageNoise2(tess->pix_grey().ptr()); noise3 = pixEmphasizeImageNoise2(composite.ptr()); noise4 = pixEmphasizeImageNoise2(pix_post.ptr()); - tesseract_->AddPixCompedOverOrigDebugPage(noise1, fmt::format("{} : post-processed :: noise emphasis E: emphasized the noise inherent in the source image. 
Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - tesseract_->AddPixCompedOverOrigDebugPage(noise2, fmt::format("{} : post-processed :: noise emphasis F: emphasized the noise inherent in the greyscaled / normalized source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - tesseract_->AddPixCompedOverOrigDebugPage(noise3, fmt::format("{} : post-processed :: noise emphasis G: emphasized the noise inherent in the composited image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - tesseract_->AddPixCompedOverOrigDebugPage(noise4, fmt::format("{} : post-processed :: noise emphasis H: emphasized the noise inherent in the closed & binarized / thresholded source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess->AddPixCompedOverOrigDebugPage(noise1, fmt::format("{} : post-processed :: noise emphasis E: emphasized the noise inherent in the source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess->AddPixCompedOverOrigDebugPage(noise2, fmt::format("{} : post-processed :: noise emphasis F: emphasized the noise inherent in the greyscaled / normalized source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess->AddPixCompedOverOrigDebugPage(noise3, fmt::format("{} : post-processed :: noise emphasis G: emphasized the noise inherent in the composited image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess->AddPixCompedOverOrigDebugPage(noise4, fmt::format("{} : post-processed :: noise emphasis H: emphasized the noise inherent in the closed & binarized / thresholded source image. 
Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); if (false) { // NOTE/WARNING: if you want to pick up one of these processed images as the replacement `*pix` then you MUST @@ -2948,7 +2952,7 @@ bool TessBaseAPI::Threshold(Pix **pix) { "Corrected to {}.\n", thresholder_->GetScaledEstimatedResolution(), estimated_res); } - tesseract_->set_source_resolution(estimated_res); + tess->set_source_resolution(estimated_res); (void)Monitor().bump_progress().exec_progress_func(); From 6eabc6b08fedb4d15fe6ac70fe8860978e02acaf Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Fri, 26 Jul 2024 13:37:40 +0200 Subject: [PATCH 59/66] WARNING: this is the culprit of our woes re Tesseract reference / refactoring: this was previously, because the tesseract_ member pointer was never NULL, effectively DEAD CODE. *I* made a mistake during the refactoring by not being careful enough and removing the null-check around it, which made this suddenly ACTIVE/LIVE code (we redo that mistake now through if 01..endif) and it badly breaks the OCR engine, resulting in insane ratings & costs turning up much later in the process. 
--- src/api/baseapi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 35f440146c..1720342ba5 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -2979,7 +2979,7 @@ int TessBaseAPI::FindLines() { } ASSERT0(tesseract_ != nullptr); Tesseract *tess = tesseract(); -#if 0 +#if 01 #if !DISABLED_LEGACY_ENGINE tesseract_->InitAdaptiveClassifier(nullptr); #endif From 40803505eeadcb66b0b2d67a9aedce9fb35d6130 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Fri, 26 Jul 2024 13:38:39 +0200 Subject: [PATCH 60/66] Undoing faulty code, which we committed only as a reminder for posterity -- Revert "WARNING: this is the culprit of our woes re Tesseract reference / refactoring: this was previously, because the tesseract_ member pointer was never NULL, effectively DEAD CODE. *I* made a mistake during the refactoring by not being careful enough and removing the null-check around it, which made this suddenly ACTIVE/LIVE code (we redo that mistake now through if 01..endif) and it badly breaks the OCR engine, resulting in insane ratings & costs turning up much later in the process." This reverts commit 64d4e17e3797261d592e9214b28308bcc842f1bc. 
--- src/api/baseapi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 1720342ba5..35f440146c 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -2979,7 +2979,7 @@ int TessBaseAPI::FindLines() { } ASSERT0(tesseract_ != nullptr); Tesseract *tess = tesseract(); -#if 01 +#if 0 #if !DISABLED_LEGACY_ENGINE tesseract_->InitAdaptiveClassifier(nullptr); #endif From 0e948e0e414a7c796225b3a6f999fee1e3dee4ac Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Fri, 26 Jul 2024 14:07:09 +0200 Subject: [PATCH 61/66] remove superfluous use of tesseract namespace identifier --- src/ccstruct/pageres.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ccstruct/pageres.h b/src/ccstruct/pageres.h index b9789cb72b..e7e3c2617a 100644 --- a/src/ccstruct/pageres.h +++ b/src/ccstruct/pageres.h @@ -192,7 +192,7 @@ class TESS_API WERD_RES : public ELIST_LINK { // match as they are both before any chopping. // TODO(rays) determine if docqual does anything useful and delete bln_boxes // if it doesn't. - tesseract::BoxWord *bln_boxes = nullptr; // BLN input bounding boxes. + BoxWord *bln_boxes = nullptr; // BLN input bounding boxes. // The ROW that this word sits in. NOT owned by the WERD_RES. ROW *blob_row = nullptr; // The denorm provides the transformation to get back to the rotated image @@ -269,7 +269,7 @@ class TESS_API WERD_RES : public ELIST_LINK { // The length of box_word matches rebuild_word, best_state (if set) and // correct_text (if set), as well as best_choice and represents the // number of classified units in the output. - tesseract::BoxWord *box_word = nullptr; // Denormalized output boxes. + BoxWord *box_word = nullptr; // Denormalized output boxes. // The Tesseract that was used to recognize this word. Just a borrowed // pointer. Note: Tesseract's class definition is in a higher-level library. 
// We avoid introducing a cyclic dependency by not using the Tesseract @@ -277,7 +277,7 @@ class TESS_API WERD_RES : public ELIST_LINK { // for the top-level multi-language controller, and maybe for output of // the recognized language. // tesseract points to data owned elsewhere. - tesseract::Tesseract *tesseract = nullptr; + Tesseract *tesseract = nullptr; // The best_state stores the relationship between chopped_word and // rebuild_word. Each blob[i] in rebuild_word is composed of best_state[i] // adjacent blobs in chopped_word. The seams in seam_array are hidden @@ -469,11 +469,11 @@ class TESS_API WERD_RES : public ELIST_LINK { // features on low resolution images. // // The norm_mode sets the default mode for normalization in absence - // of any of the above flags. It should really be a tesseract::OcrEngineMode + // of any of the above flags. It should really be a OcrEngineMode // but is declared as int for ease of use with tessedit_ocr_engine_mode. // Returns false if the word is empty and sets up fake results. bool SetupForRecognition(const UNICHARSET &unicharset_in, - tesseract::Tesseract *tesseract, + Tesseract *tesseract, int norm_mode, const TBOX *norm_box, bool numeric_mode, bool use_body_size, bool allow_detailed_fx, ROW *row, @@ -608,7 +608,7 @@ class TESS_API WERD_RES : public ELIST_LINK { // the given position. (When a sub/superscript is recognized as a separate // word, it falls victim to the rule that a whole word cannot be sub or // superscript, so this function overrides that problem.) - void SetAllScriptPositions(tesseract::ScriptPos position); + void SetAllScriptPositions(ScriptPos position); // Classifies the word with some already-calculated BLOB_CHOICEs. 
// The choices are an array of blob_count pointers to BLOB_CHOICE, From 3604bf391265a14ec2eabc85a909bc55a6f469e4 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Fri, 26 Jul 2024 14:19:40 +0200 Subject: [PATCH 62/66] part 5/N of the refactoring of API::tesseract() usage: here we transition from using `Tesseract *` type to `Tesseract &` C++ reference type where applicable -- note that the tesseract iterator classes cannot use a `Tesseract&` reference but must track a `Tesseract *` pointer instead as these iterators can jump across several Tesseract instances where iterating or when being re-started; see the code comments which were added in an earlier commit. This commit picks up what's left of commit SHA-1: bee51e471879275b04f0ecb63a7596bb359bb169, which is the complete refactor, but which includes refactoring errors and is not part of this branch. --- Entire refactor commit message: bit of a brutalist `const_cast(...)` hack at a few spots (which will be refactored at a later date anyway): moving towards dev/master branch codebase: transitioned to using `Tesseract &` reference instead of `Tesseract *` pointer reference base in internal classes and iterators: their lifetimes are always supposed to be shorter than the Tesseract+TessBaseAPI class instance lifetimes, the `&` reference type communicates better that these expect these lifetime behaviours and we were already busy replacing the nasty "delete Tesseract instance + allocate new Tesseract instance" reset-to-defaults behaviour in TessBaseAPI's Init methods as we want to arrive at a Tesseract which can be fully controlled through *parameters*, i.e simplest, no-config-at-all call interfaces while same configuration power for both tesseract CLI and Tesseract C/C++ usage in larger applications (mupdf et al), where transporting complex configurations through deep call chains is often no sinecure. 
--- include/tesseract/baseapi.h | 4 +- src/api/baseapi.cpp | 502 ++++++++++++++--------------- src/api/pdfrenderer.cpp | 2 +- src/ccmain/tesseractclass.cpp | 2 +- src/ccmain/thresholder.cpp | 23 +- src/ccmain/thresholder.h | 4 +- src/ccstruct/debugpixa.cpp | 51 ++- src/ccstruct/debugpixa.h | 8 +- src/tesseract.cpp | 40 +-- src/training/ambiguous_words.cpp | 2 +- src/training/classifier_tester.cpp | 2 +- 11 files changed, 316 insertions(+), 324 deletions(-) diff --git a/include/tesseract/baseapi.h b/include/tesseract/baseapi.h index 33302c71f2..738b9a8ef1 100644 --- a/include/tesseract/baseapi.h +++ b/include/tesseract/baseapi.h @@ -928,8 +928,8 @@ class TESS_API TessBaseAPI { /// \sa WipeSqueakyCleanForReUse() /// /// @{ - const Tesseract *tesseract() const; - Tesseract *tesseract(); + const Tesseract &tesseract() const; + Tesseract &tesseract(); // https://stackoverflow.com/questions/856542/elegant-solution-to-duplicate-const-and-non-const-getters //inline Tesseract &tesseract() { // return const_cast(this->tesseract()); diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 35f440146c..ce65b30b75 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -276,8 +276,8 @@ const char *TessBaseAPI::Version() { * loading a UNLV zone file. */ void TessBaseAPI::SetInputName(const char *name) { - Tesseract *tess = tesseract(); - tess->input_file_path_ = name ? name : ""; + Tesseract &tess = tesseract(); + tess.input_file_path_ = name ? name : ""; } /** Set the name of the visible image files. Needed only for PDF output. */ @@ -330,8 +330,8 @@ ImageCostEstimate TessBaseAPI::EstimateImageMemoryCost(const Pix* pix, float all * and reports the cost estimate for the current instance/image. 
*/ ImageCostEstimate TessBaseAPI::EstimateImageMemoryCost() const { - const Tesseract *tess = tesseract(); - return tess->EstimateImageMemoryCost(); + const Tesseract &tess = tesseract(); + return tess.EstimateImageMemoryCost(); } /** @@ -343,8 +343,8 @@ ImageCostEstimate TessBaseAPI::EstimateImageMemoryCost() const { * this same check as part of their startup routine. */ bool TessBaseAPI::CheckAndReportIfImageTooLarge(const Pix* pix) const { - const Tesseract *tess = tesseract(); - return tess->CheckAndReportIfImageTooLarge(pix); + const Tesseract &tess = tesseract(); + return tess.CheckAndReportIfImageTooLarge(pix); } /** Set the name of the output files. Needed only for debugging. */ @@ -357,24 +357,24 @@ const std::string &TessBaseAPI::GetOutputName() { } bool TessBaseAPI::SetVariable(const char *name, const char *value) { - Tesseract *tess = tesseract(); - return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY, tess->params()); + Tesseract &tess = tesseract(); + return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY, tess.params()); } bool TessBaseAPI::SetVariable(const char *name, int value) { - Tesseract *tess = tesseract(); + Tesseract &tess = tesseract(); std::string v = fmt::format("{}", value); - return ParamUtils::SetParam(name, v.c_str(), SET_PARAM_CONSTRAINT_NON_INIT_ONLY, tess->params()); + return ParamUtils::SetParam(name, v.c_str(), SET_PARAM_CONSTRAINT_NON_INIT_ONLY, tess.params()); } bool TessBaseAPI::SetDebugVariable(const char *name, const char *value) { - Tesseract *tess = tesseract(); - return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY, tess->params()); + Tesseract &tess = tesseract(); + return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY, tess.params()); } bool TessBaseAPI::GetIntVariable(const char *name, int *value) const { - Tesseract *tess = const_cast(tesseract()); + Tesseract &tess = const_cast(tesseract()); auto *p = ParamUtils::FindParam(name, 
GlobalParams()->int_params(), - tess->params()->int_params()); + tess.params()->int_params()); if (p == nullptr) { return false; } @@ -383,9 +383,9 @@ bool TessBaseAPI::GetIntVariable(const char *name, int *value) const { } bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const { - Tesseract *tess = const_cast(tesseract()); + Tesseract &tess = const_cast(tesseract()); auto *p = ParamUtils::FindParam(name, GlobalParams()->bool_params(), - tess->params()->bool_params()); + tess.params()->bool_params()); if (p == nullptr) { return false; } @@ -394,9 +394,9 @@ bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const { } const char *TessBaseAPI::GetStringVariable(const char *name) const { - Tesseract *tess = const_cast(tesseract()); + Tesseract &tess = const_cast(tesseract()); auto *p = ParamUtils::FindParam(name, GlobalParams()->string_params(), - tess->params()->string_params()); + tess.params()->string_params()); if (p == nullptr) { return nullptr; } @@ -404,9 +404,9 @@ const char *TessBaseAPI::GetStringVariable(const char *name) const { } bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const { - Tesseract *tess = const_cast(tesseract()); + Tesseract &tess = const_cast(tesseract()); auto *p = ParamUtils::FindParam(name, GlobalParams()->double_params(), - tess->params()->double_params()); + tess.params()->double_params()); if (p == nullptr) { return false; } @@ -416,8 +416,8 @@ bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const { /** Get value of named variable as a string, if it exists. 
*/ bool TessBaseAPI::GetVariableAsString(const char *name, std::string *val) const { - Tesseract *tess = const_cast(tesseract()); - return ParamUtils::GetParamAsString(name, tess->params(), val); + Tesseract &tess = const_cast(tesseract()); + return ParamUtils::GetParamAsString(name, tess.params(), val); } #if !DISABLED_LEGACY_ENGINE @@ -427,10 +427,10 @@ void TessBaseAPI::PrintFontsTable(FILE *fp) const { if (!fp) fp = stdout; bool print_info = (fp == stdout || fp == stderr); - const Tesseract *tess = tesseract(); - const int fontinfo_size = tess->get_fontinfo_table().size(); + const Tesseract &tess = tesseract(); + const int fontinfo_size = tess.get_fontinfo_table().size(); for (int font_index = 1; font_index < fontinfo_size; ++font_index) { - FontInfo font = tess->get_fontinfo_table().at(font_index); + FontInfo font = tess.get_fontinfo_table().at(font_index); if (print_info) { tprintInfo( "ID={}: {} is_italic={} is_bold={} is_fixed_pitch={} is_serif={} is_fraktur={}\n", @@ -462,8 +462,8 @@ void TessBaseAPI::PrintFontsTable(FILE *fp) const { * (use DumpVariables instead to create config files). */ void TessBaseAPI::PrintVariables(FILE *fp) const { - Tesseract *tess = const_cast(tesseract()); - ParamUtils::PrintParams(fp, tess->params(), true); + Tesseract &tess = const_cast(tesseract()); + ParamUtils::PrintParams(fp, tess.params(), true); } /** @@ -471,8 +471,8 @@ void TessBaseAPI::PrintVariables(FILE *fp) const { * Can be used as Tesseract configuration file. */ void TessBaseAPI::DumpVariables(FILE *fp) const { - Tesseract *tess = const_cast(tesseract()); - ParamUtils::PrintParams(fp, tess->params(), false); + Tesseract &tess = const_cast(tesseract()); + ParamUtils::PrintParams(fp, tess.params(), false); } // Report parameters' usage statistics, i.e. report which params have been @@ -483,11 +483,11 @@ void TessBaseAPI::DumpVariables(FILE *fp) const { // answering the question: // "Which of all those parameters are actually *relevant* to my use case today?" 
void TessBaseAPI::ReportParamsUsageStatistics() const { - Tesseract *tess = const_cast(tesseract()); - const tesseract::ParamsVectors *vec = tess->params(); + Tesseract &tess = const_cast(tesseract()); + const tesseract::ParamsVectors *vec = tess.params(); std::string fpath = tesseract::vars_report_file; FILE *f = ParamUtils::OpenReportFile(fpath.c_str()); - int section_level = tess->GetPixDebugSectionLevel(); + int section_level = tess.GetPixDebugSectionLevel(); ParamUtils::ReportParamsUsageStatistics(f, vec, section_level, nullptr); if (f) { if (f != stdout && f != stderr) { @@ -549,25 +549,19 @@ int TessBaseAPI::InitFullWithReader(const char *data, int data_size, const char } std::string datapath = data_size == 0 ? data : language; ASSERT_HOST(tesseract_ != nullptr); - Tesseract *tess = tesseract(); + Tesseract &tess = tesseract(); // If the datapath, OcrEngineMode or the language have changed - start again. // Note that the language_ field stores the last requested language that was - // initialized successfully, while tess->lang stores the language + // initialized successfully, while tess.lang stores the language // actually used. They differ only if the requested language was nullptr, in - // which case tess->lang is set to the Tesseract default ("eng"). - if (tess->RequiresWipeBeforeIndependentReUse() && + // which case tess.lang is set to the Tesseract default ("eng"). + if (tess.RequiresWipeBeforeIndependentReUse() && (datapath_.empty() || language_.empty() || datapath_ != datapath || - last_oem_requested_ != oem || (language_ != language && tess->lang_ != language))) { -#if 0 - delete tesseract_; - tesseract_ = nullptr; -#else + last_oem_requested_ != oem || (language_ != language && tess.lang_ != language))) { // try not to throw away tesseract instances. Clean them out rigorously, instead. 
- tess->WipeSqueakyCleanForReUse(); -#endif + tess.WipeSqueakyCleanForReUse(); } - ASSERT_HOST(tesseract_ != nullptr); if (reader != nullptr) { reader_ = reader; } @@ -578,7 +572,7 @@ int TessBaseAPI::InitFullWithReader(const char *data, int data_size, const char (void)Monitor().set_progress(0.0).exec_progress_func(); - if (tess->init_tesseract(datapath, output_file_, language, oem, configs, + if (tess.init_tesseract(datapath, output_file_, language, oem, configs, configs_size, vars_vec, vars_values, set_only_non_debug_params, &mgr) != 0) { return -1; @@ -586,8 +580,8 @@ int TessBaseAPI::InitFullWithReader(const char *data, int data_size, const char // Update datapath and language requested for the last valid initialization. datapath_ = std::move(datapath); - if (datapath_.empty() && !tess->datadir_.empty()) { - datapath_ = tess->datadir_; + if (datapath_.empty() && !tess.datadir_.empty()) { + datapath_ = tess.datadir_; } language_ = language; @@ -600,7 +594,7 @@ int TessBaseAPI::InitFullWithReader(const char *data, int data_size, const char // can come through here after a previous failed/aborted/successful // initialization and we still would need to set up the Tesseract // instance to a definitely known state here anyway. 
- tess->ResetAdaptiveClassifier(); + tess.ResetAdaptiveClassifier(); #endif // !DISABLED_LEGACY_ENGINE if (Monitor().kick_watchdog_and_check_for_cancel()) { @@ -628,8 +622,8 @@ const ETEXT_DESC &TessBaseAPI::Monitor() const { void TessBaseAPI::DebugAddCommandline(const std::vector& argv) { - Tesseract *tess = tesseract(); - tess->DebugAddCommandline(argv); + Tesseract &tess = tesseract(); + tess.DebugAddCommandline(argv); } @@ -653,11 +647,11 @@ const char *TessBaseAPI::GetInitLanguagesAsString() const { void TessBaseAPI::GetLoadedLanguagesAsVector(std::vector *langs) const { langs->clear(); ASSERT_HOST(tesseract_ != nullptr); - const Tesseract *tess = tesseract(); - langs->push_back(tess->lang_); - int num_subs = tess->num_sub_langs(); + const Tesseract &tess = tesseract(); + langs->push_back(tess.lang_); + int num_subs = tess.num_sub_langs(); for (int i = 0; i < num_subs; ++i) { - langs->push_back(tess->get_sub_lang(i)->lang_); + langs->push_back(tess.get_sub_lang(i)->lang_); } } @@ -667,8 +661,8 @@ void TessBaseAPI::GetLoadedLanguagesAsVector(std::vector *langs) co void TessBaseAPI::GetAvailableLanguagesAsVector(std::vector *langs) const { langs->clear(); ASSERT_HOST(tesseract_ != nullptr); - const Tesseract *tess = tesseract(); - addAvailableLanguages(tess->datadir_, "", langs); + const Tesseract &tess = tesseract(); + addAvailableLanguages(tess.datadir_, "", langs); std::sort(langs->begin(), langs->end()); } @@ -678,9 +672,9 @@ void TessBaseAPI::GetAvailableLanguagesAsVector(std::vector *langs) */ void TessBaseAPI::InitForAnalysePage() { ASSERT_HOST(tesseract_ != nullptr); - Tesseract *tess = tesseract(); + Tesseract &tess = tesseract(); #if !DISABLED_LEGACY_ENGINE - tess->InitAdaptiveClassifier(nullptr); + tess.InitAdaptiveClassifier(nullptr); #endif } @@ -690,8 +684,8 @@ void TessBaseAPI::InitForAnalysePage() { * and also accepts a relative or absolute path name. 
*/ void TessBaseAPI::ReadConfigFile(const char *filename) { - Tesseract *tess = tesseract(); - tess->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY); + Tesseract &tess = tesseract(); + tess.read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY); } /** @@ -700,8 +694,8 @@ void TessBaseAPI::ReadConfigFile(const char *filename) { * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string). */ void TessBaseAPI::SetPageSegMode(PageSegMode mode) { - Tesseract *tess = tesseract(); - tess->tessedit_pageseg_mode.set_value(mode); + Tesseract &tess = tesseract(); + tess.tessedit_pageseg_mode.set_value(mode); } /** Return the current page segmentation mode. */ @@ -709,8 +703,8 @@ PageSegMode TessBaseAPI::GetPageSegMode() const { if (tesseract_ == nullptr) { return PSM_SINGLE_BLOCK; } - const Tesseract *tess = tesseract(); - return static_cast(tess->tessedit_pageseg_mode.value()); + const Tesseract &tess = tesseract(); + return static_cast(tess.tessedit_pageseg_mode.value()); } /** @@ -755,9 +749,9 @@ void TessBaseAPI::ClearAdaptiveClassifier() { if (tesseract_ == nullptr) { return; } - Tesseract *tess = tesseract(); - tess->ResetAdaptiveClassifier(); - tess->ResetDocumentDictionary(); + Tesseract& tess = tesseract(); + tess.ResetAdaptiveClassifier(); + tess.ResetDocumentDictionary(); } #endif // !DISABLED_LEGACY_ENGINE @@ -856,8 +850,8 @@ Pix *TessBaseAPI::GetThresholdedImage() { "{} was invoked without a live tesseract thresholder instance: please call SetImage before attempting this method.\n", __func__); - Tesseract* tess = tesseract(); - if (tess->pix_binary() == nullptr) { + Tesseract& tess = tesseract(); + if (tess.pix_binary() == nullptr) { if (verbose_process) { tprintInfo("PROCESS: the source image is not a binary image, hence we apply a thresholding algo/subprocess to obtain a binarized image.\n"); } @@ -866,18 +860,18 @@ Pix *TessBaseAPI::GetThresholdedImage() { if (!Threshold(pix.obtains())) { return nullptr; } - 
tess->set_pix_binary(pix); // candidate for move semantics + tess.set_pix_binary(pix); // candidate for move semantics - if (tess->tessedit_dump_pageseg_images) { - tess->AddPixDebugPage(tess->pix_binary(), "Thresholded Image result (because it wasn't thresholded yet)"); + if (tess.tessedit_dump_pageseg_images) { + tess.AddPixDebugPage(tess.pix_binary(), "Thresholded Image result (because it wasn't thresholded yet)"); } } - // Image p1 = pixRotate(tess->pix_binary(), 0.15, L_ROTATE_SHEAR, L_BRING_IN_WHITE, 0, 0); + // Image p1 = pixRotate(tess.pix_binary(), 0.15, L_ROTATE_SHEAR, L_BRING_IN_WHITE, 0, 0); // because we want to keep the public API as-is for now, instead of migrating it to using Image type directly, // we downgrade to `PIX *` at the exit point, hence the reponsibility to CLONE is ours: - return tess->pix_binary().clone2pix(); + return tess.pix_binary().clone2pix(); } /** @@ -1127,8 +1121,8 @@ int TessBaseAPI::GetThresholdedImageScaleFactor() const { */ PageIterator *TessBaseAPI::AnalyseLayout(bool merge_similar_words) { if (FindLines() == 0) { - Tesseract *tess = tesseract(); - AutoPopDebugSectionLevel section_handle(tess, tess->PushSubordinatePixDebugSection("Analyse Layout")); + Tesseract& tess = tesseract(); + AutoPopDebugSectionLevel section_handle(tess, tess.PushSubordinatePixDebugSection("Analyse Layout")); if (block_list_->empty()) { return nullptr; // The page was empty. 
@@ -1151,64 +1145,64 @@ int TessBaseAPI::Recognize() { "{} was invoked without a live tesseract instance: please call Init and/or SetImage before attempting this method.\n", __func__); - Tesseract *tess = tesseract(); + Tesseract& tess = tesseract(); if (FindLines() != 0) { return -1; } - AutoPopDebugSectionLevel section_handle(tess, tess->PushSubordinatePixDebugSection("Recognize (OCR)")); + AutoPopDebugSectionLevel section_handle(tess, tess.PushSubordinatePixDebugSection("Recognize (OCR)")); delete page_res_; if (block_list_->empty()) { - page_res_ = new PAGE_RES(false, block_list_, &tess->prev_word_best_choice_); + page_res_ = new PAGE_RES(false, block_list_, &tess.prev_word_best_choice_); return 0; // Empty page. } - tess->SetBlackAndWhitelist(); + tess.SetBlackAndWhitelist(); recognition_done_ = true; #if !DISABLED_LEGACY_ENGINE - if (tess->tessedit_resegment_from_line_boxes) { + if (tess.tessedit_resegment_from_line_boxes) { if (verbose_process) tprintInfo("PROCESS: Re-segment from line boxes.\n"); - page_res_ = tess->ApplyBoxes(tess->input_file_path_.c_str(), true, block_list_); - } else if (tess->tessedit_resegment_from_boxes) { + page_res_ = tess.ApplyBoxes(tess.input_file_path_.c_str(), true, block_list_); + } else if (tess.tessedit_resegment_from_boxes) { if (verbose_process) tprintInfo("PROCESS: Re-segment from page boxes.\n"); - page_res_ = tess->ApplyBoxes(tess->input_file_path_.c_str(), false, block_list_); + page_res_ = tess.ApplyBoxes(tess.input_file_path_.c_str(), false, block_list_); } else #endif // !DISABLED_LEGACY_ENGINE { if (verbose_process) tprintInfo("PROCESS: Re-segment from LSTM / previous word best choice.\n"); - page_res_ = new PAGE_RES(tess->AnyLSTMLang(), block_list_, &tess->prev_word_best_choice_); + page_res_ = new PAGE_RES(tess.AnyLSTMLang(), block_list_, &tess.prev_word_best_choice_); } if (page_res_ == nullptr) { return -1; } - if (tess->tessedit_train_line_recognizer) { - AutoPopDebugSectionLevel subsection_handle(tess, 
tess->PushSubordinatePixDebugSection("Train Line Recognizer: Correct Classify Words")); - if (!tess->TrainLineRecognizer(tess->input_file_path_.c_str(), output_file_, block_list_)) { + if (tess.tessedit_train_line_recognizer) { + AutoPopDebugSectionLevel subsection_handle(tess, tess.PushSubordinatePixDebugSection("Train Line Recognizer: Correct Classify Words")); + if (!tess.TrainLineRecognizer(tess.input_file_path_.c_str(), output_file_, block_list_)) { return -1; } - tess->CorrectClassifyWords(page_res_); + tess.CorrectClassifyWords(page_res_); return 0; } #if !DISABLED_LEGACY_ENGINE - if (tess->tessedit_make_boxes_from_boxes) { - AutoPopDebugSectionLevel subsection_handle(tess, tess->PushSubordinatePixDebugSection("Make Boxes From Boxes: Correct Classify Words")); - tess->CorrectClassifyWords(page_res_); + if (tess.tessedit_make_boxes_from_boxes) { + AutoPopDebugSectionLevel subsection_handle(tess, tess.PushSubordinatePixDebugSection("Make Boxes From Boxes: Correct Classify Words")); + tess.CorrectClassifyWords(page_res_); return 0; } #endif // !DISABLED_LEGACY_ENGINE int result = 0; - if (tess->SupportsInteractiveScrollView()) { - AutoPopDebugSectionLevel subsection_handle(tess, tess->PushSubordinatePixDebugSection("PGEditor: Interactive Session")); + if (tess.SupportsInteractiveScrollView()) { + AutoPopDebugSectionLevel subsection_handle(tess, tess.PushSubordinatePixDebugSection("PGEditor: Interactive Session")); #if !GRAPHICS_DISABLED - tess->pgeditor_main(rect_width_, rect_height_, page_res_); + tess.pgeditor_main(rect_width_, rect_height_, page_res_); #endif // !GRAPHICS_DISABLED // The page_res is invalid after an interactive session, so cleanup // in a way that lets us continue to the next page without crashing. 
@@ -1216,45 +1210,45 @@ int TessBaseAPI::Recognize() { page_res_ = nullptr; return -1; #if !DISABLED_LEGACY_ENGINE - } else if (tess->tessedit_train_from_boxes) { - AutoPopDebugSectionLevel subsection_handle(tess, tess->PushSubordinatePixDebugSection("Train From Boxes")); + } else if (tess.tessedit_train_from_boxes) { + AutoPopDebugSectionLevel subsection_handle(tess, tess.PushSubordinatePixDebugSection("Train From Boxes")); std::string fontname; ExtractFontName(output_file_.c_str(), &fontname); - tess->ApplyBoxTraining(fontname, page_res_); - } else if (tess->tessedit_ambigs_training) { - AutoPopDebugSectionLevel subsection_handle(tess, tess->PushSubordinatePixDebugSection("Train Ambigs")); - FILE *training_output_file = tess->init_recog_training(tess->input_file_path_.c_str()); + tess.ApplyBoxTraining(fontname, page_res_); + } else if (tess.tessedit_ambigs_training) { + AutoPopDebugSectionLevel subsection_handle(tess, tess.PushSubordinatePixDebugSection("Train Ambigs")); + FILE *training_output_file = tess.init_recog_training(tess.input_file_path_.c_str()); // OCR the page segmented into words by tesseract. - tess->recog_training_segmented(tess->input_file_path_.c_str(), page_res_, training_output_file); + tess.recog_training_segmented(tess.input_file_path_.c_str(), page_res_, training_output_file); fclose(training_output_file); #endif // !DISABLED_LEGACY_ENGINE } else { - AutoPopDebugSectionLevel subsection_handle(tess, tess->PushSubordinatePixDebugSection("The Main Recognition Phase")); + AutoPopDebugSectionLevel subsection_handle(tess, tess.PushSubordinatePixDebugSection("The Main Recognition Phase")); if (scrollview_support) { - tess->pgeditor_main(rect_width_, rect_height_, page_res_); + tess.pgeditor_main(rect_width_, rect_height_, page_res_); } // Now run the main recognition. 
- if (!tess->paragraph_text_based) { - AutoPopDebugSectionLevel subsection_handle(tess, tess->PushSubordinatePixDebugSection("Detect Paragraphs (Before Recognition)")); + if (!tess.paragraph_text_based) { + AutoPopDebugSectionLevel subsection_handle(tess, tess.PushSubordinatePixDebugSection("Detect Paragraphs (Before Recognition)")); DetectParagraphs(false); if (scrollview_support) { - tess->pgeditor_main(rect_width_, rect_height_, page_res_); + tess.pgeditor_main(rect_width_, rect_height_, page_res_); } } - AutoPopDebugSectionLevel subsection_handle2(tess, tess->PushSubordinatePixDebugSection("Recognize All Words")); - if (tess->recog_all_words(page_res_, nullptr, nullptr, 0)) { + AutoPopDebugSectionLevel subsection_handle2(tess, tess.PushSubordinatePixDebugSection("Recognize All Words")); + if (tess.recog_all_words(page_res_, nullptr, nullptr, 0)) { if (scrollview_support) { - tess->pgeditor_main(rect_width_, rect_height_, page_res_); + tess.pgeditor_main(rect_width_, rect_height_, page_res_); } subsection_handle2.pop(); - if (tess->paragraph_text_based) { - AutoPopDebugSectionLevel subsection_handle(tess, tess->PushSubordinatePixDebugSection("Detect Paragraphs (After Recognition)")); + if (tess.paragraph_text_based) { + AutoPopDebugSectionLevel subsection_handle(tess, tess.PushSubordinatePixDebugSection("Detect Paragraphs (After Recognition)")); DetectParagraphs(true); if (scrollview_support) { - tess->pgeditor_main(rect_width_, rect_height_, page_res_); + tess.pgeditor_main(rect_width_, rect_height_, page_res_); } } } else { @@ -1268,38 +1262,38 @@ int TessBaseAPI::Recognize() { void TessBaseAPI::SetInputImage(Pix *pix) { Image img(false, pix); img = img.copy(); - Tesseract *tess = tesseract(); - tess->set_pix_original(img); + Tesseract &tess = tesseract(); + tess.set_pix_original(img); } // Takes ownership of the input pix. 
void TessBaseAPI::SetInputImage(Image &&pix) { - Tesseract *tess = tesseract(); - tess->set_pix_original(pix); + Tesseract &tess = tesseract(); + tess.set_pix_original(pix); } void TessBaseAPI::SetInputImage(const Image &pix) { - Tesseract *tess = tesseract(); - tess->set_pix_original(pix); + Tesseract &tess = tesseract(); + tess.set_pix_original(pix); } void TessBaseAPI::SetVisibleImage(Pix *pix) { pix_visible_image_ = pixCopy(NULL, pix); - // tess->set_pix_visible_image(pix); + // tess.set_pix_visible_image(pix); } void TessBaseAPI::SetVisibleImage(Image &&pix) { pix_visible_image_ = pix; - // tess->set_pix_visible_image(pix); + // tess.set_pix_visible_image(pix); } void TessBaseAPI::SetVisibleImage(const Image &pix) { pix_visible_image_ = pix; //.clone(); - //tess->set_pix_visible_image(pix); + //tess.set_pix_visible_image(pix); } Pix *TessBaseAPI::GetInputImage() const { - const Tesseract *tess = tesseract(); - return tess->pix_original().clone2pix(); + const Tesseract &tess = tesseract(); + return tess.pix_original().clone2pix(); } static const char* NormalizationProcessModeName(int mode) { @@ -1336,8 +1330,8 @@ static const char *NormalizationTargetModeName(int mode) { // Grayscale normalization (preprocessing) bool TessBaseAPI::NormalizeImage(int mode) { - Tesseract *tess = tesseract(); - AutoPopDebugSectionLevel section_handle(tess, tess->PushSubordinatePixDebugSection("Normalize Image")); + Tesseract& tess = tesseract(); + AutoPopDebugSectionLevel section_handle(tess, tess.PushSubordinatePixDebugSection("Normalize Image")); // Get a clone/copy of the source image rectangle, reduced to normalized greyscale, // and at the same resolution as the output binary. @@ -1356,10 +1350,10 @@ bool TessBaseAPI::NormalizeImage(int mode) { // ... and feed the result into the designated target(s): thresholder and/or tesseract source image (which is used as LSTM v4/v5 engine input). 
int targets = (mode & 0x03); - bool debug = (tess->debug_image_normalization || tess->tessedit_write_images); + bool debug = (tess.debug_image_normalization || tess.tessedit_write_images); if (false && debug) { - tess->AddPixDebugPage(pix, fmt::format("Grayscale normalization mode = {} ({} ({}) + {} ({}))", mode, NormalizationProcessModeName(process), process, NormalizationTargetModeName(targets), targets)); + tess.AddPixDebugPage(pix, fmt::format("Grayscale normalization mode = {} ({} ({}) + {} ({}))", mode, NormalizationProcessModeName(process), process, NormalizationTargetModeName(targets), targets)); } switch (process) { @@ -1386,7 +1380,7 @@ bool TessBaseAPI::NormalizeImage(int mode) { } if (debug) { - tess->AddPixDebugPage(result_pix, fmt::format("Grayscale normalization mode = {} ({} ({}) + {} ({}))", mode, NormalizationProcessModeName(process), process, NormalizationTargetModeName(targets), targets)); + tess.AddPixDebugPage(result_pix, fmt::format("Grayscale normalization mode = {} ({} ({}) + {} ({}))", mode, NormalizationProcessModeName(process), process, NormalizationTargetModeName(targets), targets)); } switch (targets) { @@ -1426,9 +1420,9 @@ const char *TessBaseAPI::GetInputName() { "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", __func__); - Tesseract *tess = tesseract(); - if (!tess->input_file_path_.empty()) { - return tess->input_file_path_.c_str(); + Tesseract &tess = tesseract(); + if (!tess.input_file_path_.empty()) { + return tess.input_file_path_.c_str(); } return nullptr; } @@ -1441,8 +1435,8 @@ const char * TessBaseAPI::GetVisibleImageFilename() { } const char *TessBaseAPI::GetDatapath() { - Tesseract *tess = tesseract(); - return tess->datadir_.c_str(); + Tesseract &tess = tesseract(); + return tess.datadir_.c_str(); } int TessBaseAPI::GetSourceYResolution() { @@ -1465,8 +1459,8 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, if (!flist && !buf) { return false; 
} - Tesseract *tess = tesseract(); - int page_number = (tess->tessedit_page_number >= 0) ? tess->tessedit_page_number : 0; + Tesseract& tess = tesseract(); + int page_number = (tess.tessedit_page_number >= 0) ? tess.tessedit_page_number : 0; char pagename[MAX_PATH]; std::vector lines; @@ -1516,7 +1510,7 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, return false; } tprintInfo("Processing page #{} : {}\n", page_number + 1, pagename); - tess->applybox_page.set_value(page_number); + tess.applybox_page.set_value(page_number); bool r = ProcessPage(pix, pagename, renderer); bool two_pass = false; @@ -1531,7 +1525,7 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, SetPageSegMode(PSM_SINGLE_BLOCK); // Set thresholding method to 0 for second pass regardless - tess->thresholding_method = (int)ThresholdMethod::Otsu; + tess.thresholding_method = (int)ThresholdMethod::Otsu; // SetPageSegMode(PSM_SPARSE_TEXT); SetImage(newpix); @@ -1549,7 +1543,7 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, if (!r) { return false; } - if (tess->tessedit_page_number >= 0) { + if (tess.tessedit_page_number >= 0) { break; } ++page_number; @@ -1567,12 +1561,12 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, size_t size, const char *filename, TessResultRenderer *renderer) { Image pix; - Tesseract *tess = tesseract(); - int page_number = (tess->tessedit_page_number >= 0) ? tess->tessedit_page_number : 0; + Tesseract& tess = tesseract(); + int page_number = (tess.tessedit_page_number >= 0) ? tess.tessedit_page_number : 0; size_t offset = 0; for (;; ++page_number) { - if (tess->tessedit_page_number >= 0) { - page_number = tess->tessedit_page_number; + if (tess.tessedit_page_number >= 0) { + page_number = tess.tessedit_page_number; pix = (data) ? 
pixReadMemTiff(data, size, page_number) : pixReadTiff(filename, page_number); } else { pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset) @@ -1582,12 +1576,12 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, size_t size, co break; } tprintInfo("Processing page #{} of multipage TIFF {}\n", page_number + 1, filename ? filename : "(from internal storage)"); - tess->applybox_page.set_value(page_number); + tess.applybox_page.set_value(page_number); bool r = ProcessPage(pix, filename, renderer); if (!r) { return false; } - if (tess->tessedit_page_number >= 0) { + if (tess.tessedit_page_number >= 0) { break; } if (!offset) { @@ -1601,13 +1595,13 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, size_t size, co // processing required due to being in a training mode. bool TessBaseAPI::ProcessPages(const char *filename, TessResultRenderer *renderer) { - Tesseract *tess = tesseract(); - AutoPopDebugSectionLevel section_handle(tess, tess->PushSubordinatePixDebugSection("Process pages")); + Tesseract& tess = tesseract(); + AutoPopDebugSectionLevel section_handle(tess, tess.PushSubordinatePixDebugSection("Process pages")); bool result = ProcessPagesInternal(filename, renderer); #if !DISABLED_LEGACY_ENGINE if (result) { - if (tess->tessedit_train_from_boxes && !tess->WriteTRFile(output_file_.c_str())) { + if (tess.tessedit_train_from_boxes && !tess.WriteTRFile(output_file_.c_str())) { tprintError("Write of TR file failed: {}\n", output_file_.c_str()); return false; } @@ -1638,7 +1632,7 @@ static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, voi // stdin. We'll still do our best if the user likes pipes. 
bool TessBaseAPI::ProcessPagesInternal(const char *filename, TessResultRenderer *renderer) { - Tesseract *tess = tesseract(); + Tesseract &tess = tesseract(); bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "/dev/stdin") || !strcmp(filename, "-"); if (stdInput) { #if defined(WIN32) || defined(_WIN32) || defined(_WIN64) @@ -1786,7 +1780,7 @@ bool TessBaseAPI::ProcessPagesInternal(const char *filename, r = ProcessPagesMultipageTiff(data, buf.size(), filename, renderer); } else { - tess->applybox_page.set_value(-1 /* all pages */); + tess.applybox_page.set_value(-1 /* all pages */); r = ProcessPage(pix, filename, renderer); } @@ -1801,8 +1795,8 @@ bool TessBaseAPI::ProcessPagesInternal(const char *filename, bool TessBaseAPI::ProcessPage(Pix *pix, const char *filename, TessResultRenderer *renderer) { - Tesseract *tess = tesseract(); - AutoPopDebugSectionLevel page_level_handle(tess, tess->PushSubordinatePixDebugSection(fmt::format("Process a single page: page #{}", 1 + tess->tessedit_page_number))); + Tesseract& tess = tesseract(); + AutoPopDebugSectionLevel page_level_handle(tess, tess.PushSubordinatePixDebugSection(fmt::format("Process a single page: page #{}", 1 + tess.tessedit_page_number))); //page_level_handle.SetAsRootLevelForParamUsageReporting(); SetInputName(filename); @@ -1828,7 +1822,7 @@ bool TessBaseAPI::ProcessPage(Pix *pix, const char *filename, // Image preprocessing on image // Grayscale normalization - int graynorm_mode = tess->preprocess_graynorm_mode; + int graynorm_mode = tess.preprocess_graynorm_mode; { bool rc = NormalizeImage(graynorm_mode); if (!rc) @@ -1839,21 +1833,21 @@ bool TessBaseAPI::ProcessPage(Pix *pix, const char *filename, bool failed = false; - if (tess->tessedit_pageseg_mode == PSM_AUTO_ONLY) { + if (tess.tessedit_pageseg_mode == PSM_AUTO_ONLY) { // Disabled character recognition if (! 
std::unique_ptr(AnalyseLayout())) { failed = true; } - } else if (tess->tessedit_pageseg_mode == PSM_OSD_ONLY) { + } else if (tess.tessedit_pageseg_mode == PSM_OSD_ONLY) { failed = (FindLines() != 0); } else { // Normal layout and character recognition. failed = (Recognize() < 0); } - if (tess->tessedit_write_images) { + if (tess.tessedit_write_images) { Image page_pix = GetThresholdedImage(); - tess->AddPixDebugPage(page_pix, fmt::format("processed page #{} : text recog done", 1 + tess->tessedit_page_number)); + tess.AddPixDebugPage(page_pix, fmt::format("processed page #{} : text recog done", 1 + tess.tessedit_page_number)); } if (renderer && !failed) { @@ -1970,8 +1964,8 @@ std::tuple TessBaseAPI::GetTableBoundingBox(unsigned i) return std::tuple(0, 0, 0, 0); } - Tesseract *tess = tesseract(); - const int height = tess->ImageHeight(); + Tesseract &tess = tesseract(); + const int height = tess.ImageHeight(); return std::make_tuple( t[i].box.left(), height - t[i].box.top(), @@ -1986,9 +1980,9 @@ std::vector> TessBaseAPI::GetTableRows(unsigned i) return std::vector>(); } - Tesseract *tess = tesseract(); + Tesseract &tess = tesseract(); std::vector> rows(t[i].rows.size()); - const int height = tess->ImageHeight(); + const int height = tess.ImageHeight(); for (unsigned j = 0; j < t[i].rows.size(); ++j) { rows[j] = @@ -2007,9 +2001,9 @@ std::vector> TessBaseAPI::GetTableCols(unsigned i) return std::vector>(); } - Tesseract *tess = tesseract(); + Tesseract &tess = tesseract(); std::vector> cols(t[i].cols.size()); - const int height = tess->ImageHeight(); + const int height = tess.ImageHeight(); for (unsigned j = 0; j < t[i].cols.size(); ++j) { cols[j] = @@ -2290,7 +2284,7 @@ char *TessBaseAPI::GetUNLVText() { if (!recognition_done_ && Recognize() < 0) { return nullptr; } - Tesseract *tess = tesseract(); + Tesseract &tess = tesseract(); bool tilde_crunch_written = false; bool last_char_was_newline = true; @@ -2325,7 +2319,7 @@ char *TessBaseAPI::GetUNLVText() { } else { 
// NORMAL PROCESSING of non tilde crunched words. tilde_crunch_written = false; - tess->set_unlv_suspects(word); + tess.set_unlv_suspects(word); const char *wordstr = word->best_choice->unichar_string().c_str(); const auto &lengths = word->best_choice->unichar_lengths(); int length = lengths.length(); @@ -2525,14 +2519,14 @@ int *TessBaseAPI::AllWordConfidences() { */ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char *wordstr) { bool success = true; - Tesseract *tess = tesseract(); + Tesseract& tess = tesseract(); PageSegMode current_psm = GetPageSegMode(); SetPageSegMode(mode); - tess->classify_enable_learning = false; + tess.classify_enable_learning = false; const std::unique_ptr text(GetUTF8Text()); - if (tess->applybox_debug) { + if (tess.applybox_debug) { tprintDebug("Trying to adapt \"{}\" to \"{}\"\n", text.get(), wordstr); } if (text != nullptr) { @@ -2559,9 +2553,9 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char *wordstr) { // No match. delete page_res_; std::vector boxes; - page_res_ = tess->SetupApplyBoxes(boxes, block_list_); - tess->ReSegmentByClassification(page_res_); - tess->TidyUp(page_res_); + page_res_ = tess.SetupApplyBoxes(boxes, block_list_); + tess.ReSegmentByClassification(page_res_); + tess.TidyUp(page_res_); PAGE_RES_IT pr_it(page_res_); if (pr_it.word() == nullptr) { success = false; @@ -2572,8 +2566,8 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char *wordstr) { word_res->BestChoiceToCorrectText(); } if (success) { - tess->EnableLearning = true; - tess->LearnWord(nullptr, word_res); + tess.EnableLearning = true; + tess.LearnWord(nullptr, word_res); } } else { success = false; @@ -2666,7 +2660,7 @@ void TessBaseAPI::WipeSqueakyCleanForReUse() { * Once End() has been used, none of the other API functions may be used * other than Init and anything declared above it in the class definition. 
* - * All `Tesseract*` reference pointers produced by the tesseract() API are invalid + * All `Tesseract&` references produced by the tesseract() API are invalid * after this call. If you don't want that, i.e. wish to use tesseract * some more, than consider using the new WipeSqueakyCleanForReUse() API * instead: that one DOES NOT invalidate the active Tesseract instance @@ -2702,13 +2696,13 @@ void TessBaseAPI::ClearPersistentCache() { * returns 0 if the word is invalid, non-zero if valid */ int TessBaseAPI::IsValidWord(const char *word) const { - Tesseract *tess = const_cast(tesseract()); - return tess->getDict().valid_word(word); + Tesseract &tess = const_cast(tesseract()); + return tess.getDict().valid_word(word); } // Returns true if utf8_character is defined in the UniCharset. bool TessBaseAPI::IsValidCharacter(const char *utf8_character) const { - const Tesseract *tess = tesseract(); - return tess->unicharset_.contains_unichar(utf8_character); + const Tesseract &tess = tesseract(); + return tess.unicharset_.contains_unichar(utf8_character); } // TODO(rays) Obsolete this function and replace with a more aptly named @@ -2750,8 +2744,8 @@ bool TessBaseAPI::GetTextDirection(int *out_offset, float *out_slope) { /** Sets Dict::letter_is_okay_ function to point to the given function. */ void TessBaseAPI::SetDictFunc(DictFunc f) { if (tesseract_ != nullptr) { - Tesseract *tess = tesseract(); - tess->getDict().letter_is_okay_ = f; + Tesseract &tess = tesseract(); + tess.getDict().letter_is_okay_ = f; } } @@ -2768,12 +2762,12 @@ void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) { "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", __func__); - Tesseract *tess = tesseract(); - tess->getDict().probability_in_context_ = f; + Tesseract &tess = tesseract(); + tess.getDict().probability_in_context_ = f; // Set it for the sublangs too. 
- int num_subs = tess->num_sub_langs(); + int num_subs = tess.num_sub_langs(); for (int i = 0; i < num_subs; ++i) { - tess->get_sub_lang(i)->getDict().probability_in_context_ = f; + tess.get_sub_lang(i)->getDict().probability_in_context_ = f; } } @@ -2800,13 +2794,13 @@ bool TessBaseAPI::InternalResetImage() { * The usual argument to Threshold is Tesseract::mutable_pix_binary(). */ bool TessBaseAPI::Threshold(Pix **pix) { - Tesseract *tess = tesseract(); + Tesseract& tess = tesseract(); ASSERT_HOST(pix != nullptr); if (*pix != nullptr) { pixDestroy(pix); } // Zero resolution messes up the algorithms, so make sure it is credible. - int user_dpi = tess->user_defined_dpi; + int user_dpi = tess.user_defined_dpi; int y_res = thresholder_->GetScaledYResolution(); if (user_dpi && (user_dpi < kMinCredibleResolution || user_dpi > kMaxCredibleResolution)) { tprintWarn( @@ -2831,11 +2825,11 @@ bool TessBaseAPI::Threshold(Pix **pix) { return false; } - auto selected_thresholding_method = static_cast(static_cast(tess->thresholding_method)); + auto selected_thresholding_method = static_cast(static_cast(tess.thresholding_method)); Image pix_binary; std::string caption = ThresholdMethodName(selected_thresholding_method); - AutoPopDebugSectionLevel subsec_handle(tess, tess->PushSubordinatePixDebugSection(fmt::format("Applying the threshold method chosen for this run: {}: {}", selected_thresholding_method, caption))); + AutoPopDebugSectionLevel subsec_handle(tess, tess.PushSubordinatePixDebugSection(fmt::format("Applying the threshold method chosen for this run: {}: {}", selected_thresholding_method, caption))); if (selected_thresholding_method == ThresholdMethod::Otsu) { pix_binary = pix; @@ -2846,11 +2840,11 @@ bool TessBaseAPI::Threshold(Pix **pix) { *pix = pix_binary.clone2pix(); if (!thresholder_->IsBinary()) { - tess->set_pix_thresholds(thresholder_->GetPixRectThresholds()); - tess->set_pix_grey(thresholder_->GetPixRectGrey()); + 
tess.set_pix_thresholds(thresholder_->GetPixRectThresholds()); + tess.set_pix_grey(thresholder_->GetPixRectGrey()); } else { - tess->set_pix_thresholds(nullptr); - tess->set_pix_grey(nullptr); + tess.set_pix_thresholds(nullptr); + tess.set_pix_grey(nullptr); } } else { auto [ok, pix_grey, pix_binary2, pix_thresholds] = thresholder_->Threshold(selected_thresholding_method); @@ -2862,19 +2856,19 @@ bool TessBaseAPI::Threshold(Pix **pix) { pix_binary = pix_binary2; *pix = pix_binary.clone2pix(); - tess->set_pix_thresholds(pix_thresholds); // candidates for move semantics - tess->set_pix_grey(pix_grey); + tess.set_pix_thresholds(pix_thresholds); // candidates for move semantics + tess.set_pix_grey(pix_grey); // pix_thresholds.destroy(); // pix_grey.destroy(); } - if (tess->tessedit_dump_pageseg_images) { - tess->AddPixDebugPage(tess->pix_grey(), fmt::format("{} : Grey = pre-image", caption)); - tess->AddPixDebugPage(tess->pix_thresholds(), fmt::format("{} : Thresholds", caption)); + if (tess.tessedit_dump_pageseg_images) { + tess.AddPixDebugPage(tess.pix_grey(), fmt::format("{} : Grey = pre-image", caption)); + tess.AddPixDebugPage(tess.pix_thresholds(), fmt::format("{} : Thresholds", caption)); if (verbose_process) { tprintInfo("PROCESS: The 'Thresholds' image displays the per-pixel grey level which will be used to decide which pixels are *foreground* (text, probably) and which pixels are *background* (i.e. the *paper* the text was printed on); you'll note that each pixel in the original (greyscale!) 
image which is darker than its corresponding threshold level is *binarized* to black (foreground in tesseract) while any lighter pixel is *binarized* to white (background in tesseract).\n"); } - tess->AddPixDebugPage(pix_binary, fmt::format("{} : Binary = post-image", caption)); + tess.AddPixDebugPage(pix_binary, fmt::format("{} : Binary = post-image", caption)); } // demo a bit of pre-postprocessing @@ -2882,32 +2876,32 @@ bool TessBaseAPI::Threshold(Pix **pix) { const char *sequence = "c1.1 + d3.3"; const int dispsep = 0; Image pix_post = pixMorphSequence(pix_binary, sequence, dispsep); - tess->AddPixCompedOverOrigDebugPage(pix_post, fmt::format("{} : post-processed: {} -- just an example to showcase what leptonica can do for us!", caption, sequence)); + tess.AddPixCompedOverOrigDebugPage(pix_post, fmt::format("{} : post-processed: {} -- just an example to showcase what leptonica can do for us!", caption, sequence)); l_int32 w, h, d; - Image composite = tess->pix_grey().copy(); + Image composite = tess.pix_grey().copy(); pixGetDimensions(composite, &w, &h, &d); Image mask = pixConvert1To8(nullptr, pix_post, 255, 0); pixRasterop(composite, 0, 0, w, h, PIX_PAINT, mask, 0, 0); - tess->AddPixCompedOverOrigDebugPage(composite, fmt::format("{} : post-processed & masked with: {} -- this should remove all image noise that's not very close to the text, i.e. is considered *not part of the text to OCR*.", caption, sequence)); + tess.AddPixCompedOverOrigDebugPage(composite, fmt::format("{} : post-processed & masked with: {} -- this should remove all image noise that's not very close to the text, i.e. 
is considered *not part of the text to OCR*.", caption, sequence)); - Image noise1 = pixEmphasizeImageNoise(tess->pix_original().ptr()); - Image noise2 = pixEmphasizeImageNoise(tess->pix_grey().ptr()); + Image noise1 = pixEmphasizeImageNoise(tess.pix_original().ptr()); + Image noise2 = pixEmphasizeImageNoise(tess.pix_grey().ptr()); Image noise3 = pixEmphasizeImageNoise(composite.ptr()); Image noise4 = pixEmphasizeImageNoise(pix_post.ptr()); - tess->AddPixCompedOverOrigDebugPage(noise1, fmt::format("{} : post-processed :: noise emphasis A: emphasized the noise inherent in the source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - tess->AddPixCompedOverOrigDebugPage(noise2, fmt::format("{} : post-processed :: noise emphasis B: emphasized the noise inherent in the greyscaled / normalized source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - tess->AddPixCompedOverOrigDebugPage(noise3, fmt::format("{} : post-processed :: noise emphasis C: emphasized the noise inherent in the composited image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - tess->AddPixCompedOverOrigDebugPage(noise4, fmt::format("{} : post-processed :: noise emphasis D: emphasized the noise inherent in the closed & binarized / thresholded source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess.AddPixCompedOverOrigDebugPage(noise1, fmt::format("{} : post-processed :: noise emphasis A: emphasized the noise inherent in the source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess.AddPixCompedOverOrigDebugPage(noise2, fmt::format("{} : post-processed :: noise emphasis B: emphasized the noise inherent in the greyscaled / normalized source image. 
Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess.AddPixCompedOverOrigDebugPage(noise3, fmt::format("{} : post-processed :: noise emphasis C: emphasized the noise inherent in the composited image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess.AddPixCompedOverOrigDebugPage(noise4, fmt::format("{} : post-processed :: noise emphasis D: emphasized the noise inherent in the closed & binarized / thresholded source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - noise1 = pixEmphasizeImageNoise2(tess->pix_original().ptr()); - noise2 = pixEmphasizeImageNoise2(tess->pix_grey().ptr()); + noise1 = pixEmphasizeImageNoise2(tess.pix_original().ptr()); + noise2 = pixEmphasizeImageNoise2(tess.pix_grey().ptr()); noise3 = pixEmphasizeImageNoise2(composite.ptr()); noise4 = pixEmphasizeImageNoise2(pix_post.ptr()); - tess->AddPixCompedOverOrigDebugPage(noise1, fmt::format("{} : post-processed :: noise emphasis E: emphasized the noise inherent in the source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - tess->AddPixCompedOverOrigDebugPage(noise2, fmt::format("{} : post-processed :: noise emphasis F: emphasized the noise inherent in the greyscaled / normalized source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - tess->AddPixCompedOverOrigDebugPage(noise3, fmt::format("{} : post-processed :: noise emphasis G: emphasized the noise inherent in the composited image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - tess->AddPixCompedOverOrigDebugPage(noise4, fmt::format("{} : post-processed :: noise emphasis H: emphasized the noise inherent in the closed & binarized / thresholded source image. 
Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess.AddPixCompedOverOrigDebugPage(noise1, fmt::format("{} : post-processed :: noise emphasis E: emphasized the noise inherent in the source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess.AddPixCompedOverOrigDebugPage(noise2, fmt::format("{} : post-processed :: noise emphasis F: emphasized the noise inherent in the greyscaled / normalized source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess.AddPixCompedOverOrigDebugPage(noise3, fmt::format("{} : post-processed :: noise emphasis G: emphasized the noise inherent in the composited image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess.AddPixCompedOverOrigDebugPage(noise4, fmt::format("{} : post-processed :: noise emphasis H: emphasized the noise inherent in the closed & binarized / thresholded source image. 
Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); if (false) { // NOTE/WARNING: if you want to pick up one of these processed images as the replacement `*pix` then you MUST @@ -2952,7 +2946,7 @@ bool TessBaseAPI::Threshold(Pix **pix) { "Corrected to {}.\n", thresholder_->GetScaledEstimatedResolution(), estimated_res); } - tess->set_source_resolution(estimated_res); + tess.set_source_resolution(estimated_res); (void)Monitor().bump_progress().exec_progress_func(); @@ -2978,13 +2972,13 @@ int TessBaseAPI::FindLines() { return 0; } ASSERT0(tesseract_ != nullptr); - Tesseract *tess = tesseract(); + Tesseract& tess = tesseract(); #if 0 #if !DISABLED_LEGACY_ENGINE - tesseract_->InitAdaptiveClassifier(nullptr); + tess.InitAdaptiveClassifier(nullptr); #endif #endif - if (tess->pix_binary() == nullptr) { + if (tess.pix_binary() == nullptr) { if (verbose_process) { tprintInfo("PROCESS: the source image is not a binary image, hence we apply a thresholding algo/subprocess to obtain a binarized image.\n"); } @@ -2993,31 +2987,31 @@ int TessBaseAPI::FindLines() { if (!Threshold(pix.obtains())) { return -1; } - tess->set_pix_binary(pix); + tess.set_pix_binary(pix); } - if (tess->tessedit_dump_pageseg_images) { - tess->AddPixDebugPage(tess->pix_binary(), "FindLines :: Thresholded Image -- this image is now set as the page Master Source Image for this activity"); + if (tess.tessedit_dump_pageseg_images) { + tess.AddPixDebugPage(tess.pix_binary(), "FindLines :: Thresholded Image -- this image is now set as the page Master Source Image for this activity"); } if (verbose_process) { tprintInfo("PROCESS: prepare the image for page segmentation, i.e. discovery of all text areas + bounding boxes & image/text orientation and script{} detection.\n", - (tess->textord_equation_detect ? " + equations" : "")); + (tess.textord_equation_detect ? 
" + equations" : "")); } - AutoPopDebugSectionLevel section_handle(tess, tess->PushSubordinatePixDebugSection("Prepare for Page Segmentation")); + AutoPopDebugSectionLevel section_handle(tess, tess.PushSubordinatePixDebugSection("Prepare for Page Segmentation")); - tess->PrepareForPageseg(); + tess.PrepareForPageseg(); #if !DISABLED_LEGACY_ENGINE - if (tess->textord_equation_detect) { + if (tess.textord_equation_detect) { if (equ_detect_ == nullptr && !datapath_.empty()) { equ_detect_ = new EquationDetect(*this, datapath_.c_str()); } if (equ_detect_ == nullptr) { tprintWarn("Could not set equation detector\n"); } else { - tess->SetEquationDetect(equ_detect_); + tess.SetEquationDetect(equ_detect_); } } #endif // !DISABLED_LEGACY_ENGINE @@ -3029,7 +3023,7 @@ int TessBaseAPI::FindLines() { #endif OSResults osr; #if !DISABLED_LEGACY_ENGINE - if (PSM_OSD_ENABLED(tess->tessedit_pageseg_mode) && osd_tess == nullptr) { + if (PSM_OSD_ENABLED(tess.tessedit_pageseg_mode) && osd_tess == nullptr) { if (strcmp(language_.c_str(), "osd") == 0) { osd_tess = tesseract_; } else { @@ -3057,13 +3051,13 @@ int TessBaseAPI::FindLines() { } #endif // !DISABLED_LEGACY_ENGINE - if (tess->SegmentPage(tess->input_file_path_.c_str(), block_list_, osd_tess, &osr) < 0) { + if (tess.SegmentPage(tess.input_file_path_.c_str(), block_list_, osd_tess, &osr) < 0) { return -1; } // If Devanagari is being recognized, we use different images for page seg // and for OCR. - tess->PrepareForTessOCR(block_list_, &osr); + tess.PrepareForTessOCR(block_list_, &osr); return 0; } @@ -3072,8 +3066,8 @@ int TessBaseAPI::FindLines() { * Return average gradient of lines on page. */ float TessBaseAPI::GetGradient() { - Tesseract *tess = tesseract(); - return tess->gradient(); + Tesseract &tess = tesseract(); + return tess.gradient(); } /** Delete the pageres and clear the block list ready for a new page. 
*/ @@ -3153,25 +3147,25 @@ bool TessBaseAPI::DetectOS(OSResults *osr) { "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", __func__); ClearResults(); - Tesseract *tess = tesseract(); - if (tess->pix_binary() == nullptr) { + Tesseract& tess = tesseract(); + if (tess.pix_binary() == nullptr) { Image pix; if (!Threshold(pix.obtains())) { return false; } - tess->set_pix_binary(pix); // candidate for move semantics + tess.set_pix_binary(pix); // candidate for move semantics - if (tess->tessedit_write_images) - tess->AddPixDebugPage(tess->pix_binary(), "DetectOS (Orientation And Script) : Thresholded Image"); + if (tess.tessedit_write_images) + tess.AddPixDebugPage(tess.pix_binary(), "DetectOS (Orientation And Script) : Thresholded Image"); } - return tess->orientation_and_script_detection(tess->input_file_path_.c_str(), osr) > 0; + return tess.orientation_and_script_detection(tess.input_file_path_.c_str(), osr) > 0; } #endif // !DISABLED_LEGACY_ENGINE void TessBaseAPI::set_min_orientation_margin(double margin) { - Tesseract *tess = tesseract(); - tess->min_orientation_margin.set_value(margin); + Tesseract &tess = tesseract(); + tess.min_orientation_margin.set_value(margin); } /** @@ -3234,14 +3228,14 @@ void TessBaseAPI::GetBlockTextOrientations(int **block_orientation, bool **verti } void TessBaseAPI::DetectParagraphs(bool after_text_recognition) { - Tesseract *tess = tesseract(); + Tesseract& tess = tesseract(); if (paragraph_models_ == nullptr) { paragraph_models_ = new std::vector; } MutableIterator *result_it = GetMutableIterator(); do { // Detect paragraphs for this block std::vector models; - tess->DetectParagraphs(after_text_recognition, result_it, &models); + tess.DetectParagraphs(after_text_recognition, result_it, &models); paragraph_models_->insert(paragraph_models_->end(), models.begin(), models.end()); } while (result_it->Next(RIL_BLOCK)); delete result_it; @@ -3249,8 +3243,8 @@ void 
TessBaseAPI::DetectParagraphs(bool after_text_recognition) { /** This method returns the string form of the specified unichar. */ const char *TessBaseAPI::GetUnichar(int unichar_id) const { - const Tesseract *tess = tesseract(); - return tess->unicharset_.id_to_unichar(unichar_id); + const Tesseract &tess = tesseract(); + return tess.unicharset_.id_to_unichar(unichar_id); } /** Return the pointer to the i-th dawg loaded into tesseract_ object. */ @@ -3262,8 +3256,8 @@ const Dawg *TessBaseAPI::GetDawg(int i) const { if (i >= NumDawgs()) { return nullptr; } - Tesseract *tess = const_cast(tesseract()); - return tess->getDict().GetDawg(i); + Tesseract &tess = const_cast(tesseract()); + return tess.getDict().GetDawg(i); } /** Return the number of dawgs loaded into tesseract_ object. */ @@ -3271,8 +3265,8 @@ int TessBaseAPI::NumDawgs() const { ASSERT_HOST_MSG(tesseract_ != nullptr, "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", __func__); - Tesseract *tess = const_cast(tesseract()); - return tess->getDict().NumDawgs(); + Tesseract &tess = const_cast(tesseract()); + return tess.getDict().NumDawgs(); } @@ -3282,8 +3276,8 @@ void TessBaseAPI::ReportDebugInfo() { "live tesseract instance: you may have a bug that looses a " "lot of tesseract diagnostics info + reporting for you.\n", __func__); - Tesseract *tess = tesseract(); - tess->ReportDebugInfo(); + Tesseract &tess = tesseract(); + tess.ReportDebugInfo(); } void TessBaseAPI::FinalizeAndWriteDiagnosticsReport() { @@ -3292,8 +3286,8 @@ void TessBaseAPI::FinalizeAndWriteDiagnosticsReport() { "live tesseract instance: you may have a bug that looses a " "lot of tesseract diagnostics info + reporting for you.\n", __func__); - Tesseract *tess = tesseract(); - tess->ReportDebugInfo(); + Tesseract &tess = tesseract(); + tess.ReportDebugInfo(); } /** Escape a char string - replace <>&"' with HTML codes. 
*/ @@ -3324,20 +3318,20 @@ std::string HOcrEscape(const char *text) { return ret; } -const Tesseract *TessBaseAPI::tesseract() const { +const Tesseract &TessBaseAPI::tesseract() const { if (tesseract_ == nullptr) { TessBaseAPI &owner = const_cast(*this); tesseract_ = new tesseract::Tesseract(owner, nullptr); } - return tesseract_; + return *tesseract_; } -Tesseract *TessBaseAPI::tesseract() { +Tesseract &TessBaseAPI::tesseract() { if (tesseract_ == nullptr) { TessBaseAPI &owner = *this; tesseract_ = new tesseract::Tesseract(owner, nullptr); } - return tesseract_; + return *tesseract_; } std::string mkUniqueOutputFilePath(const char *basepath, int page_number, const char *label, const char *filename_extension) { diff --git a/src/api/pdfrenderer.cpp b/src/api/pdfrenderer.cpp index 204e20081f..cfd82bedf2 100644 --- a/src/api/pdfrenderer.cpp +++ b/src/api/pdfrenderer.cpp @@ -923,7 +923,7 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI *api) { if (!textonly_) { char *pdf_object = nullptr; - int jpg_quality = api->tesseract()->jpg_quality; + int jpg_quality = api->tesseract().jpg_quality; if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize, jpg_quality)) { if (destroy_pix) { diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp index aa555debb0..d7520276cc 100644 --- a/src/ccmain/tesseractclass.cpp +++ b/src/ccmain/tesseractclass.cpp @@ -467,7 +467,7 @@ Tesseract::Tesseract(TessBaseAPI &owner, Tesseract *parent) , BOOL_MEMBER(debug_output_diagnostics_HTML, false, "Write the debug/diagnostics output to a HTML file, including the collected images of the various process stages inside tesseract. The content is equivalent to the debug info you see on stderr, but in a nicely formatted and easier to grok modern format. Also handy for sharing your sessions' diagnostics with others. 
The output filename is derived from the source image name and output base path.", params()), INT_MEMBER(debug_output_diagnostics_images_format, IMG4W_WEBP_LOSSLESS, "The format of the images included in the debug/diagnostics output HTML file. Specify a number: 0:PNG, 1:JPEG, 2:WebP, 3:lossless-WebP, 4:TIFF. While we support TIFF and higher numbers, it is ill-advised to use as web browsers won't support those out of the box and choosing those formats will strongly and *negatively* impact your HTML diagnostics viewing experience. Tip: use PNG or JPEG if you want the output to be produced faster, lossless-WEBP if you want smaller image files with maximum precision. Set the jpeg_quality parameter for any of these formats for targeted compression ratio.", params()) - , pixa_debug_(this) + , pixa_debug_(*this) , splitter_(this) , image_finder_(this) , line_finder_(this) diff --git a/src/ccmain/thresholder.cpp b/src/ccmain/thresholder.cpp index 1c30d8a5db..bf4a36d9c0 100644 --- a/src/ccmain/thresholder.cpp +++ b/src/ccmain/thresholder.cpp @@ -41,7 +41,7 @@ namespace tesseract { -ImageThresholder::ImageThresholder(Tesseract* tess) +ImageThresholder::ImageThresholder(Tesseract& tess) : tesseract_(tess) , pix_(nullptr) , image_width_(0) @@ -51,7 +51,6 @@ ImageThresholder::ImageThresholder(Tesseract* tess) , scale_(1) , yres_(300) , estimated_res_(300) { - ASSERT0(tess != nullptr); SetRectangle(0, 0, 0, 0); } @@ -216,14 +215,14 @@ std::tuple ImageThresholder::Threshold(ThresholdMetho l_int32 pix_w, pix_h; pixGetDimensions(pix_ /* pix_grey */, &pix_w, &pix_h, nullptr); - if (tesseract_->thresholding_debug) { + if (tesseract_.thresholding_debug) { tprintDebug("\nimage width: {} height: {} ppi: {}\n", pix_w, pix_h, yres_); } switch (method) { case ThresholdMethod::Sauvola: { int window_size; - window_size = tesseract_->thresholding_window_size * yres_; + window_size = tesseract_.thresholding_window_size * yres_; window_size = std::max(7, window_size); window_size = 
std::min(pix_w < pix_h ? pix_w - 3 : pix_h - 3, window_size); int half_window_size = window_size / 2; @@ -242,10 +241,10 @@ std::tuple ImageThresholder::Threshold(ThresholdMetho ny = pix_h / (half_window_size + 2); } - double kfactor = tesseract_->thresholding_kfactor; + double kfactor = tesseract_.thresholding_kfactor; kfactor = std::max(0.0, kfactor); - if (tesseract_->thresholding_debug) { + if (tesseract_.thresholding_debug) { tprintDebug("Sauvola thresholding: window size: {} kfactor: {} nx: {} ny: {}\n", window_size, kfactor, nx, ny); } @@ -274,19 +273,19 @@ std::tuple ImageThresholder::Threshold(ThresholdMetho case ThresholdMethod::LeptonicaOtsu: { int tile_size; - double tile_size_factor = tesseract_->thresholding_tile_size; + double tile_size_factor = tesseract_.thresholding_tile_size; tile_size = tile_size_factor * yres_; tile_size = std::max(16, tile_size); int smooth_size; - double smooth_size_factor = tesseract_->thresholding_smooth_kernel_size; + double smooth_size_factor = tesseract_.thresholding_smooth_kernel_size; smooth_size_factor = std::max(0.0, smooth_size_factor); smooth_size = smooth_size_factor * yres_; int half_smooth_size = smooth_size / 2; - double score_fraction = tesseract_->thresholding_score_fraction; + double score_fraction = tesseract_.thresholding_score_fraction; - if (tesseract_->thresholding_debug) { + if (tesseract_.thresholding_debug) { tprintDebug("LeptonicaOtsu thresholding: tile size: {}, smooth_size: {}, score_fraction: {}\n", tile_size, smooth_size, score_fraction); } @@ -339,13 +338,13 @@ std::tuple ImageThresholder::Threshold(ThresholdMetho bool ImageThresholder::ThresholdToPix(Image *pix) { // tolerate overlarge images when they're about to be cropped by GetPixRect(): if (IsFullImage()) { - if (tesseract_->CheckAndReportIfImageTooLarge(pix_)) { + if (tesseract_.CheckAndReportIfImageTooLarge(pix_)) { return false; } } else { // validate against the future cropped image size: - if 
(tesseract_->CheckAndReportIfImageTooLarge(rect_width_, rect_height_)) { + if (tesseract_.CheckAndReportIfImageTooLarge(rect_width_, rect_height_)) { return false; } } diff --git a/src/ccmain/thresholder.h b/src/ccmain/thresholder.h index 4d91ed3a27..8f0f16ce62 100644 --- a/src/ccmain/thresholder.h +++ b/src/ccmain/thresholder.h @@ -85,7 +85,7 @@ class TessBaseAPI; /// desired. class TESS_API ImageThresholder { public: - ImageThresholder(Tesseract *tess); + ImageThresholder(Tesseract &tess); virtual ~ImageThresholder(); /// Destroy the Pix if there is one, freeing memory. @@ -218,7 +218,7 @@ class TESS_API ImageThresholder { const std::vector &hi_values, Image *pix) const; private: - Tesseract* tesseract_; // reference to the active instance + Tesseract& tesseract_; // reference to the active instance protected: /// Clone or other copy of the source Pix. diff --git a/src/ccstruct/debugpixa.cpp b/src/ccstruct/debugpixa.cpp index 5b555f3018..300552a95d 100644 --- a/src/ccstruct/debugpixa.cpp +++ b/src/ccstruct/debugpixa.cpp @@ -720,7 +720,7 @@ namespace tesseract { } #endif - DebugPixa::DebugPixa(Tesseract* tess) + DebugPixa::DebugPixa(Tesseract& tess) : tesseract_(tess) , content_has_been_written_to_file(false) , active_step_index(-1) @@ -1416,23 +1416,23 @@ namespace tesseract { int img_depth = pixGetDepth(pixs); ASSERT0(img_depth == 1 || img_depth == 8 || img_depth == 24 || img_depth == 32); - auto [image_extension, pix_format_id, image_format_id] = get_image_output_datums(img_depth, tesseract_->debug_output_diagnostics_images_format); + auto [image_extension, pix_format_id, image_format_id] = get_image_output_datums(img_depth, tesseract_.debug_output_diagnostics_images_format); std::string fn(partname + SanitizeFilenamePart(fmt::format(".img{:04d}.", counter) + caption) + image_extension); TBOX cliprect = cliprects[idx]; auto clip_area = cliprect.area(); Image bgimg; if (clip_area > 0) { - bgimg = tesseract_->pix_original(); // clones ownership + bgimg = 
tesseract_.pix_original(); // clones ownership } - write_one_pix_for_html(html, counter, image_format_id, tesseract_->jpg_quality, fn, pixs, bgimg, TruncatedForTitle(caption), caption); + write_one_pix_for_html(html, counter, image_format_id, tesseract_.jpg_quality, fn, pixs, bgimg, TruncatedForTitle(caption), caption); if (clip_area > 0 && false) { counter++; fn = partname + SanitizeFilenamePart(fmt::format(".img{:04d}.", counter) + caption) + image_extension; - write_one_pix_for_html(html, counter, image_format_id, tesseract_->jpg_quality, fn, pixs, bgimg, TruncatedForTitle(caption), caption, &cliprect); + write_one_pix_for_html(html, counter, image_format_id, tesseract_.jpg_quality, fn, pixs, bgimg, TruncatedForTitle(caption), caption, &cliprect); } //pixs.destroy(); @@ -1564,7 +1564,6 @@ namespace tesseract { void DebugPixa::WriteHTML(const char* filename) { - ASSERT0(tesseract_ != nullptr); if (HasContent()) { double time_elapsed_until_report = grand_clock.clock.get_elapsed_ns(); plf::nanotimer report_clock; @@ -1614,13 +1613,13 @@ namespace tesseract { std::string now_str = ss.str(); std::ostringstream languages; - int num_subs = tesseract_->num_sub_langs(); + int num_subs = tesseract_.num_sub_langs(); if (num_subs > 0) { int i; for (i = 0; i < num_subs - 1; ++i) { - languages << tesseract_->get_sub_lang(i)->lang_ << " + "; + languages << tesseract_.get_sub_lang(i)->lang_ << " + "; } - languages << tesseract_->get_sub_lang(i)->lang_; + languages << tesseract_.get_sub_lang(i)->lang_; } // CSS styles for the generated HTML @@ -1659,30 +1658,30 @@ namespace tesseract { Main directory{}\n\ \n\ ", - html_styling(tesseract_->datadir_, "normalize.css").c_str(), - html_styling(tesseract_->datadir_, "modern-normalize.css").c_str(), - html_styling(tesseract_->datadir_, "diag-report.css").c_str(), + html_styling(tesseract_.datadir_, "normalize.css").c_str(), + html_styling(tesseract_.datadir_, "modern-normalize.css").c_str(), + html_styling(tesseract_.datadir_, 
"diag-report.css").c_str(), TESSERACT_VERSION_STR, now_str.c_str(), - check_unknown_and_encode(tesseract_->input_file_path_).c_str(), - check_unknown_and_encode(tesseract_->imagebasename_).c_str(), - check_unknown_and_encode(tesseract_->imagefile_).c_str(), - tesseract_->lang_.c_str(), + check_unknown_and_encode(tesseract_.input_file_path_).c_str(), + check_unknown_and_encode(tesseract_.imagebasename_).c_str(), + check_unknown_and_encode(tesseract_.imagefile_).c_str(), + tesseract_.lang_.c_str(), languages.str().c_str(), - check_unknown_and_encode(tesseract_->language_data_path_prefix_).c_str(), - check_unknown_and_encode(tesseract_->datadir_).c_str(), - check_unknown_and_encode(tesseract_->directory_).c_str() + check_unknown_and_encode(tesseract_.language_data_path_prefix_).c_str(), + check_unknown_and_encode(tesseract_.datadir_).c_str(), + check_unknown_and_encode(tesseract_.directory_).c_str() ).c_str(), html); plf::nanotimer image_clock; image_clock.start(); { - Image pixs = tesseract_->pix_original(); + Image pixs = tesseract_.pix_original(); int img_depth = pixGetDepth(pixs); - auto [image_extension, pix_format_id, image_format_id] = get_image_output_datums(img_depth, tesseract_->debug_output_diagnostics_images_format); + auto [image_extension, pix_format_id, image_format_id] = get_image_output_datums(img_depth, tesseract_.debug_output_diagnostics_images_format); std::string fn(partname + SanitizeFilenamePart(".img-original.") + image_extension); - write_one_pix_for_html(html, 0, image_format_id, tesseract_->jpg_quality, fn, pixs, Image(), "original image", "The original image as registered with the Tesseract instance."); + write_one_pix_for_html(html, 0, image_format_id, tesseract_.jpg_quality, fn, pixs, Image(), "original image", "The original image as registered with the Tesseract instance."); } source_image_elapsed_ns = image_clock.get_elapsed_ns(); @@ -1859,7 +1858,7 @@ namespace tesseract { step.elapsed_ns = 0; } - tesseract::ParamsVectors *vec = 
tesseract_->params(); + tesseract::ParamsVectors *vec = tesseract_.params(); // produce a HTML-formatted parameter usage report by using the regular way to get such a report, // then feed it through the NDtext-to-HTML transformer and only then write the final result in one fell swoop to file. @@ -1888,7 +1887,7 @@ namespace tesseract { auto level = section_info.level; if (level == 3 && verbose_process) { - tesseract::ParamsVectors *vec = tesseract_->params(); + tesseract::ParamsVectors *vec = tesseract_.params(); ParamUtils::ReportParamsUsageStatistics(nullptr, vec, level, title); } } @@ -1917,13 +1916,13 @@ namespace tesseract { AutoPopDebugSectionLevel::~AutoPopDebugSectionLevel() { if (section_handle_ >= 0) { - tesseract_->PopPixDebugSection(section_handle_); + tesseract_.PopPixDebugSection(section_handle_); } } void AutoPopDebugSectionLevel::pop() { if (section_handle_ >= 0) { - tesseract_->PopPixDebugSection(section_handle_); + tesseract_.PopPixDebugSection(section_handle_); section_handle_ = INT_MIN; } } diff --git a/src/ccstruct/debugpixa.h b/src/ccstruct/debugpixa.h index dafebb7f91..7530a9ede1 100644 --- a/src/ccstruct/debugpixa.h +++ b/src/ccstruct/debugpixa.h @@ -36,7 +36,7 @@ namespace tesseract { class DebugPixa { public: // TODO(rays) add another constructor with size control. - DebugPixa(Tesseract* tess); + DebugPixa(Tesseract& tess); // If the filename_ has been set and there are any debug images, they are // written to the set filename_. @@ -118,7 +118,7 @@ namespace tesseract { }; private: - Tesseract* tesseract_; // reference to the driving tesseract instance + Tesseract& tesseract_; // reference to the driving tesseract instance private: // The collection of images to put in the PDF. 
@@ -180,7 +180,7 @@ namespace tesseract { class AutoPopDebugSectionLevel { public: - AutoPopDebugSectionLevel(Tesseract *tess, int section_handle) + AutoPopDebugSectionLevel(Tesseract &tess, int section_handle) : section_handle_(section_handle), tesseract_(tess) {} // auto-pop via end-of-scope i.e. object destructor: @@ -190,7 +190,7 @@ namespace tesseract { void pop(); protected: - Tesseract *tesseract_; + Tesseract &tesseract_; int section_handle_; }; diff --git a/src/tesseract.cpp b/src/tesseract.cpp index 171cb2bae9..5048fbaefa 100644 --- a/src/tesseract.cpp +++ b/src/tesseract.cpp @@ -974,8 +974,8 @@ extern "C" int tesseract_main(int argc, const char **argv) } // TODO: set during init phase and/or when this parameter is edited. - Tesseract *tess = api.tesseract(); - monitor.set_deadline_msecs(tess->activity_timeout_millisec); + Tesseract &tess = api.tesseract(); + monitor.set_deadline_msecs(tess.activity_timeout_millisec); // repeat the `-c var=val` load as debug_all MAY have overwritten some of these user-specified settings in the call above. if (!SetVariablesFromCLArgs(api, argc, argv)) { @@ -1056,15 +1056,15 @@ extern "C" int tesseract_main(int argc, const char **argv) // ambigs.train, box.train, box.train.stderr, linebox, rebox, lstm.train. // In this mode no other OCR result files are written. 
bool b = false; - ASSERT_HOST(api.tesseract() == tess); + ASSERT_HOST(&api.tesseract() == &tess); bool in_training_mode = - (bool(tess->tessedit_ambigs_training)) || - (bool(tess->tessedit_resegment_from_boxes)) || - (bool(tess->tessedit_make_boxes_from_boxes)) || - (bool(tess->tessedit_train_line_recognizer)); + (bool(tess.tessedit_ambigs_training)) || + (bool(tess.tessedit_resegment_from_boxes)) || + (bool(tess.tessedit_make_boxes_from_boxes)) || + (bool(tess.tessedit_train_line_recognizer)); if (api.GetPageSegMode() == tesseract::PSM_OSD_ONLY) { - if (!tess->AnyTessLang()) { + if (!tess.AnyTessLang()) { fprintf(stderr, "Error, OSD requires a model for the legacy engine\n"); return EXIT_FAILURE; } @@ -1103,18 +1103,18 @@ extern "C" int tesseract_main(int argc, const char **argv) succeed &= !PreloadRenderers(api, renderers, pagesegmode, outputbase); if (succeed && renderers.empty()) { // default: TXT + HOCR renderer - ASSERT_HOST(api.tesseract() == tess); - tess->tessedit_create_hocr.set_value(true); - tess->tessedit_create_alto.set_value(true); - tess->tessedit_create_page_xml.set_value(true); - tess->tessedit_create_tsv.set_value(true); - tess->tessedit_create_pdf.set_value(true); - tess->textonly_pdf.set_value(true); - tess->tessedit_write_unlv.set_value(true); - tess->tessedit_create_lstmbox.set_value(true); - tess->tessedit_create_boxfile.set_value(true); - tess->tessedit_create_wordstrbox.set_value(true); - tess->tessedit_create_txt.set_value(true); + ASSERT_HOST(&api.tesseract() == &tess); + tess.tessedit_create_hocr.set_value(true); + tess.tessedit_create_alto.set_value(true); + tess.tessedit_create_page_xml.set_value(true); + tess.tessedit_create_tsv.set_value(true); + tess.tessedit_create_pdf.set_value(true); + tess.textonly_pdf.set_value(true); + tess.tessedit_write_unlv.set_value(true); + tess.tessedit_create_lstmbox.set_value(true); + tess.tessedit_create_boxfile.set_value(true); + tess.tessedit_create_wordstrbox.set_value(true); + 
tess.tessedit_create_txt.set_value(true); succeed &= !PreloadRenderers(api, renderers, pagesegmode, outputbase); } diff --git a/src/training/ambiguous_words.cpp b/src/training/ambiguous_words.cpp index 3867e64535..f996dbeb97 100644 --- a/src/training/ambiguous_words.cpp +++ b/src/training/ambiguous_words.cpp @@ -75,7 +75,7 @@ extern "C" TESS_API int tesseract_ambiguous_words_main(int argc, const char** ar vars_values.emplace_back(output_file_str); api.InitFull(tessdata_dir, lang.c_str(), tesseract::OEM_TESSERACT_ONLY, nullptr, 0, &vars_vec, &vars_values, false); - tesseract::Dict &dict = api.tesseract()->getDict(); + tesseract::Dict &dict = api.tesseract().getDict(); FILE *input_file = fopen(input_file_str, "rb"); if (input_file == nullptr) { tesseract::tprintError("Failed to open input wordlist file {}\n", input_file_str); diff --git a/src/training/classifier_tester.cpp b/src/training/classifier_tester.cpp index 3d4a5e85c7..3a65d38994 100644 --- a/src/training/classifier_tester.cpp +++ b/src/training/classifier_tester.cpp @@ -74,7 +74,7 @@ static tesseract::ShapeClassifier *InitializeClassifier(const char *classifer_na tprintError("Tesseract initialization failed!\n"); return nullptr; } - tesseract = tess->tesseract(); + tesseract = &tess->tesseract(); classify = static_cast(tesseract); if (classify->shape_table() == nullptr) { tprintError("Tesseract must contain a ShapeTable!\n"); From 135fd2ecd6d492d05f65f2968f17ea2106ac2bf4 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Fri, 26 Jul 2024 14:22:37 +0200 Subject: [PATCH 63/66] Squashed commit of the following: commit 3604bf391265a14ec2eabc85a909bc55a6f469e4 Author: Ger Hobbelt Date: Fri Jul 26 14:19:40 2024 +0200 part 5/N of the refactoring of API::tesseract() usage: here we transition from using `Tesseract *` type to `Tesseract &` C++ reference type where applicable -- note that the tesseract iterator classes cannot use a `Tesseract&` reference but must track a `Tesseract *` pointer instead as these iterators 
can jump across several Tesseract instances when iterating or when being re-started; see the code comments which were added in an earlier commit. This commit picks up what's left of commit SHA-1: bee51e471879275b04f0ecb63a7596bb359bb169, which is the complete refactor, but which includes refactoring errors and is not part of this branch. --- Entire refactor commit message: bit of a brutalist `const_cast(...)` hack at a few spots (which will be refactored at a later date anyway): moving towards dev/master branch codebase: transitioned to using `Tesseract &` reference instead of `Tesseract *` pointer reference base in internal classes and iterators: their lifetimes are always supposed to be shorter than the Tesseract+TessBaseAPI class instance lifetimes; the `&` reference type communicates better that these expect these lifetime behaviours, and we were already busy replacing the nasty "delete Tesseract instance + allocate new Tesseract instance" reset-to-defaults behaviour in TessBaseAPI's Init methods as we want to arrive at a Tesseract which can be fully controlled through *parameters*, i.e. the simplest, no-config-at-all call interfaces with the same configuration power for both tesseract CLI and Tesseract C/C++ usage in larger applications (mupdf et al), where transporting complex configurations through deep call chains is often no sinecure. commit 0e948e0e414a7c796225b3a6f999fee1e3dee4ac Author: Ger Hobbelt Date: Fri Jul 26 14:07:09 2024 +0200 remove superfluous use of tesseract namespace identifier commit 40803505eeadcb66b0b2d67a9aedce9fb35d6130 Author: Ger Hobbelt Date: Fri Jul 26 13:38:39 2024 +0200 Undoing faulty code, which we committed only as a reminder for posterity -- Revert "WARNING: this is the culprit of our woes re Tesseract reference / refactoring: this was previously, because the tesseract_ member pointer was never NULL, effectively DEAD CODE.
*I* made a mistake during the refactoring by not being careful enough and removing the null-check around it, which made this suddenly ACTIVE/LIVE code (we redo that mistake now through if 01..endif) and it badly breaks the OCR engine, resulting in insane ratings & costs turning up much later in the process." This reverts commit 64d4e17e3797261d592e9214b28308bcc842f1bc. commit 6eabc6b08fedb4d15fe6ac70fe8860978e02acaf Author: Ger Hobbelt Date: Fri Jul 26 13:37:40 2024 +0200 WARNING: this is the culprit of our woes re Tesseract reference / refactoring: this was previously, because the tesseract_ member pointer was never NULL, effectively DEAD CODE. *I* made a mistake during the refactoring by not being careful enough and removing the null-check around it, which made this suddenly ACTIVE/LIVE code (we redo that mistake now through if 01..endif) and it badly breaks the OCR engine, resulting in insane ratings & costs turning up much later in the process. commit 62bf9ec8ed47c0da632e4bc5b41a82f3e340776d Author: Ger Hobbelt Date: Fri Jul 26 13:28:54 2024 +0200 part 4/N of the refactoring of API::tesseract() usage: calling this one now guarantees a live Tesseract object exists, i.e. `api.tesseract_ != nullptr` and all API methods, when previously using `api.tesseract_`, will now use that one through `api.tesseract()`. Later on this will be further migrated to using a more appropriate `Tesseract &` C++ reference type, but we ran into issues during the initial refactor and these commits are the stages of the original refactor action with the errors removed. 
--- Entire refactor commit message: bit of a brutalist `const_cast(...)` hack at a few spots (which will be refactored at a later date anyway): moving towards dev/master branch codebase: transitioned to using `Tesseract &` reference instead of `Tesseract *` pointer reference base in internal classes and iterators: their lifetimes are always supposed to be shorter than the Tesseract+TessBaseAPI class instance lifetimes; the `&` reference type communicates better that these expect these lifetime behaviours, and we were already busy replacing the nasty "delete Tesseract instance + allocate new Tesseract instance" reset-to-defaults behaviour in TessBaseAPI's Init methods as we want to arrive at a Tesseract which can be fully controlled through *parameters*, i.e. the simplest, no-config-at-all call interfaces with the same configuration power for both tesseract CLI and Tesseract C/C++ usage in larger applications (mupdf et al), where transporting complex configurations through deep call chains is often no sinecure. commit 4aba140daf62903eebd46e750c59c62ff76bf020 Author: Ger Hobbelt Date: Fri Jul 26 13:26:58 2024 +0200 part 3/N of the refactoring of API::tesseract() usage: calling this one now guarantees a live Tesseract object exists, i.e. `api.tesseract_ != nullptr` and all API methods, when previously using `api.tesseract_`, will now use that one through `api.tesseract()`. Later on this will be further migrated to using a more appropriate `Tesseract &` C++ reference type, but we ran into issues during the initial refactor and these commits are the stages of the original refactor action with the errors removed.
--- Entire refactor commit message: bit of a brutalist `const_cast(...)` hack at a few spots (which will be refactored at a later date anyway): moving towards dev/master branch codebase: transitioned to using `Tesseract &` reference instead of `Tesseract *` pointer reference base in internal classes and iterators: their lifetimes are always supposed to be shorter than the Tesseract+TessBaseAPI class instance lifetimes; the `&` reference type communicates better that these expect these lifetime behaviours, and we were already busy replacing the nasty "delete Tesseract instance + allocate new Tesseract instance" reset-to-defaults behaviour in TessBaseAPI's Init methods as we want to arrive at a Tesseract which can be fully controlled through *parameters*, i.e. the simplest, no-config-at-all call interfaces with the same configuration power for both tesseract CLI and Tesseract C/C++ usage in larger applications (mupdf et al), where transporting complex configurations through deep call chains is often no sinecure. commit 43fbf2f032c3dc5e7983ec58409f166f7b0402a3 Author: Ger Hobbelt Date: Fri Jul 26 13:25:08 2024 +0200 part 2/N of the refactoring of API::tesseract() usage: calling this one now guarantees a live Tesseract object exists, i.e. `api.tesseract_ != nullptr` and all API methods, when previously using `api.tesseract_`, will now use that one through `api.tesseract()`. Later on this will be further migrated to using a more appropriate `Tesseract &` C++ reference type, but we ran into issues during the initial refactor and these commits are the stages of the original refactor action with the errors removed.
--- Entire refactor commit message: bit of a brutalist `const_cast(...)` hack at a few spots (which will be refactored at a later date anyway): moving towards dev/master branch codebase: transitioned to using `Tesseract &` reference instead of `Tesseract *` pointer reference base in internal classes and iterators: their lifetimes are always supposed to be shorter than the Tesseract+TessBaseAPI class instance lifetimes; the `&` reference type communicates better that these expect these lifetime behaviours, and we were already busy replacing the nasty "delete Tesseract instance + allocate new Tesseract instance" reset-to-defaults behaviour in TessBaseAPI's Init methods as we want to arrive at a Tesseract which can be fully controlled through *parameters*, i.e. the simplest, no-config-at-all call interfaces with the same configuration power for both tesseract CLI and Tesseract C/C++ usage in larger applications (mupdf et al), where transporting complex configurations through deep call chains is often no sinecure. commit 3bd99e17c55fa89e6be8fd1297c29fe4b4b7c69e Author: Ger Hobbelt Date: Fri Jul 26 12:58:40 2024 +0200 part 1/N of the refactoring of API::tesseract() usage: calling this one now guarantees a live Tesseract object exists, i.e. `api.tesseract_ != nullptr` and all API methods, when previously using `api.tesseract_`, will now use that one through `api.tesseract()`. Later on this will be further migrated to using a more appropriate `Tesseract &` C++ reference type, but we ran into issues during the initial refactor and these commits are the stages of the original refactor action with the errors removed.
--- Entire refactor commit message: bit of a brutalist `const_cast(...)` hack at a few spots (which will be refactored at a later date anyway): moving towards dev/master branch codebase: transitioned to using `Tesseract &` reference instead of `Tesseract *` pointer reference base in internal classes and iterators: their lifetimes are always supposed to be shorter than the Tesseract+TessBaseAPI class instance lifetimes; the `&` reference type communicates better that these expect these lifetime behaviours, and we were already busy replacing the nasty "delete Tesseract instance + allocate new Tesseract instance" reset-to-defaults behaviour in TessBaseAPI's Init methods as we want to arrive at a Tesseract which can be fully controlled through *parameters*, i.e. the simplest, no-config-at-all call interfaces with the same configuration power for both tesseract CLI and Tesseract C/C++ usage in larger applications (mupdf et al), where transporting complex configurations through deep call chains is often no sinecure. commit 482a5b1b60219a7e130d0bace2571bef07f95d72 Author: Ger Hobbelt Date: Fri Jul 26 12:28:55 2024 +0200 guaranteed init all members of the class: assign the default/start values in the class declaration code chunk.
# Conflicts: # include/tesseract/baseapi.h --- include/tesseract/baseapi.h | 54 ++- src/api/baseapi.cpp | 687 +++++++++++++++++------------ src/api/pdfrenderer.cpp | 2 +- src/ccmain/tesseractclass.cpp | 2 +- src/ccmain/thresholder.cpp | 23 +- src/ccmain/thresholder.h | 4 +- src/ccstruct/debugpixa.cpp | 51 ++- src/ccstruct/debugpixa.h | 8 +- src/ccstruct/pageres.h | 12 +- src/tesseract.cpp | 37 +- src/training/ambiguous_words.cpp | 2 +- src/training/classifier_tester.cpp | 2 +- 12 files changed, 513 insertions(+), 371 deletions(-) diff --git a/include/tesseract/baseapi.h b/include/tesseract/baseapi.h index 5248890617..738b9a8ef1 100644 --- a/include/tesseract/baseapi.h +++ b/include/tesseract/baseapi.h @@ -917,9 +917,24 @@ class TESS_API TessBaseAPI { /** Return the number of dawgs loaded into tesseract_ object. */ int NumDawgs() const; - Tesseract *tesseract() const { - return tesseract_; - } + /// Returns a reference to the internal instance of the Tesseract class; + /// the presence of which is guaranteed, i.e. the returned pointer + /// WILL NOT be `nullptr`. + /// + /// Note that the reference's lifetime ends once the TessBaseAPI's instance + /// is deleted or its End() API is invoked, whichever comes first. + /// + /// \sa End() + /// \sa WipeSqueakyCleanForReUse() + /// + /// @{ + const Tesseract &tesseract() const; + Tesseract &tesseract(); + // https://stackoverflow.com/questions/856542/elegant-solution-to-duplicate-const-and-non-const-getters + //inline Tesseract &tesseract() { + // return const_cast(this->tesseract()); + //} + /// @} OcrEngineMode oem() const { return last_oem_requested_; @@ -975,40 +990,41 @@ class TESS_API TessBaseAPI { } protected: - Tesseract *tesseract_; ///< The underlying data object. + mutable Tesseract *tesseract_ = nullptr; ///< The underlying data object. #if !DISABLED_LEGACY_ENGINE - Tesseract *osd_tesseract_; ///< For orientation & script detection. - EquationDetect *equ_detect_; ///< The equation detector. 
+ Tesseract *osd_tesseract_ = nullptr; ///< For orientation & script detection. + EquationDetect *equ_detect_ = nullptr; ///< The equation detector. #endif ETEXT_DESC *monitor_ = nullptr; ETEXT_DESC default_minimal_monitor_; FileReader reader_; ///< Reads files from any filesystem. - ImageThresholder *thresholder_; ///< Image thresholding module. - std::vector *paragraph_models_; - BLOCK_LIST *block_list_; ///< The page layout. - PAGE_RES *page_res_; ///< The page-level data. + ImageThresholder *thresholder_ = nullptr; ///< Image thresholding module. + std::vector *paragraph_models_ = nullptr; + BLOCK_LIST *block_list_ = nullptr; ///< The page layout. + PAGE_RES *page_res_ = nullptr; ///< The page-level data. std::string visible_image_file_; Image pix_visible_image_; ///< Image used in output PDF std::string output_file_; ///< Name used by debug code. std::string datapath_; ///< Current location of tessdata. std::string language_; ///< Last initialized language. - OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested. - bool recognition_done_; ///< page_res_ contains recognition data. + OcrEngineMode last_oem_requested_ = OEM_DEFAULT; ///< Last ocr language mode requested. + bool recognition_done_ = false; ///< page_res_ contains recognition data. /** * @defgroup ThresholderParams Thresholder Parameters * Parameters saved from the Thresholder. Needed to rebuild coordinates. 
*/ /* @{ */ - int rect_left_; - int rect_top_; - int rect_width_; - int rect_height_; - int image_width_; - int image_height_; + int rect_left_ = 0; + int rect_top_ = 0; + int rect_width_ = 0; + int rect_height_ = 0; + + int image_width_ = 0; + int image_height_ = 0; /* @} */ -private: +protected: // A list of image filenames gets special consideration // // If global parameter `tessedit_page_number` is non-negative, will only process that diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 7231a3cf78..ce65b30b75 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -276,10 +276,8 @@ const char *TessBaseAPI::Version() { * loading a UNLV zone file. */ void TessBaseAPI::SetInputName(const char *name) { - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract(*this, nullptr); - } - tesseract_->input_file_path_ = name ? name : ""; + Tesseract &tess = tesseract(); + tess.input_file_path_ = name ? name : ""; } /** Set the name of the visible image files. Needed only for PDF output. */ @@ -332,7 +330,8 @@ ImageCostEstimate TessBaseAPI::EstimateImageMemoryCost(const Pix* pix, float all * and reports the cost estimate for the current instance/image. */ ImageCostEstimate TessBaseAPI::EstimateImageMemoryCost() const { - return tesseract_->EstimateImageMemoryCost(); + const Tesseract &tess = tesseract(); + return tess.EstimateImageMemoryCost(); } /** @@ -344,7 +343,8 @@ ImageCostEstimate TessBaseAPI::EstimateImageMemoryCost() const { * this same check as part of their startup routine. */ bool TessBaseAPI::CheckAndReportIfImageTooLarge(const Pix* pix) const { - return tesseract_->CheckAndReportIfImageTooLarge(pix); + const Tesseract &tess = tesseract(); + return tess.CheckAndReportIfImageTooLarge(pix); } /** Set the name of the output files. Needed only for debugging. 
*/ @@ -357,29 +357,24 @@ const std::string &TessBaseAPI::GetOutputName() { } bool TessBaseAPI::SetVariable(const char *name, const char *value) { - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract(*this, nullptr); - } - return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY, tesseract_->params()); + Tesseract &tess = tesseract(); + return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY, tess.params()); } bool TessBaseAPI::SetVariable(const char *name, int value) { - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract(*this, nullptr); - } + Tesseract &tess = tesseract(); std::string v = fmt::format("{}", value); - return ParamUtils::SetParam(name, v.c_str(), SET_PARAM_CONSTRAINT_NON_INIT_ONLY, tesseract_->params()); + return ParamUtils::SetParam(name, v.c_str(), SET_PARAM_CONSTRAINT_NON_INIT_ONLY, tess.params()); } bool TessBaseAPI::SetDebugVariable(const char *name, const char *value) { - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract(*this, nullptr); - } - return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY, tesseract_->params()); + Tesseract &tess = tesseract(); + return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY, tess.params()); } bool TessBaseAPI::GetIntVariable(const char *name, int *value) const { + Tesseract &tess = const_cast(tesseract()); auto *p = ParamUtils::FindParam(name, GlobalParams()->int_params(), - tesseract_->params()->int_params()); + tess.params()->int_params()); if (p == nullptr) { return false; } @@ -388,8 +383,9 @@ bool TessBaseAPI::GetIntVariable(const char *name, int *value) const { } bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const { + Tesseract &tess = const_cast(tesseract()); auto *p = ParamUtils::FindParam(name, GlobalParams()->bool_params(), - tesseract_->params()->bool_params()); + tess.params()->bool_params()); if (p == nullptr) { return false; } @@ -398,8 +394,9 @@ bool 
TessBaseAPI::GetBoolVariable(const char *name, bool *value) const { } const char *TessBaseAPI::GetStringVariable(const char *name) const { + Tesseract &tess = const_cast(tesseract()); auto *p = ParamUtils::FindParam(name, GlobalParams()->string_params(), - tesseract_->params()->string_params()); + tess.params()->string_params()); if (p == nullptr) { return nullptr; } @@ -407,8 +404,9 @@ const char *TessBaseAPI::GetStringVariable(const char *name) const { } bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const { + Tesseract &tess = const_cast(tesseract()); auto *p = ParamUtils::FindParam(name, GlobalParams()->double_params(), - tesseract_->params()->double_params()); + tess.params()->double_params()); if (p == nullptr) { return false; } @@ -418,7 +416,8 @@ bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const { /** Get value of named variable as a string, if it exists. */ bool TessBaseAPI::GetVariableAsString(const char *name, std::string *val) const { - return ParamUtils::GetParamAsString(name, tesseract_->params(), val); + Tesseract &tess = const_cast(tesseract()); + return ParamUtils::GetParamAsString(name, tess.params(), val); } #if !DISABLED_LEGACY_ENGINE @@ -428,9 +427,10 @@ void TessBaseAPI::PrintFontsTable(FILE *fp) const { if (!fp) fp = stdout; bool print_info = (fp == stdout || fp == stderr); - const int fontinfo_size = tesseract_->get_fontinfo_table().size(); + const Tesseract &tess = tesseract(); + const int fontinfo_size = tess.get_fontinfo_table().size(); for (int font_index = 1; font_index < fontinfo_size; ++font_index) { - FontInfo font = tesseract_->get_fontinfo_table().at(font_index); + FontInfo font = tess.get_fontinfo_table().at(font_index); if (print_info) { tprintInfo( "ID={}: {} is_italic={} is_bold={} is_fixed_pitch={} is_serif={} is_fraktur={}\n", @@ -462,7 +462,8 @@ void TessBaseAPI::PrintFontsTable(FILE *fp) const { * (use DumpVariables instead to create config files). 
*/ void TessBaseAPI::PrintVariables(FILE *fp) const { - ParamUtils::PrintParams(fp, tesseract_->params(), true); + Tesseract &tess = const_cast(tesseract()); + ParamUtils::PrintParams(fp, tess.params(), true); } /** @@ -470,7 +471,8 @@ void TessBaseAPI::PrintVariables(FILE *fp) const { * Can be used as Tesseract configuration file. */ void TessBaseAPI::DumpVariables(FILE *fp) const { - ParamUtils::PrintParams(fp, tesseract_->params(), false); + Tesseract &tess = const_cast(tesseract()); + ParamUtils::PrintParams(fp, tess.params(), false); } // Report parameters' usage statistics, i.e. report which params have been @@ -481,10 +483,11 @@ void TessBaseAPI::DumpVariables(FILE *fp) const { // answering the question: // "Which of all those parameters are actually *relevant* to my use case today?" void TessBaseAPI::ReportParamsUsageStatistics() const { - tesseract::ParamsVectors *vec = tesseract_->params(); + Tesseract &tess = const_cast(tesseract()); + const tesseract::ParamsVectors *vec = tess.params(); std::string fpath = tesseract::vars_report_file; FILE *f = ParamUtils::OpenReportFile(fpath.c_str()); - int section_level = tesseract_->GetPixDebugSectionLevel(); + int section_level = tess.GetPixDebugSectionLevel(); ParamUtils::ReportParamsUsageStatistics(f, vec, section_level, nullptr); if (f) { if (f != stdout && f != stderr) { @@ -545,25 +548,19 @@ int TessBaseAPI::InitFullWithReader(const char *data, int data_size, const char data = ""; } std::string datapath = data_size == 0 ? data : language; + ASSERT_HOST(tesseract_ != nullptr); + Tesseract &tess = tesseract(); + // If the datapath, OcrEngineMode or the language have changed - start again. // Note that the language_ field stores the last requested language that was - // initialized successfully, while tesseract_->lang stores the language + // initialized successfully, while tess.lang stores the language // actually used. 
They differ only if the requested language was nullptr, in - // which case tesseract_->lang is set to the Tesseract default ("eng"). - if (tesseract_ != nullptr && - tesseract_->RequiresWipeBeforeIndependentReUse() && + // which case tess.lang is set to the Tesseract default ("eng"). + if (tess.RequiresWipeBeforeIndependentReUse() && (datapath_.empty() || language_.empty() || datapath_ != datapath || - last_oem_requested_ != oem || (language_ != language && tesseract_->lang_ != language))) { -#if 0 - delete tesseract_; - tesseract_ = nullptr; -#else + last_oem_requested_ != oem || (language_ != language && tess.lang_ != language))) { // try not to throw away tesseract instances. Clean them out rigorously, instead. - tesseract_->WipeSqueakyCleanForReUse(); -#endif - } - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract(*this, nullptr); + tess.WipeSqueakyCleanForReUse(); } if (reader != nullptr) { reader_ = reader; @@ -575,7 +572,7 @@ int TessBaseAPI::InitFullWithReader(const char *data, int data_size, const char (void)Monitor().set_progress(0.0).exec_progress_func(); - if (tesseract_->init_tesseract(datapath, output_file_, language, oem, configs, + if (tess.init_tesseract(datapath, output_file_, language, oem, configs, configs_size, vars_vec, vars_values, set_only_non_debug_params, &mgr) != 0) { return -1; @@ -583,8 +580,8 @@ int TessBaseAPI::InitFullWithReader(const char *data, int data_size, const char // Update datapath and language requested for the last valid initialization. 
datapath_ = std::move(datapath); - if (datapath_.empty() && !tesseract_->datadir_.empty()) { - datapath_ = tesseract_->datadir_; + if (datapath_.empty() && !tess.datadir_.empty()) { + datapath_ = tess.datadir_; } language_ = language; @@ -597,7 +594,7 @@ int TessBaseAPI::InitFullWithReader(const char *data, int data_size, const char // can come through here after a previous failed/aborted/successful // initialization and we still would need to set up the Tesseract // instance to a definitely known state here anyway. - tesseract_->ResetAdaptiveClassifier(); + tess.ResetAdaptiveClassifier(); #endif // !DISABLED_LEGACY_ENGINE if (Monitor().kick_watchdog_and_check_for_cancel()) { @@ -625,10 +622,8 @@ const ETEXT_DESC &TessBaseAPI::Monitor() const { void TessBaseAPI::DebugAddCommandline(const std::vector& argv) { - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract(*this, nullptr); - } - tesseract_->DebugAddCommandline(argv); + Tesseract &tess = tesseract(); + tess.DebugAddCommandline(argv); } @@ -651,12 +646,12 @@ const char *TessBaseAPI::GetInitLanguagesAsString() const { */ void TessBaseAPI::GetLoadedLanguagesAsVector(std::vector *langs) const { langs->clear(); - if (tesseract_ != nullptr) { - langs->push_back(tesseract_->lang_); - int num_subs = tesseract_->num_sub_langs(); - for (int i = 0; i < num_subs; ++i) { - langs->push_back(tesseract_->get_sub_lang(i)->lang_); - } + ASSERT_HOST(tesseract_ != nullptr); + const Tesseract &tess = tesseract(); + langs->push_back(tess.lang_); + int num_subs = tess.num_sub_langs(); + for (int i = 0; i < num_subs; ++i) { + langs->push_back(tess.get_sub_lang(i)->lang_); } } @@ -665,10 +660,10 @@ void TessBaseAPI::GetLoadedLanguagesAsVector(std::vector *langs) co */ void TessBaseAPI::GetAvailableLanguagesAsVector(std::vector *langs) const { langs->clear(); - if (tesseract_ != nullptr) { - addAvailableLanguages(tesseract_->datadir_, "", langs); - std::sort(langs->begin(), langs->end()); - } + ASSERT_HOST(tesseract_ != nullptr); 
+ const Tesseract &tess = tesseract(); + addAvailableLanguages(tess.datadir_, "", langs); + std::sort(langs->begin(), langs->end()); } /** @@ -676,12 +671,11 @@ void TessBaseAPI::GetAvailableLanguagesAsVector(std::vector *langs) * AnalysePage. Calls that attempt recognition will generate an error. */ void TessBaseAPI::InitForAnalysePage() { - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract(*this, nullptr); + ASSERT_HOST(tesseract_ != nullptr); + Tesseract &tess = tesseract(); #if !DISABLED_LEGACY_ENGINE - tesseract_->InitAdaptiveClassifier(nullptr); + tess.InitAdaptiveClassifier(nullptr); #endif - } } /** @@ -690,7 +684,8 @@ void TessBaseAPI::InitForAnalysePage() { * and also accepts a relative or absolute path name. */ void TessBaseAPI::ReadConfigFile(const char *filename) { - tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY); + Tesseract &tess = tesseract(); + tess.read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY); } /** @@ -699,10 +694,8 @@ void TessBaseAPI::ReadConfigFile(const char *filename) { * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string). */ void TessBaseAPI::SetPageSegMode(PageSegMode mode) { - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract(*this, nullptr); - } - tesseract_->tessedit_pageseg_mode.set_value(mode); + Tesseract &tess = tesseract(); + tess.tessedit_pageseg_mode.set_value(mode); } /** Return the current page segmentation mode. 
*/ @@ -710,7 +703,8 @@ PageSegMode TessBaseAPI::GetPageSegMode() const { if (tesseract_ == nullptr) { return PSM_SINGLE_BLOCK; } - return static_cast(tesseract_->tessedit_pageseg_mode.value()); + const Tesseract &tess = tesseract(); + return static_cast(tess.tessedit_pageseg_mode.value()); } /** @@ -728,7 +722,10 @@ PageSegMode TessBaseAPI::GetPageSegMode() const { */ char *TessBaseAPI::TesseractRect(const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height) { - if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + if (width < kMinRectSize || height < kMinRectSize) { return nullptr; // Nothing worth doing. } @@ -748,11 +745,13 @@ char *TessBaseAPI::TesseractRect(const unsigned char *imagedata, int bytes_per_p * adaptive data. */ void TessBaseAPI::ClearAdaptiveClassifier() { + ASSERT_HOST(tesseract_ != nullptr); if (tesseract_ == nullptr) { return; } - tesseract_->ResetAdaptiveClassifier(); - tesseract_->ResetDocumentDictionary(); + Tesseract& tess = tesseract(); + tess.ResetAdaptiveClassifier(); + tess.ResetDocumentDictionary(); } #endif // !DISABLED_LEGACY_ENGINE @@ -765,6 +764,9 @@ void TessBaseAPI::ClearAdaptiveClassifier() { */ void TessBaseAPI::SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line, float angle) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); if (InternalResetImage()) { thresholder_->SetImage(imagedata, width, height, bytes_per_pixel, bytes_per_line, angle); SetInputImage(thresholder_->GetPixRect()); @@ -772,11 +774,13 @@ void TessBaseAPI::SetImage(const unsigned char *imagedata, int width, int height } void TessBaseAPI::SetSourceResolution(int 
ppi) { - if (thresholder_) { - thresholder_->SetSourceYResolution(ppi); - } else { - tprintError("Please call SetImage before SetSourceResolution.\n"); - } + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init and/or SetImage before attempting this method.\n", + __func__); + ASSERT_HOST_MSG(thresholder_ != nullptr, + "{} was invoked without a live tesseract thresholder instance: please call SetImage before attempting this method.\n", + __func__); + thresholder_->SetSourceYResolution(ppi); } /** @@ -788,6 +792,9 @@ void TessBaseAPI::SetSourceResolution(int ppi) { * and it is therefore more efficient to provide a Pix directly. */ void TessBaseAPI::SetImage(Pix *pix, float angle) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); if (InternalResetImage()) { // as Image will take ownership and `pix` is not owned by us, we must clone it: Image img(false, pix); @@ -814,9 +821,13 @@ void TessBaseAPI::SetImage(Pix *pix, float angle) { * can be recognized with the same image. */ void TessBaseAPI::SetRectangle(int left, int top, int width, int height) { - if (thresholder_ == nullptr) { - return; - } + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init and/or SetImage before attempting this method.\n", + __func__); + ASSERT_HOST_MSG(thresholder_ != nullptr, + "{} was invoked without a live tesseract thresholder instance: please call SetImage before attempting this method.\n", + __func__); + // TODO: this ClearResults prematurely nukes the page image and pushes for the diagnostics log to be written to output file, // while this SetRectangle() very well may be meant to OCR a *second* rectangle in the existing page image, which will fail // today as the page image will be lost, thanks to ClearResults. 
@@ -832,30 +843,35 @@ void TessBaseAPI::SetRectangle(int left, int top, int width, int height) { * Get a copy of the internal thresholded image from Tesseract. */ Pix *TessBaseAPI::GetThresholdedImage() { - if (tesseract_ == nullptr || thresholder_ == nullptr) { - return nullptr; - } - if (tesseract_->pix_binary() == nullptr) { - if (verbose_process) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init and/or SetImage before attempting this method.\n", + __func__); + ASSERT_HOST_MSG(thresholder_ != nullptr, + "{} was invoked without a live tesseract thresholder instance: please call SetImage before attempting this method.\n", + __func__); + + Tesseract& tess = tesseract(); + if (tess.pix_binary() == nullptr) { + if (verbose_process) { tprintInfo("PROCESS: the source image is not a binary image, hence we apply a thresholding algo/subprocess to obtain a binarized image.\n"); - } + } Image pix; if (!Threshold(pix.obtains())) { return nullptr; } - tesseract_->set_pix_binary(pix); // candidate for move semantics + tess.set_pix_binary(pix); // candidate for move semantics - if (tesseract_->tessedit_dump_pageseg_images) { - tesseract_->AddPixDebugPage(tesseract_->pix_binary(), "Thresholded Image result (because it wasn't thresholded yet)"); + if (tess.tessedit_dump_pageseg_images) { + tess.AddPixDebugPage(tess.pix_binary(), "Thresholded Image result (because it wasn't thresholded yet)"); } } - // Image p1 = pixRotate(tesseract_->pix_binary(), 0.15, L_ROTATE_SHEAR, L_BRING_IN_WHITE, 0, 0); + // Image p1 = pixRotate(tess.pix_binary(), 0.15, L_ROTATE_SHEAR, L_BRING_IN_WHITE, 0, 0); // because we want to keep the public API as-is for now, instead of migrating it to using Image type directly, // we downgrade to `PIX *` at the exit point, hence the reponsibility to CLONE is ours: - return tesseract_->pix_binary().clone2pix(); + return tess.pix_binary().clone2pix(); } /** @@ -1105,7 +1121,8 @@ int 
TessBaseAPI::GetThresholdedImageScaleFactor() const { */ PageIterator *TessBaseAPI::AnalyseLayout(bool merge_similar_words) { if (FindLines() == 0) { - AutoPopDebugSectionLevel section_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Analyse Layout")); + Tesseract& tess = tesseract(); + AutoPopDebugSectionLevel section_handle(tess, tess.PushSubordinatePixDebugSection("Analyse Layout")); if (block_list_->empty()) { return nullptr; // The page was empty. @@ -1124,65 +1141,68 @@ PageIterator *TessBaseAPI::AnalyseLayout(bool merge_similar_words) { * internal structures. */ int TessBaseAPI::Recognize() { - if (tesseract_ == nullptr) { - return -1; - } + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init and/or SetImage before attempting this method.\n", + __func__); + + Tesseract& tess = tesseract(); + if (FindLines() != 0) { return -1; } - AutoPopDebugSectionLevel section_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Recognize (OCR)")); + AutoPopDebugSectionLevel section_handle(tess, tess.PushSubordinatePixDebugSection("Recognize (OCR)")); delete page_res_; if (block_list_->empty()) { - page_res_ = new PAGE_RES(false, block_list_, &tesseract_->prev_word_best_choice_); + page_res_ = new PAGE_RES(false, block_list_, &tess.prev_word_best_choice_); return 0; // Empty page. 
} - tesseract_->SetBlackAndWhitelist(); + tess.SetBlackAndWhitelist(); recognition_done_ = true; #if !DISABLED_LEGACY_ENGINE - if (tesseract_->tessedit_resegment_from_line_boxes) { + if (tess.tessedit_resegment_from_line_boxes) { if (verbose_process) tprintInfo("PROCESS: Re-segment from line boxes.\n"); - page_res_ = tesseract_->ApplyBoxes(tesseract_->input_file_path_.c_str(), true, block_list_); - } else if (tesseract_->tessedit_resegment_from_boxes) { + page_res_ = tess.ApplyBoxes(tess.input_file_path_.c_str(), true, block_list_); + } else if (tess.tessedit_resegment_from_boxes) { if (verbose_process) tprintInfo("PROCESS: Re-segment from page boxes.\n"); - page_res_ = tesseract_->ApplyBoxes(tesseract_->input_file_path_.c_str(), false, block_list_); + page_res_ = tess.ApplyBoxes(tess.input_file_path_.c_str(), false, block_list_); } else #endif // !DISABLED_LEGACY_ENGINE { if (verbose_process) tprintInfo("PROCESS: Re-segment from LSTM / previous word best choice.\n"); - page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(), block_list_, &tesseract_->prev_word_best_choice_); + page_res_ = new PAGE_RES(tess.AnyLSTMLang(), block_list_, &tess.prev_word_best_choice_); } if (page_res_ == nullptr) { return -1; } - if (tesseract_->tessedit_train_line_recognizer) { - AutoPopDebugSectionLevel subsection_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Train Line Recognizer: Correct Classify Words")); - if (!tesseract_->TrainLineRecognizer(tesseract_->input_file_path_.c_str(), output_file_, block_list_)) { + if (tess.tessedit_train_line_recognizer) { + AutoPopDebugSectionLevel subsection_handle(tess, tess.PushSubordinatePixDebugSection("Train Line Recognizer: Correct Classify Words")); + if (!tess.TrainLineRecognizer(tess.input_file_path_.c_str(), output_file_, block_list_)) { return -1; } - tesseract_->CorrectClassifyWords(page_res_); + tess.CorrectClassifyWords(page_res_); return 0; } #if !DISABLED_LEGACY_ENGINE - if (tesseract_->tessedit_make_boxes_from_boxes) { 
- AutoPopDebugSectionLevel subsection_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Make Boxes From Boxes: Correct Classify Words")); - tesseract_->CorrectClassifyWords(page_res_); + if (tess.tessedit_make_boxes_from_boxes) { + AutoPopDebugSectionLevel subsection_handle(tess, tess.PushSubordinatePixDebugSection("Make Boxes From Boxes: Correct Classify Words")); + tess.CorrectClassifyWords(page_res_); return 0; } #endif // !DISABLED_LEGACY_ENGINE int result = 0; - if (tesseract_->SupportsInteractiveScrollView()) { - AutoPopDebugSectionLevel subsection_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("PGEditor: Interactive Session")); + if (tess.SupportsInteractiveScrollView()) { + AutoPopDebugSectionLevel subsection_handle(tess, tess.PushSubordinatePixDebugSection("PGEditor: Interactive Session")); #if !GRAPHICS_DISABLED - tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); + tess.pgeditor_main(rect_width_, rect_height_, page_res_); #endif // !GRAPHICS_DISABLED // The page_res is invalid after an interactive session, so cleanup // in a way that lets us continue to the next page without crashing. 
@@ -1190,45 +1210,45 @@ int TessBaseAPI::Recognize() { page_res_ = nullptr; return -1; #if !DISABLED_LEGACY_ENGINE - } else if (tesseract_->tessedit_train_from_boxes) { - AutoPopDebugSectionLevel subsection_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Train From Boxes")); + } else if (tess.tessedit_train_from_boxes) { + AutoPopDebugSectionLevel subsection_handle(tess, tess.PushSubordinatePixDebugSection("Train From Boxes")); std::string fontname; ExtractFontName(output_file_.c_str(), &fontname); - tesseract_->ApplyBoxTraining(fontname, page_res_); - } else if (tesseract_->tessedit_ambigs_training) { - AutoPopDebugSectionLevel subsection_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Train Ambigs")); - FILE *training_output_file = tesseract_->init_recog_training(tesseract_->input_file_path_.c_str()); + tess.ApplyBoxTraining(fontname, page_res_); + } else if (tess.tessedit_ambigs_training) { + AutoPopDebugSectionLevel subsection_handle(tess, tess.PushSubordinatePixDebugSection("Train Ambigs")); + FILE *training_output_file = tess.init_recog_training(tess.input_file_path_.c_str()); // OCR the page segmented into words by tesseract. - tesseract_->recog_training_segmented(tesseract_->input_file_path_.c_str(), page_res_, training_output_file); + tess.recog_training_segmented(tess.input_file_path_.c_str(), page_res_, training_output_file); fclose(training_output_file); #endif // !DISABLED_LEGACY_ENGINE } else { - AutoPopDebugSectionLevel subsection_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("The Main Recognition Phase")); + AutoPopDebugSectionLevel subsection_handle(tess, tess.PushSubordinatePixDebugSection("The Main Recognition Phase")); if (scrollview_support) { - tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); + tess.pgeditor_main(rect_width_, rect_height_, page_res_); } // Now run the main recognition. 
- if (!tesseract_->paragraph_text_based) { - AutoPopDebugSectionLevel subsection_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Detect Paragraphs (Before Recognition)")); + if (!tess.paragraph_text_based) { + AutoPopDebugSectionLevel subsection_handle(tess, tess.PushSubordinatePixDebugSection("Detect Paragraphs (Before Recognition)")); DetectParagraphs(false); if (scrollview_support) { - tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); + tess.pgeditor_main(rect_width_, rect_height_, page_res_); } } - AutoPopDebugSectionLevel subsection_handle2(tesseract_, tesseract_->PushSubordinatePixDebugSection("Recognize All Words")); - if (tesseract_->recog_all_words(page_res_, nullptr, nullptr, 0)) { + AutoPopDebugSectionLevel subsection_handle2(tess, tess.PushSubordinatePixDebugSection("Recognize All Words")); + if (tess.recog_all_words(page_res_, nullptr, nullptr, 0)) { if (scrollview_support) { - tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); + tess.pgeditor_main(rect_width_, rect_height_, page_res_); } subsection_handle2.pop(); - if (tesseract_->paragraph_text_based) { - AutoPopDebugSectionLevel subsection_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Detect Paragraphs (After Recognition)")); + if (tess.paragraph_text_based) { + AutoPopDebugSectionLevel subsection_handle(tess, tess.PushSubordinatePixDebugSection("Detect Paragraphs (After Recognition)")); DetectParagraphs(true); if (scrollview_support) { - tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); + tess.pgeditor_main(rect_width_, rect_height_, page_res_); } } } else { @@ -1242,34 +1262,38 @@ int TessBaseAPI::Recognize() { void TessBaseAPI::SetInputImage(Pix *pix) { Image img(false, pix); img = img.copy(); - tesseract_->set_pix_original(img); + Tesseract &tess = tesseract(); + tess.set_pix_original(img); } // Takes ownership of the input pix. 
void TessBaseAPI::SetInputImage(Image &&pix) { - tesseract_->set_pix_original(pix); + Tesseract &tess = tesseract(); + tess.set_pix_original(pix); } void TessBaseAPI::SetInputImage(const Image &pix) { - tesseract_->set_pix_original(pix); + Tesseract &tess = tesseract(); + tess.set_pix_original(pix); } void TessBaseAPI::SetVisibleImage(Pix *pix) { pix_visible_image_ = pixCopy(NULL, pix); - // tesseract_->set_pix_visible_image(pix); + // tess.set_pix_visible_image(pix); } void TessBaseAPI::SetVisibleImage(Image &&pix) { pix_visible_image_ = pix; - // tesseract_->set_pix_visible_image(pix); + // tess.set_pix_visible_image(pix); } void TessBaseAPI::SetVisibleImage(const Image &pix) { pix_visible_image_ = pix; //.clone(); - //tesseract_->set_pix_visible_image(pix); + //tess.set_pix_visible_image(pix); } Pix *TessBaseAPI::GetInputImage() const { - return tesseract_->pix_original().clone2pix(); + const Tesseract &tess = tesseract(); + return tess.pix_original().clone2pix(); } static const char* NormalizationProcessModeName(int mode) { @@ -1306,7 +1330,8 @@ static const char *NormalizationTargetModeName(int mode) { // Grayscale normalization (preprocessing) bool TessBaseAPI::NormalizeImage(int mode) { - AutoPopDebugSectionLevel section_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Normalize Image")); + Tesseract& tess = tesseract(); + AutoPopDebugSectionLevel section_handle(tess, tess.PushSubordinatePixDebugSection("Normalize Image")); // Get a clone/copy of the source image rectangle, reduced to normalized greyscale, // and at the same resolution as the output binary. @@ -1325,10 +1350,10 @@ bool TessBaseAPI::NormalizeImage(int mode) { // ... and feed the result into the designated target(s): thresholder and/or tesseract source image (which is used as LSTM v4/v5 engine input). 
int targets = (mode & 0x03); - bool debug = (tesseract_->debug_image_normalization || tesseract_->tessedit_write_images); + bool debug = (tess.debug_image_normalization || tess.tessedit_write_images); if (false && debug) { - tesseract_->AddPixDebugPage(pix, fmt::format("Grayscale normalization mode = {} ({} ({}) + {} ({}))", mode, NormalizationProcessModeName(process), process, NormalizationTargetModeName(targets), targets)); + tess.AddPixDebugPage(pix, fmt::format("Grayscale normalization mode = {} ({} ({}) + {} ({}))", mode, NormalizationProcessModeName(process), process, NormalizationTargetModeName(targets), targets)); } switch (process) { @@ -1355,7 +1380,7 @@ bool TessBaseAPI::NormalizeImage(int mode) { } if (debug) { - tesseract_->AddPixDebugPage(result_pix, fmt::format("Grayscale normalization mode = {} ({} ({}) + {} ({}))", mode, NormalizationProcessModeName(process), process, NormalizationTargetModeName(targets), targets)); + tess.AddPixDebugPage(result_pix, fmt::format("Grayscale normalization mode = {} ({} ({}) + {} ({}))", mode, NormalizationProcessModeName(process), process, NormalizationTargetModeName(targets), targets)); } switch (targets) { @@ -1391,8 +1416,13 @@ Pix* TessBaseAPI::GetVisibleImage() { } const char *TessBaseAPI::GetInputName() { - if (tesseract_ != nullptr && !tesseract_->input_file_path_.empty()) { - return tesseract_->input_file_path_.c_str(); + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + + Tesseract &tess = tesseract(); + if (!tess.input_file_path_.empty()) { + return tess.input_file_path_.c_str(); } return nullptr; } @@ -1405,7 +1435,8 @@ const char * TessBaseAPI::GetVisibleImageFilename() { } const char *TessBaseAPI::GetDatapath() { - return tesseract_->datadir_.c_str(); + Tesseract &tess = tesseract(); + return tess.datadir_.c_str(); } int TessBaseAPI::GetSourceYResolution() { @@ -1428,7 +1459,8 @@ bool 
TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, if (!flist && !buf) { return false; } - int page_number = (tesseract_->tessedit_page_number >= 0) ? tesseract_->tessedit_page_number : 0; + Tesseract& tess = tesseract(); + int page_number = (tess.tessedit_page_number >= 0) ? tess.tessedit_page_number : 0; char pagename[MAX_PATH]; std::vector lines; @@ -1478,7 +1510,7 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, return false; } tprintInfo("Processing page #{} : {}\n", page_number + 1, pagename); - tesseract_->applybox_page.set_value(page_number); + tess.applybox_page.set_value(page_number); bool r = ProcessPage(pix, pagename, renderer); bool two_pass = false; @@ -1493,7 +1525,7 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, SetPageSegMode(PSM_SINGLE_BLOCK); // Set thresholding method to 0 for second pass regardless - tesseract_->thresholding_method = (int)ThresholdMethod::Otsu; + tess.thresholding_method = (int)ThresholdMethod::Otsu; // SetPageSegMode(PSM_SPARSE_TEXT); SetImage(newpix); @@ -1511,7 +1543,7 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, if (!r) { return false; } - if (tesseract_->tessedit_page_number >= 0) { + if (tess.tessedit_page_number >= 0) { break; } ++page_number; @@ -1529,11 +1561,12 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, size_t size, const char *filename, TessResultRenderer *renderer) { Image pix; - int page_number = (tesseract_->tessedit_page_number >= 0) ? tesseract_->tessedit_page_number : 0; + Tesseract& tess = tesseract(); + int page_number = (tess.tessedit_page_number >= 0) ? tess.tessedit_page_number : 0; size_t offset = 0; for (;; ++page_number) { - if (tesseract_->tessedit_page_number >= 0) { - page_number = tesseract_->tessedit_page_number; + if (tess.tessedit_page_number >= 0) { + page_number = tess.tessedit_page_number; pix = (data) ? 
pixReadMemTiff(data, size, page_number) : pixReadTiff(filename, page_number); } else { pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset) @@ -1543,12 +1576,12 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, size_t size, co break; } tprintInfo("Processing page #{} of multipage TIFF {}\n", page_number + 1, filename ? filename : "(from internal storage)"); - tesseract_->applybox_page.set_value(page_number); + tess.applybox_page.set_value(page_number); bool r = ProcessPage(pix, filename, renderer); if (!r) { return false; } - if (tesseract_->tessedit_page_number >= 0) { + if (tess.tessedit_page_number >= 0) { break; } if (!offset) { @@ -1562,12 +1595,13 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, size_t size, co // processing required due to being in a training mode. bool TessBaseAPI::ProcessPages(const char *filename, TessResultRenderer *renderer) { - AutoPopDebugSectionLevel section_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Process pages")); + Tesseract& tess = tesseract(); + AutoPopDebugSectionLevel section_handle(tess, tess.PushSubordinatePixDebugSection("Process pages")); bool result = ProcessPagesInternal(filename, renderer); #if !DISABLED_LEGACY_ENGINE if (result) { - if (tesseract_->tessedit_train_from_boxes && !tesseract_->WriteTRFile(output_file_.c_str())) { + if (tess.tessedit_train_from_boxes && !tess.WriteTRFile(output_file_.c_str())) { tprintError("Write of TR file failed: {}\n", output_file_.c_str()); return false; } @@ -1598,6 +1632,7 @@ static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, voi // stdin. We'll still do our best if the user likes pipes. 
bool TessBaseAPI::ProcessPagesInternal(const char *filename, TessResultRenderer *renderer) { + Tesseract &tess = tesseract(); bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "/dev/stdin") || !strcmp(filename, "-"); if (stdInput) { #if defined(WIN32) || defined(_WIN32) || defined(_WIN64) @@ -1745,7 +1780,7 @@ bool TessBaseAPI::ProcessPagesInternal(const char *filename, r = ProcessPagesMultipageTiff(data, buf.size(), filename, renderer); } else { - tesseract_->applybox_page.set_value(-1 /* all pages */); + tess.applybox_page.set_value(-1 /* all pages */); r = ProcessPage(pix, filename, renderer); } @@ -1760,7 +1795,8 @@ bool TessBaseAPI::ProcessPagesInternal(const char *filename, bool TessBaseAPI::ProcessPage(Pix *pix, const char *filename, TessResultRenderer *renderer) { - AutoPopDebugSectionLevel page_level_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection(fmt::format("Process a single page: page #{}", 1 + tesseract_->tessedit_page_number))); + Tesseract& tess = tesseract(); + AutoPopDebugSectionLevel page_level_handle(tess, tess.PushSubordinatePixDebugSection(fmt::format("Process a single page: page #{}", 1 + tess.tessedit_page_number))); //page_level_handle.SetAsRootLevelForParamUsageReporting(); SetInputName(filename); @@ -1786,7 +1822,7 @@ bool TessBaseAPI::ProcessPage(Pix *pix, const char *filename, // Image preprocessing on image // Grayscale normalization - int graynorm_mode = tesseract_->preprocess_graynorm_mode; + int graynorm_mode = tess.preprocess_graynorm_mode; { bool rc = NormalizeImage(graynorm_mode); if (!rc) @@ -1797,21 +1833,21 @@ bool TessBaseAPI::ProcessPage(Pix *pix, const char *filename, bool failed = false; - if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) { + if (tess.tessedit_pageseg_mode == PSM_AUTO_ONLY) { // Disabled character recognition if (! 
std::unique_ptr(AnalyseLayout())) { failed = true; } - } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) { + } else if (tess.tessedit_pageseg_mode == PSM_OSD_ONLY) { failed = (FindLines() != 0); } else { // Normal layout and character recognition. failed = (Recognize() < 0); } - if (tesseract_->tessedit_write_images) { + if (tess.tessedit_write_images) { Image page_pix = GetThresholdedImage(); - tesseract_->AddPixDebugPage(page_pix, fmt::format("processed page #{} : text recog done", 1 + tesseract_->tessedit_page_number)); + tess.AddPixDebugPage(page_pix, fmt::format("processed page #{} : text recog done", 1 + tess.tessedit_page_number)); } if (renderer && !failed) { @@ -1826,7 +1862,11 @@ bool TessBaseAPI::ProcessPage(Pix *pix, const char *filename, * Recognize. The returned iterator must be deleted after use. */ LTRResultIterator *TessBaseAPI::GetLTRIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + + if (page_res_ == nullptr) { return nullptr; } return new LTRResultIterator(page_res_, tesseract_, thresholder_->GetScaleFactor(), @@ -1843,7 +1883,10 @@ LTRResultIterator *TessBaseAPI::GetLTRIterator() { * DetectOS, or anything else that changes the internal PAGE_RES. */ ResultIterator *TessBaseAPI::GetIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + if (page_res_ == nullptr) { return nullptr; } return ResultIterator::StartOfParagraph(LTRResultIterator( @@ -1860,7 +1903,10 @@ ResultIterator *TessBaseAPI::GetIterator() { * DetectOS, or anything else that changes the internal PAGE_RES. 
*/ MutableIterator *TessBaseAPI::GetMutableIterator() { - if (tesseract_ == nullptr || page_res_ == nullptr) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + if (page_res_ == nullptr) { return nullptr; } return new MutableIterator(page_res_, tesseract_, thresholder_->GetScaleFactor(), @@ -1870,7 +1916,11 @@ MutableIterator *TessBaseAPI::GetMutableIterator() { /** Make a text string from the internal data structures. */ char *TessBaseAPI::GetUTF8Text() { - if (tesseract_ == nullptr || (!recognition_done_ && Recognize() < 0)) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + + if (!recognition_done_ && Recognize() < 0) { return nullptr; } std::string text(""); @@ -1914,7 +1964,8 @@ std::tuple TessBaseAPI::GetTableBoundingBox(unsigned i) return std::tuple(0, 0, 0, 0); } - const int height = tesseract_->ImageHeight(); + Tesseract &tess = tesseract(); + const int height = tess.ImageHeight(); return std::make_tuple( t[i].box.left(), height - t[i].box.top(), @@ -1929,8 +1980,9 @@ std::vector> TessBaseAPI::GetTableRows(unsigned i) return std::vector>(); } - std::vector> rows(t[i].rows.size()); - const int height = tesseract_->ImageHeight(); + Tesseract &tess = tesseract(); + std::vector> rows(t[i].rows.size()); + const int height = tess.ImageHeight(); for (unsigned j = 0; j < t[i].rows.size(); ++j) { rows[j] = @@ -1949,8 +2001,9 @@ std::vector> TessBaseAPI::GetTableCols(unsigned i) return std::vector>(); } - std::vector> cols(t[i].cols.size()); - const int height = tesseract_->ImageHeight(); + Tesseract &tess = tesseract(); + std::vector> cols(t[i].cols.size()); + const int height = tess.ImageHeight(); for (unsigned j = 0; j < t[i].cols.size(); ++j) { cols[j] = @@ -1978,7 +2031,11 @@ static void AddBoxToTSV(const PageIterator *it, PageIteratorLevel 
level, std::st * Returned string must be freed with the delete [] operator. */ char *TessBaseAPI::GetTSVText(int page_number, bool lang_info) { - if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize() < 0)) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + + if (page_res_ == nullptr && Recognize() < 0) { return nullptr; } @@ -2164,7 +2221,11 @@ const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + * Returned string must be freed with the delete [] operator. */ char *TessBaseAPI::GetBoxText(int page_number) { - if (tesseract_ == nullptr || (!recognition_done_ && Recognize() < 0)) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + + if (!recognition_done_ && Recognize() < 0) { return nullptr; } int blob_count; @@ -2216,9 +2277,15 @@ const int kLatinChs[] = {0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, * Returned string must be freed with the delete [] operator. */ char *TessBaseAPI::GetUNLVText() { - if (tesseract_ == nullptr || (!recognition_done_ && Recognize() < 0)) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + + if (!recognition_done_ && Recognize() < 0) { return nullptr; } + Tesseract &tess = tesseract(); + bool tilde_crunch_written = false; bool last_char_was_newline = true; bool last_char_was_tilde = false; @@ -2252,7 +2319,7 @@ char *TessBaseAPI::GetUNLVText() { } else { // NORMAL PROCESSING of non tilde crunched words. 
tilde_crunch_written = false; - tesseract_->set_unlv_suspects(word); + tess.set_unlv_suspects(word); const char *wordstr = word->best_choice->unichar_string().c_str(); const auto &lengths = word->best_choice->unichar_lengths(); int length = lengths.length(); @@ -2407,7 +2474,11 @@ int TessBaseAPI::MeanTextConf() { /** Returns an array of all word confidences, terminated by -1. */ int *TessBaseAPI::AllWordConfidences() { - if (tesseract_ == nullptr || (!recognition_done_ && Recognize() < 0)) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + + if (!recognition_done_ && Recognize() < 0) { return nullptr; } int n_word = 0; @@ -2448,13 +2519,14 @@ int *TessBaseAPI::AllWordConfidences() { */ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char *wordstr) { bool success = true; + Tesseract& tess = tesseract(); PageSegMode current_psm = GetPageSegMode(); SetPageSegMode(mode); - tesseract_->classify_enable_learning = false; + tess.classify_enable_learning = false; const std::unique_ptr text(GetUTF8Text()); - if (tesseract_->applybox_debug) { + if (tess.applybox_debug) { tprintDebug("Trying to adapt \"{}\" to \"{}\"\n", text.get(), wordstr); } if (text != nullptr) { @@ -2481,9 +2553,9 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char *wordstr) { // No match. 
delete page_res_; std::vector boxes; - page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_); - tesseract_->ReSegmentByClassification(page_res_); - tesseract_->TidyUp(page_res_); + page_res_ = tess.SetupApplyBoxes(boxes, block_list_); + tess.ReSegmentByClassification(page_res_); + tess.TidyUp(page_res_); PAGE_RES_IT pr_it(page_res_); if (pr_it.word() == nullptr) { success = false; @@ -2494,8 +2566,8 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char *wordstr) { word_res->BestChoiceToCorrectText(); } if (success) { - tesseract_->EnableLearning = true; - tesseract_->LearnWord(nullptr, word_res); + tess.EnableLearning = true; + tess.LearnWord(nullptr, word_res); } } else { success = false; @@ -2587,6 +2659,12 @@ void TessBaseAPI::WipeSqueakyCleanForReUse() { * destructing and reconstructing your TessBaseAPI. * Once End() has been used, none of the other API functions may be used * other than Init and anything declared above it in the class definition. + * + * All `Tesseract&` references produced by the tesseract() API are invalid + * after this call. If you don't want that, i.e. wish to use tesseract + * some more, then consider using the new WipeSqueakyCleanForReUse() API + * instead: that one DOES NOT invalidate the active Tesseract instance + * nor the references to it obtained previously. */ void TessBaseAPI::End() { WipeSqueakyCleanForReUse(); @@ -2618,11 +2696,13 @@ void TessBaseAPI::ClearPersistentCache() { * returns 0 if the word is invalid, non-zero if valid */ int TessBaseAPI::IsValidWord(const char *word) const { - return tesseract_->getDict().valid_word(word); + Tesseract &tess = const_cast(tesseract()); + return tess.getDict().valid_word(word); } // Returns true if utf8_character is defined in the UniCharset.
bool TessBaseAPI::IsValidCharacter(const char *utf8_character) const { - return tesseract_->unicharset_.contains_unichar(utf8_character); + const Tesseract &tess = tesseract(); + return tess.unicharset_.contains_unichar(utf8_character); } // TODO(rays) Obsolete this function and replace with a more aptly named @@ -2664,7 +2744,8 @@ bool TessBaseAPI::GetTextDirection(int *out_offset, float *out_slope) { /** Sets Dict::letter_is_okay_ function to point to the given function. */ void TessBaseAPI::SetDictFunc(DictFunc f) { if (tesseract_ != nullptr) { - tesseract_->getDict().letter_is_okay_ = f; + Tesseract &tess = tesseract(); + tess.getDict().letter_is_okay_ = f; } } @@ -2677,27 +2758,29 @@ void TessBaseAPI::SetDictFunc(DictFunc f) { * utf-8 string. */ void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) { - if (tesseract_ != nullptr) { - tesseract_->getDict().probability_in_context_ = f; + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + + Tesseract &tess = tesseract(); + tess.getDict().probability_in_context_ = f; // Set it for the sublangs too. - int num_subs = tesseract_->num_sub_langs(); + int num_subs = tess.num_sub_langs(); for (int i = 0; i < num_subs; ++i) { - tesseract_->get_sub_lang(i)->getDict().probability_in_context_ = f; + tess.get_sub_lang(i)->getDict().probability_in_context_ = f; } - } } /** Common code for setting the image. 
*/ bool TessBaseAPI::InternalResetImage() { - if (tesseract_ == nullptr) { - tprintError("Please call Init before attempting to set an image.\n"); - return false; - } + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + if (thresholder_ != nullptr) { thresholder_->Clear(); - } - if (thresholder_ == nullptr) { - thresholder_ = new ImageThresholder(tesseract_); + } else { + thresholder_ = new ImageThresholder(tesseract()); } ClearResults(); return true; @@ -2711,12 +2794,13 @@ bool TessBaseAPI::InternalResetImage() { * The usual argument to Threshold is Tesseract::mutable_pix_binary(). */ bool TessBaseAPI::Threshold(Pix **pix) { + Tesseract& tess = tesseract(); ASSERT_HOST(pix != nullptr); if (*pix != nullptr) { pixDestroy(pix); } // Zero resolution messes up the algorithms, so make sure it is credible. - int user_dpi = tesseract_->user_defined_dpi; + int user_dpi = tess.user_defined_dpi; int y_res = thresholder_->GetScaledYResolution(); if (user_dpi && (user_dpi < kMinCredibleResolution || user_dpi > kMaxCredibleResolution)) { tprintWarn( @@ -2741,11 +2825,11 @@ bool TessBaseAPI::Threshold(Pix **pix) { return false; } - auto selected_thresholding_method = static_cast(static_cast(tesseract_->thresholding_method)); + auto selected_thresholding_method = static_cast(static_cast(tess.thresholding_method)); Image pix_binary; std::string caption = ThresholdMethodName(selected_thresholding_method); - AutoPopDebugSectionLevel subsec_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection(fmt::format("Applying the threshold method chosen for this run: {}: {}", selected_thresholding_method, caption))); + AutoPopDebugSectionLevel subsec_handle(tess, tess.PushSubordinatePixDebugSection(fmt::format("Applying the threshold method chosen for this run: {}: {}", selected_thresholding_method, caption))); if (selected_thresholding_method == ThresholdMethod::Otsu) { 
pix_binary = pix; @@ -2756,11 +2840,11 @@ bool TessBaseAPI::Threshold(Pix **pix) { *pix = pix_binary.clone2pix(); if (!thresholder_->IsBinary()) { - tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds()); - tesseract_->set_pix_grey(thresholder_->GetPixRectGrey()); + tess.set_pix_thresholds(thresholder_->GetPixRectThresholds()); + tess.set_pix_grey(thresholder_->GetPixRectGrey()); } else { - tesseract_->set_pix_thresholds(nullptr); - tesseract_->set_pix_grey(nullptr); + tess.set_pix_thresholds(nullptr); + tess.set_pix_grey(nullptr); } } else { auto [ok, pix_grey, pix_binary2, pix_thresholds] = thresholder_->Threshold(selected_thresholding_method); @@ -2772,19 +2856,19 @@ bool TessBaseAPI::Threshold(Pix **pix) { pix_binary = pix_binary2; *pix = pix_binary.clone2pix(); - tesseract_->set_pix_thresholds(pix_thresholds); // candidates for move semantics - tesseract_->set_pix_grey(pix_grey); + tess.set_pix_thresholds(pix_thresholds); // candidates for move semantics + tess.set_pix_grey(pix_grey); // pix_thresholds.destroy(); // pix_grey.destroy(); } - if (tesseract_->tessedit_dump_pageseg_images) { - tesseract_->AddPixDebugPage(tesseract_->pix_grey(), fmt::format("{} : Grey = pre-image", caption)); - tesseract_->AddPixDebugPage(tesseract_->pix_thresholds(), fmt::format("{} : Thresholds", caption)); + if (tess.tessedit_dump_pageseg_images) { + tess.AddPixDebugPage(tess.pix_grey(), fmt::format("{} : Grey = pre-image", caption)); + tess.AddPixDebugPage(tess.pix_thresholds(), fmt::format("{} : Thresholds", caption)); if (verbose_process) { tprintInfo("PROCESS: The 'Thresholds' image displays the per-pixel grey level which will be used to decide which pixels are *foreground* (text, probably) and which pixels are *background* (i.e. the *paper* the text was printed on); you'll note that each pixel in the original (greyscale!) 
image which is darker than its corresponding threshold level is *binarized* to black (foreground in tesseract) while any lighter pixel is *binarized* to white (background in tesseract).\n"); } - tesseract_->AddPixDebugPage(pix_binary, fmt::format("{} : Binary = post-image", caption)); + tess.AddPixDebugPage(pix_binary, fmt::format("{} : Binary = post-image", caption)); } // demo a bit of pre-postprocessing @@ -2792,32 +2876,32 @@ bool TessBaseAPI::Threshold(Pix **pix) { const char *sequence = "c1.1 + d3.3"; const int dispsep = 0; Image pix_post = pixMorphSequence(pix_binary, sequence, dispsep); - tesseract_->AddPixCompedOverOrigDebugPage(pix_post, fmt::format("{} : post-processed: {} -- just an example to showcase what leptonica can do for us!", caption, sequence)); + tess.AddPixCompedOverOrigDebugPage(pix_post, fmt::format("{} : post-processed: {} -- just an example to showcase what leptonica can do for us!", caption, sequence)); l_int32 w, h, d; - Image composite = tesseract_->pix_grey().copy(); + Image composite = tess.pix_grey().copy(); pixGetDimensions(composite, &w, &h, &d); Image mask = pixConvert1To8(nullptr, pix_post, 255, 0); pixRasterop(composite, 0, 0, w, h, PIX_PAINT, mask, 0, 0); - tesseract_->AddPixCompedOverOrigDebugPage(composite, fmt::format("{} : post-processed & masked with: {} -- this should remove all image noise that's not very close to the text, i.e. is considered *not part of the text to OCR*.", caption, sequence)); + tess.AddPixCompedOverOrigDebugPage(composite, fmt::format("{} : post-processed & masked with: {} -- this should remove all image noise that's not very close to the text, i.e. 
is considered *not part of the text to OCR*.", caption, sequence)); - Image noise1 = pixEmphasizeImageNoise(tesseract_->pix_original().ptr()); - Image noise2 = pixEmphasizeImageNoise(tesseract_->pix_grey().ptr()); + Image noise1 = pixEmphasizeImageNoise(tess.pix_original().ptr()); + Image noise2 = pixEmphasizeImageNoise(tess.pix_grey().ptr()); Image noise3 = pixEmphasizeImageNoise(composite.ptr()); Image noise4 = pixEmphasizeImageNoise(pix_post.ptr()); - tesseract_->AddPixCompedOverOrigDebugPage(noise1, fmt::format("{} : post-processed :: noise emphasis A: emphasized the noise inherent in the source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - tesseract_->AddPixCompedOverOrigDebugPage(noise2, fmt::format("{} : post-processed :: noise emphasis B: emphasized the noise inherent in the greyscaled / normalized source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - tesseract_->AddPixCompedOverOrigDebugPage(noise3, fmt::format("{} : post-processed :: noise emphasis C: emphasized the noise inherent in the composited image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - tesseract_->AddPixCompedOverOrigDebugPage(noise4, fmt::format("{} : post-processed :: noise emphasis D: emphasized the noise inherent in the closed & binarized / thresholded source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess.AddPixCompedOverOrigDebugPage(noise1, fmt::format("{} : post-processed :: noise emphasis A: emphasized the noise inherent in the source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess.AddPixCompedOverOrigDebugPage(noise2, fmt::format("{} : post-processed :: noise emphasis B: emphasized the noise inherent in the greyscaled / normalized source image. 
Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess.AddPixCompedOverOrigDebugPage(noise3, fmt::format("{} : post-processed :: noise emphasis C: emphasized the noise inherent in the composited image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess.AddPixCompedOverOrigDebugPage(noise4, fmt::format("{} : post-processed :: noise emphasis D: emphasized the noise inherent in the closed & binarized / thresholded source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - noise1 = pixEmphasizeImageNoise2(tesseract_->pix_original().ptr()); - noise2 = pixEmphasizeImageNoise2(tesseract_->pix_grey().ptr()); + noise1 = pixEmphasizeImageNoise2(tess.pix_original().ptr()); + noise2 = pixEmphasizeImageNoise2(tess.pix_grey().ptr()); noise3 = pixEmphasizeImageNoise2(composite.ptr()); noise4 = pixEmphasizeImageNoise2(pix_post.ptr()); - tesseract_->AddPixCompedOverOrigDebugPage(noise1, fmt::format("{} : post-processed :: noise emphasis E: emphasized the noise inherent in the source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - tesseract_->AddPixCompedOverOrigDebugPage(noise2, fmt::format("{} : post-processed :: noise emphasis F: emphasized the noise inherent in the greyscaled / normalized source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - tesseract_->AddPixCompedOverOrigDebugPage(noise3, fmt::format("{} : post-processed :: noise emphasis G: emphasized the noise inherent in the composited image. 
Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); - tesseract_->AddPixCompedOverOrigDebugPage(noise4, fmt::format("{} : post-processed :: noise emphasis H: emphasized the noise inherent in the closed & binarized / thresholded source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess.AddPixCompedOverOrigDebugPage(noise1, fmt::format("{} : post-processed :: noise emphasis E: emphasized the noise inherent in the source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess.AddPixCompedOverOrigDebugPage(noise2, fmt::format("{} : post-processed :: noise emphasis F: emphasized the noise inherent in the greyscaled / normalized source image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess.AddPixCompedOverOrigDebugPage(noise3, fmt::format("{} : post-processed :: noise emphasis G: emphasized the noise inherent in the composited image. Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); + tess.AddPixCompedOverOrigDebugPage(noise4, fmt::format("{} : post-processed :: noise emphasis H: emphasized the noise inherent in the closed & binarized / thresholded source image. 
Every non-black/white pixel is colored to make them more apparent for the human inspector.", caption)); if (false) { // NOTE/WARNING: if you want to pick up one of these processed images as the replacement `*pix` then you MUST @@ -2862,7 +2946,7 @@ bool TessBaseAPI::Threshold(Pix **pix) { "Corrected to {}.\n", thresholder_->GetScaledEstimatedResolution(), estimated_res); } - tesseract_->set_source_resolution(estimated_res); + tess.set_source_resolution(estimated_res); (void)Monitor().bump_progress().exec_progress_func(); @@ -2871,7 +2955,13 @@ bool TessBaseAPI::Threshold(Pix **pix) { /** Find lines from the image making the BLOCK_LIST. */ int TessBaseAPI::FindLines() { - if (thresholder_ == nullptr || thresholder_->IsEmpty()) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init and/or SetImage before attempting this method.\n", + __func__); + ASSERT_HOST_MSG(thresholder_ != nullptr, + "{} was invoked without a live tesseract thresholder instance: please call SetImage before attempting this method.\n", + __func__); + if (thresholder_->IsEmpty()) { tprintError("Please call SetImage before attempting recognition.\n"); return -1; } @@ -2881,46 +2971,47 @@ int TessBaseAPI::FindLines() { if (!block_list_->empty()) { return 0; } - if (tesseract_ == nullptr) { - tesseract_ = new Tesseract(*this, nullptr); + ASSERT0(tesseract_ != nullptr); + Tesseract& tess = tesseract(); +#if 0 #if !DISABLED_LEGACY_ENGINE - tesseract_->InitAdaptiveClassifier(nullptr); + tess.InitAdaptiveClassifier(nullptr); #endif - } - if (tesseract_->pix_binary() == nullptr) { - if (verbose_process) { +#endif + if (tess.pix_binary() == nullptr) { + if (verbose_process) { tprintInfo("PROCESS: the source image is not a binary image, hence we apply a thresholding algo/subprocess to obtain a binarized image.\n"); - } + } Image pix; if (!Threshold(pix.obtains())) { - return -1; - } - tesseract_->set_pix_binary(pix); + return -1; + } + 
tess.set_pix_binary(pix); } - if (tesseract_->tessedit_dump_pageseg_images) { - tesseract_->AddPixDebugPage(tesseract_->pix_binary(), "FindLines :: Thresholded Image -- this image is now set as the page Master Source Image for this activity"); + if (tess.tessedit_dump_pageseg_images) { + tess.AddPixDebugPage(tess.pix_binary(), "FindLines :: Thresholded Image -- this image is now set as the page Master Source Image for this activity"); } if (verbose_process) { tprintInfo("PROCESS: prepare the image for page segmentation, i.e. discovery of all text areas + bounding boxes & image/text orientation and script{} detection.\n", - (tesseract_->textord_equation_detect ? " + equations" : "")); + (tess.textord_equation_detect ? " + equations" : "")); } - AutoPopDebugSectionLevel section_handle(tesseract_, tesseract_->PushSubordinatePixDebugSection("Prepare for Page Segmentation")); + AutoPopDebugSectionLevel section_handle(tess, tess.PushSubordinatePixDebugSection("Prepare for Page Segmentation")); - tesseract_->PrepareForPageseg(); + tess.PrepareForPageseg(); #if !DISABLED_LEGACY_ENGINE - if (tesseract_->textord_equation_detect) { + if (tess.textord_equation_detect) { if (equ_detect_ == nullptr && !datapath_.empty()) { equ_detect_ = new EquationDetect(*this, datapath_.c_str()); } if (equ_detect_ == nullptr) { tprintWarn("Could not set equation detector\n"); } else { - tesseract_->SetEquationDetect(equ_detect_); + tess.SetEquationDetect(equ_detect_); } } #endif // !DISABLED_LEGACY_ENGINE @@ -2932,7 +3023,7 @@ int TessBaseAPI::FindLines() { #endif OSResults osr; #if !DISABLED_LEGACY_ENGINE - if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && osd_tess == nullptr) { + if (PSM_OSD_ENABLED(tess.tessedit_pageseg_mode) && osd_tess == nullptr) { if (strcmp(language_.c_str(), "osd") == 0) { osd_tess = tesseract_; } else { @@ -2960,13 +3051,13 @@ int TessBaseAPI::FindLines() { } #endif // !DISABLED_LEGACY_ENGINE - if (tesseract_->SegmentPage(tesseract_->input_file_path_.c_str(), 
block_list_, osd_tess, &osr) < 0) { + if (tess.SegmentPage(tess.input_file_path_.c_str(), block_list_, osd_tess, &osr) < 0) { return -1; } // If Devanagari is being recognized, we use different images for page seg // and for OCR. - tesseract_->PrepareForTessOCR(block_list_, &osr); + tess.PrepareForTessOCR(block_list_, &osr); return 0; } @@ -2975,7 +3066,8 @@ int TessBaseAPI::FindLines() { * Return average gradient of lines on page. */ float TessBaseAPI::GetGradient() { - return tesseract_->gradient(); + Tesseract &tess = tesseract(); + return tess.gradient(); } /** Delete the pageres and clear the block list ready for a new page. */ @@ -3015,7 +3107,10 @@ void TessBaseAPI::ClearResults() { * Also return the number of recognized blobs in blob_count. */ int TessBaseAPI::TextLength(int *blob_count) const { - if (tesseract_ == nullptr || page_res_ == nullptr) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + if (page_res_ == nullptr) { return 0; } @@ -3048,27 +3143,29 @@ int TessBaseAPI::TextLength(int *blob_count) const { * Returns true if the image was processed successfully. 
*/ bool TessBaseAPI::DetectOS(OSResults *osr) { - if (tesseract_ == nullptr) { - return false; - } + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); ClearResults(); - if (tesseract_->pix_binary() == nullptr) { + Tesseract& tess = tesseract(); + if (tess.pix_binary() == nullptr) { Image pix; if (!Threshold(pix.obtains())) { return false; } - tesseract_->set_pix_binary(pix); // candidate for move semantics + tess.set_pix_binary(pix); // candidate for move semantics - if (tesseract_->tessedit_write_images) - tesseract_->AddPixDebugPage(tesseract_->pix_binary(), "DetectOS (Orientation And Script) : Thresholded Image"); + if (tess.tessedit_write_images) + tess.AddPixDebugPage(tess.pix_binary(), "DetectOS (Orientation And Script) : Thresholded Image"); } - return tesseract_->orientation_and_script_detection(tesseract_->input_file_path_.c_str(), osr) > 0; + return tess.orientation_and_script_detection(tess.input_file_path_.c_str(), osr) > 0; } #endif // !DISABLED_LEGACY_ENGINE void TessBaseAPI::set_min_orientation_margin(double margin) { - tesseract_->min_orientation_margin.set_value(margin); + Tesseract &tess = tesseract(); + tess.min_orientation_margin.set_value(margin); } /** @@ -3131,13 +3228,14 @@ void TessBaseAPI::GetBlockTextOrientations(int **block_orientation, bool **verti } void TessBaseAPI::DetectParagraphs(bool after_text_recognition) { + Tesseract& tess = tesseract(); if (paragraph_models_ == nullptr) { - paragraph_models_ = new std::vector; + paragraph_models_ = new std::vector; } MutableIterator *result_it = GetMutableIterator(); do { // Detect paragraphs for this block std::vector models; - tesseract_->DetectParagraphs(after_text_recognition, result_it, &models); + tess.DetectParagraphs(after_text_recognition, result_it, &models); paragraph_models_->insert(paragraph_models_->end(), models.begin(), models.end()); } while (result_it->Next(RIL_BLOCK)); 
delete result_it; @@ -3145,39 +3243,51 @@ void TessBaseAPI::DetectParagraphs(bool after_text_recognition) { /** This method returns the string form of the specified unichar. */ const char *TessBaseAPI::GetUnichar(int unichar_id) const { - return tesseract_->unicharset_.id_to_unichar(unichar_id); + const Tesseract &tess = tesseract(); + return tess.unicharset_.id_to_unichar(unichar_id); } /** Return the pointer to the i-th dawg loaded into tesseract_ object. */ const Dawg *TessBaseAPI::GetDawg(int i) const { - if (tesseract_ == nullptr || i >= NumDawgs()) { + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init and/or SetImage before attempting this method.\n", + __func__); + + if (i >= NumDawgs()) { return nullptr; } - return tesseract_->getDict().GetDawg(i); + Tesseract &tess = const_cast(tesseract()); + return tess.getDict().GetDawg(i); } /** Return the number of dawgs loaded into tesseract_ object. */ int TessBaseAPI::NumDawgs() const { - return tesseract_ == nullptr ? 
0 : tesseract_->getDict().NumDawgs(); + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a live tesseract instance: please call Init before attempting this method.\n", + __func__); + Tesseract &tess = const_cast(tesseract()); + return tess.getDict().NumDawgs(); } void TessBaseAPI::ReportDebugInfo() { - if (tesseract_ == nullptr) { - return; - } - tesseract_->ReportDebugInfo(); + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a " + "live tesseract instance: you may have a bug that looses a " + "lot of tesseract diagnostics info + reporting for you.\n", + __func__); + Tesseract &tess = tesseract(); + tess.ReportDebugInfo(); } void TessBaseAPI::FinalizeAndWriteDiagnosticsReport() { - if (tesseract_ == nullptr) { - ASSERT_HOST_MSG(false, - "FinalizeAndWriteDiagnosticsReport was invoked without a " + ASSERT_HOST_MSG(tesseract_ != nullptr, + "{} was invoked without a " "live tesseract instance: you may have a bug that looses a " - "lot of tesseract diagnostics info + reporting for you.\n"); - return; - }; - tesseract_->ReportDebugInfo(); + "lot of tesseract diagnostics info + reporting for you.\n", + __func__); + Tesseract &tess = tesseract(); + tess.ReportDebugInfo(); } /** Escape a char string - replace <>&"' with HTML codes. 
*/ @@ -3208,8 +3318,23 @@ std::string HOcrEscape(const char *text) { return ret; } -std::string mkUniqueOutputFilePath(const char* basepath, int page_number, const char* label, const char* filename_extension) -{ +const Tesseract &TessBaseAPI::tesseract() const { + if (tesseract_ == nullptr) { + TessBaseAPI &owner = const_cast(*this); + tesseract_ = new tesseract::Tesseract(owner, nullptr); + } + return *tesseract_; +} + +Tesseract &TessBaseAPI::tesseract() { + if (tesseract_ == nullptr) { + TessBaseAPI &owner = *this; + tesseract_ = new tesseract::Tesseract(owner, nullptr); + } + return *tesseract_; +} + +std::string mkUniqueOutputFilePath(const char *basepath, int page_number, const char *label, const char *filename_extension) { size_t pos = strcspn(basepath, ":\\/"); const char* filename = basepath; const char* p = basepath + pos; diff --git a/src/api/pdfrenderer.cpp b/src/api/pdfrenderer.cpp index 204e20081f..cfd82bedf2 100644 --- a/src/api/pdfrenderer.cpp +++ b/src/api/pdfrenderer.cpp @@ -923,7 +923,7 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI *api) { if (!textonly_) { char *pdf_object = nullptr; - int jpg_quality = api->tesseract()->jpg_quality; + int jpg_quality = api->tesseract().jpg_quality; if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize, jpg_quality)) { if (destroy_pix) { diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp index aa555debb0..d7520276cc 100644 --- a/src/ccmain/tesseractclass.cpp +++ b/src/ccmain/tesseractclass.cpp @@ -467,7 +467,7 @@ Tesseract::Tesseract(TessBaseAPI &owner, Tesseract *parent) , BOOL_MEMBER(debug_output_diagnostics_HTML, false, "Write the debug/diagnostics output to a HTML file, including the collected images of the various process stages inside tesseract. The content is equivalent to the debug info you see on stderr, but in a nicely formatted and easier to grok modern format. Also handy for sharing your sessions' diagnostics with others. 
The output filename is derived from the source image name and output base path.", params()), INT_MEMBER(debug_output_diagnostics_images_format, IMG4W_WEBP_LOSSLESS, "The format of the images included in the debug/diagnostics output HTML file. Specify a number: 0:PNG, 1:JPEG, 2:WebP, 3:lossless-WebP, 4:TIFF. While we support TIFF and higher numbers, it is ill-advised to use as web browsers won't support those out of the box and choosing those formats will strongly and *negatively* impact your HTML diagnostics viewing experience. Tip: use PNG or JPEG if you want the output to be produced faster, lossless-WEBP if you want smaller image files with maximum precision. Set the jpeg_quality parameter for any of these formats for targeted compression ratio.", params()) - , pixa_debug_(this) + , pixa_debug_(*this) , splitter_(this) , image_finder_(this) , line_finder_(this) diff --git a/src/ccmain/thresholder.cpp b/src/ccmain/thresholder.cpp index 1c30d8a5db..bf4a36d9c0 100644 --- a/src/ccmain/thresholder.cpp +++ b/src/ccmain/thresholder.cpp @@ -41,7 +41,7 @@ namespace tesseract { -ImageThresholder::ImageThresholder(Tesseract* tess) +ImageThresholder::ImageThresholder(Tesseract& tess) : tesseract_(tess) , pix_(nullptr) , image_width_(0) @@ -51,7 +51,6 @@ ImageThresholder::ImageThresholder(Tesseract* tess) , scale_(1) , yres_(300) , estimated_res_(300) { - ASSERT0(tess != nullptr); SetRectangle(0, 0, 0, 0); } @@ -216,14 +215,14 @@ std::tuple ImageThresholder::Threshold(ThresholdMetho l_int32 pix_w, pix_h; pixGetDimensions(pix_ /* pix_grey */, &pix_w, &pix_h, nullptr); - if (tesseract_->thresholding_debug) { + if (tesseract_.thresholding_debug) { tprintDebug("\nimage width: {} height: {} ppi: {}\n", pix_w, pix_h, yres_); } switch (method) { case ThresholdMethod::Sauvola: { int window_size; - window_size = tesseract_->thresholding_window_size * yres_; + window_size = tesseract_.thresholding_window_size * yres_; window_size = std::max(7, window_size); window_size = 
std::min(pix_w < pix_h ? pix_w - 3 : pix_h - 3, window_size); int half_window_size = window_size / 2; @@ -242,10 +241,10 @@ std::tuple ImageThresholder::Threshold(ThresholdMetho ny = pix_h / (half_window_size + 2); } - double kfactor = tesseract_->thresholding_kfactor; + double kfactor = tesseract_.thresholding_kfactor; kfactor = std::max(0.0, kfactor); - if (tesseract_->thresholding_debug) { + if (tesseract_.thresholding_debug) { tprintDebug("Sauvola thresholding: window size: {} kfactor: {} nx: {} ny: {}\n", window_size, kfactor, nx, ny); } @@ -274,19 +273,19 @@ std::tuple ImageThresholder::Threshold(ThresholdMetho case ThresholdMethod::LeptonicaOtsu: { int tile_size; - double tile_size_factor = tesseract_->thresholding_tile_size; + double tile_size_factor = tesseract_.thresholding_tile_size; tile_size = tile_size_factor * yres_; tile_size = std::max(16, tile_size); int smooth_size; - double smooth_size_factor = tesseract_->thresholding_smooth_kernel_size; + double smooth_size_factor = tesseract_.thresholding_smooth_kernel_size; smooth_size_factor = std::max(0.0, smooth_size_factor); smooth_size = smooth_size_factor * yres_; int half_smooth_size = smooth_size / 2; - double score_fraction = tesseract_->thresholding_score_fraction; + double score_fraction = tesseract_.thresholding_score_fraction; - if (tesseract_->thresholding_debug) { + if (tesseract_.thresholding_debug) { tprintDebug("LeptonicaOtsu thresholding: tile size: {}, smooth_size: {}, score_fraction: {}\n", tile_size, smooth_size, score_fraction); } @@ -339,13 +338,13 @@ std::tuple ImageThresholder::Threshold(ThresholdMetho bool ImageThresholder::ThresholdToPix(Image *pix) { // tolerate overlarge images when they're about to be cropped by GetPixRect(): if (IsFullImage()) { - if (tesseract_->CheckAndReportIfImageTooLarge(pix_)) { + if (tesseract_.CheckAndReportIfImageTooLarge(pix_)) { return false; } } else { // validate against the future cropped image size: - if 
(tesseract_->CheckAndReportIfImageTooLarge(rect_width_, rect_height_)) { + if (tesseract_.CheckAndReportIfImageTooLarge(rect_width_, rect_height_)) { return false; } } diff --git a/src/ccmain/thresholder.h b/src/ccmain/thresholder.h index 4d91ed3a27..8f0f16ce62 100644 --- a/src/ccmain/thresholder.h +++ b/src/ccmain/thresholder.h @@ -85,7 +85,7 @@ class TessBaseAPI; /// desired. class TESS_API ImageThresholder { public: - ImageThresholder(Tesseract *tess); + ImageThresholder(Tesseract &tess); virtual ~ImageThresholder(); /// Destroy the Pix if there is one, freeing memory. @@ -218,7 +218,7 @@ class TESS_API ImageThresholder { const std::vector &hi_values, Image *pix) const; private: - Tesseract* tesseract_; // reference to the active instance + Tesseract& tesseract_; // reference to the active instance protected: /// Clone or other copy of the source Pix. diff --git a/src/ccstruct/debugpixa.cpp b/src/ccstruct/debugpixa.cpp index 5b555f3018..300552a95d 100644 --- a/src/ccstruct/debugpixa.cpp +++ b/src/ccstruct/debugpixa.cpp @@ -720,7 +720,7 @@ namespace tesseract { } #endif - DebugPixa::DebugPixa(Tesseract* tess) + DebugPixa::DebugPixa(Tesseract& tess) : tesseract_(tess) , content_has_been_written_to_file(false) , active_step_index(-1) @@ -1416,23 +1416,23 @@ namespace tesseract { int img_depth = pixGetDepth(pixs); ASSERT0(img_depth == 1 || img_depth == 8 || img_depth == 24 || img_depth == 32); - auto [image_extension, pix_format_id, image_format_id] = get_image_output_datums(img_depth, tesseract_->debug_output_diagnostics_images_format); + auto [image_extension, pix_format_id, image_format_id] = get_image_output_datums(img_depth, tesseract_.debug_output_diagnostics_images_format); std::string fn(partname + SanitizeFilenamePart(fmt::format(".img{:04d}.", counter) + caption) + image_extension); TBOX cliprect = cliprects[idx]; auto clip_area = cliprect.area(); Image bgimg; if (clip_area > 0) { - bgimg = tesseract_->pix_original(); // clones ownership + bgimg = 
tesseract_.pix_original(); // clones ownership } - write_one_pix_for_html(html, counter, image_format_id, tesseract_->jpg_quality, fn, pixs, bgimg, TruncatedForTitle(caption), caption); + write_one_pix_for_html(html, counter, image_format_id, tesseract_.jpg_quality, fn, pixs, bgimg, TruncatedForTitle(caption), caption); if (clip_area > 0 && false) { counter++; fn = partname + SanitizeFilenamePart(fmt::format(".img{:04d}.", counter) + caption) + image_extension; - write_one_pix_for_html(html, counter, image_format_id, tesseract_->jpg_quality, fn, pixs, bgimg, TruncatedForTitle(caption), caption, &cliprect); + write_one_pix_for_html(html, counter, image_format_id, tesseract_.jpg_quality, fn, pixs, bgimg, TruncatedForTitle(caption), caption, &cliprect); } //pixs.destroy(); @@ -1564,7 +1564,6 @@ namespace tesseract { void DebugPixa::WriteHTML(const char* filename) { - ASSERT0(tesseract_ != nullptr); if (HasContent()) { double time_elapsed_until_report = grand_clock.clock.get_elapsed_ns(); plf::nanotimer report_clock; @@ -1614,13 +1613,13 @@ namespace tesseract { std::string now_str = ss.str(); std::ostringstream languages; - int num_subs = tesseract_->num_sub_langs(); + int num_subs = tesseract_.num_sub_langs(); if (num_subs > 0) { int i; for (i = 0; i < num_subs - 1; ++i) { - languages << tesseract_->get_sub_lang(i)->lang_ << " + "; + languages << tesseract_.get_sub_lang(i)->lang_ << " + "; } - languages << tesseract_->get_sub_lang(i)->lang_; + languages << tesseract_.get_sub_lang(i)->lang_; } // CSS styles for the generated HTML @@ -1659,30 +1658,30 @@ namespace tesseract { Main directory{}\n\ \n\ ", - html_styling(tesseract_->datadir_, "normalize.css").c_str(), - html_styling(tesseract_->datadir_, "modern-normalize.css").c_str(), - html_styling(tesseract_->datadir_, "diag-report.css").c_str(), + html_styling(tesseract_.datadir_, "normalize.css").c_str(), + html_styling(tesseract_.datadir_, "modern-normalize.css").c_str(), + html_styling(tesseract_.datadir_, 
"diag-report.css").c_str(), TESSERACT_VERSION_STR, now_str.c_str(), - check_unknown_and_encode(tesseract_->input_file_path_).c_str(), - check_unknown_and_encode(tesseract_->imagebasename_).c_str(), - check_unknown_and_encode(tesseract_->imagefile_).c_str(), - tesseract_->lang_.c_str(), + check_unknown_and_encode(tesseract_.input_file_path_).c_str(), + check_unknown_and_encode(tesseract_.imagebasename_).c_str(), + check_unknown_and_encode(tesseract_.imagefile_).c_str(), + tesseract_.lang_.c_str(), languages.str().c_str(), - check_unknown_and_encode(tesseract_->language_data_path_prefix_).c_str(), - check_unknown_and_encode(tesseract_->datadir_).c_str(), - check_unknown_and_encode(tesseract_->directory_).c_str() + check_unknown_and_encode(tesseract_.language_data_path_prefix_).c_str(), + check_unknown_and_encode(tesseract_.datadir_).c_str(), + check_unknown_and_encode(tesseract_.directory_).c_str() ).c_str(), html); plf::nanotimer image_clock; image_clock.start(); { - Image pixs = tesseract_->pix_original(); + Image pixs = tesseract_.pix_original(); int img_depth = pixGetDepth(pixs); - auto [image_extension, pix_format_id, image_format_id] = get_image_output_datums(img_depth, tesseract_->debug_output_diagnostics_images_format); + auto [image_extension, pix_format_id, image_format_id] = get_image_output_datums(img_depth, tesseract_.debug_output_diagnostics_images_format); std::string fn(partname + SanitizeFilenamePart(".img-original.") + image_extension); - write_one_pix_for_html(html, 0, image_format_id, tesseract_->jpg_quality, fn, pixs, Image(), "original image", "The original image as registered with the Tesseract instance."); + write_one_pix_for_html(html, 0, image_format_id, tesseract_.jpg_quality, fn, pixs, Image(), "original image", "The original image as registered with the Tesseract instance."); } source_image_elapsed_ns = image_clock.get_elapsed_ns(); @@ -1859,7 +1858,7 @@ namespace tesseract { step.elapsed_ns = 0; } - tesseract::ParamsVectors *vec = 
tesseract_->params(); + tesseract::ParamsVectors *vec = tesseract_.params(); // produce a HTML-formatted parameter usage report by using the regular way to get such a report, // then feed it through the NDtext-to-HTML transformer and only then write the final result in one fell swoop to file. @@ -1888,7 +1887,7 @@ namespace tesseract { auto level = section_info.level; if (level == 3 && verbose_process) { - tesseract::ParamsVectors *vec = tesseract_->params(); + tesseract::ParamsVectors *vec = tesseract_.params(); ParamUtils::ReportParamsUsageStatistics(nullptr, vec, level, title); } } @@ -1917,13 +1916,13 @@ namespace tesseract { AutoPopDebugSectionLevel::~AutoPopDebugSectionLevel() { if (section_handle_ >= 0) { - tesseract_->PopPixDebugSection(section_handle_); + tesseract_.PopPixDebugSection(section_handle_); } } void AutoPopDebugSectionLevel::pop() { if (section_handle_ >= 0) { - tesseract_->PopPixDebugSection(section_handle_); + tesseract_.PopPixDebugSection(section_handle_); section_handle_ = INT_MIN; } } diff --git a/src/ccstruct/debugpixa.h b/src/ccstruct/debugpixa.h index dafebb7f91..7530a9ede1 100644 --- a/src/ccstruct/debugpixa.h +++ b/src/ccstruct/debugpixa.h @@ -36,7 +36,7 @@ namespace tesseract { class DebugPixa { public: // TODO(rays) add another constructor with size control. - DebugPixa(Tesseract* tess); + DebugPixa(Tesseract& tess); // If the filename_ has been set and there are any debug images, they are // written to the set filename_. @@ -118,7 +118,7 @@ namespace tesseract { }; private: - Tesseract* tesseract_; // reference to the driving tesseract instance + Tesseract& tesseract_; // reference to the driving tesseract instance private: // The collection of images to put in the PDF. 
@@ -180,7 +180,7 @@ namespace tesseract { class AutoPopDebugSectionLevel { public: - AutoPopDebugSectionLevel(Tesseract *tess, int section_handle) + AutoPopDebugSectionLevel(Tesseract &tess, int section_handle) : section_handle_(section_handle), tesseract_(tess) {} // auto-pop via end-of-scope i.e. object destructor: @@ -190,7 +190,7 @@ namespace tesseract { void pop(); protected: - Tesseract *tesseract_; + Tesseract &tesseract_; int section_handle_; }; diff --git a/src/ccstruct/pageres.h b/src/ccstruct/pageres.h index b9789cb72b..e7e3c2617a 100644 --- a/src/ccstruct/pageres.h +++ b/src/ccstruct/pageres.h @@ -192,7 +192,7 @@ class TESS_API WERD_RES : public ELIST_LINK { // match as they are both before any chopping. // TODO(rays) determine if docqual does anything useful and delete bln_boxes // if it doesn't. - tesseract::BoxWord *bln_boxes = nullptr; // BLN input bounding boxes. + BoxWord *bln_boxes = nullptr; // BLN input bounding boxes. // The ROW that this word sits in. NOT owned by the WERD_RES. ROW *blob_row = nullptr; // The denorm provides the transformation to get back to the rotated image @@ -269,7 +269,7 @@ class TESS_API WERD_RES : public ELIST_LINK { // The length of box_word matches rebuild_word, best_state (if set) and // correct_text (if set), as well as best_choice and represents the // number of classified units in the output. - tesseract::BoxWord *box_word = nullptr; // Denormalized output boxes. + BoxWord *box_word = nullptr; // Denormalized output boxes. // The Tesseract that was used to recognize this word. Just a borrowed // pointer. Note: Tesseract's class definition is in a higher-level library. // We avoid introducing a cyclic dependency by not using the Tesseract @@ -277,7 +277,7 @@ class TESS_API WERD_RES : public ELIST_LINK { // for the top-level multi-language controller, and maybe for output of // the recognized language. // tesseract points to data owned elsewhere. 
- tesseract::Tesseract *tesseract = nullptr; + Tesseract *tesseract = nullptr; // The best_state stores the relationship between chopped_word and // rebuild_word. Each blob[i] in rebuild_word is composed of best_state[i] // adjacent blobs in chopped_word. The seams in seam_array are hidden @@ -469,11 +469,11 @@ class TESS_API WERD_RES : public ELIST_LINK { // features on low resolution images. // // The norm_mode sets the default mode for normalization in absence - // of any of the above flags. It should really be a tesseract::OcrEngineMode + // of any of the above flags. It should really be a OcrEngineMode // but is declared as int for ease of use with tessedit_ocr_engine_mode. // Returns false if the word is empty and sets up fake results. bool SetupForRecognition(const UNICHARSET &unicharset_in, - tesseract::Tesseract *tesseract, + Tesseract *tesseract, int norm_mode, const TBOX *norm_box, bool numeric_mode, bool use_body_size, bool allow_detailed_fx, ROW *row, @@ -608,7 +608,7 @@ class TESS_API WERD_RES : public ELIST_LINK { // the given position. (When a sub/superscript is recognized as a separate // word, it falls victim to the rule that a whole word cannot be sub or // superscript, so this function overrides that problem.) - void SetAllScriptPositions(tesseract::ScriptPos position); + void SetAllScriptPositions(ScriptPos position); // Classifies the word with some already-calculated BLOB_CHOICEs. // The choices are an array of blob_count pointers to BLOB_CHOICE, diff --git a/src/tesseract.cpp b/src/tesseract.cpp index 20f56bd932..5048fbaefa 100644 --- a/src/tesseract.cpp +++ b/src/tesseract.cpp @@ -974,7 +974,8 @@ extern "C" int tesseract_main(int argc, const char **argv) } // TODO: set during init phase and/or when this parameter is edited. 
- monitor.set_deadline_msecs(api.tesseract()->activity_timeout_millisec); + Tesseract &tess = api.tesseract(); + monitor.set_deadline_msecs(tess.activity_timeout_millisec); // repeat the `-c var=val` load as debug_all MAY have overwritten some of these user-specified settings in the call above. if (!SetVariablesFromCLArgs(api, argc, argv)) { @@ -1055,14 +1056,15 @@ extern "C" int tesseract_main(int argc, const char **argv) // ambigs.train, box.train, box.train.stderr, linebox, rebox, lstm.train. // In this mode no other OCR result files are written. bool b = false; + ASSERT_HOST(&api.tesseract() == &tess); bool in_training_mode = - (bool(api.tesseract()->tessedit_ambigs_training)) || - (bool(api.tesseract()->tessedit_resegment_from_boxes)) || - (bool(api.tesseract()->tessedit_make_boxes_from_boxes)) || - (bool(api.tesseract()->tessedit_train_line_recognizer)); + (bool(tess.tessedit_ambigs_training)) || + (bool(tess.tessedit_resegment_from_boxes)) || + (bool(tess.tessedit_make_boxes_from_boxes)) || + (bool(tess.tessedit_train_line_recognizer)); if (api.GetPageSegMode() == tesseract::PSM_OSD_ONLY) { - if (!api.tesseract()->AnyTessLang()) { + if (!tess.AnyTessLang()) { fprintf(stderr, "Error, OSD requires a model for the legacy engine\n"); return EXIT_FAILURE; } @@ -1101,17 +1103,18 @@ extern "C" int tesseract_main(int argc, const char **argv) succeed &= !PreloadRenderers(api, renderers, pagesegmode, outputbase); if (succeed && renderers.empty()) { // default: TXT + HOCR renderer - api.tesseract()->tessedit_create_hocr.set_value(true); - api.tesseract()->tessedit_create_alto.set_value(true); - api.tesseract()->tessedit_create_page_xml.set_value(true); - api.tesseract()->tessedit_create_tsv.set_value(true); - api.tesseract()->tessedit_create_pdf.set_value(true); - api.tesseract()->textonly_pdf.set_value(true); - api.tesseract()->tessedit_write_unlv.set_value(true); - api.tesseract()->tessedit_create_lstmbox.set_value(true); - 
api.tesseract()->tessedit_create_boxfile.set_value(true); - api.tesseract()->tessedit_create_wordstrbox.set_value(true); - api.tesseract()->tessedit_create_txt.set_value(true); + ASSERT_HOST(&api.tesseract() == &tess); + tess.tessedit_create_hocr.set_value(true); + tess.tessedit_create_alto.set_value(true); + tess.tessedit_create_page_xml.set_value(true); + tess.tessedit_create_tsv.set_value(true); + tess.tessedit_create_pdf.set_value(true); + tess.textonly_pdf.set_value(true); + tess.tessedit_write_unlv.set_value(true); + tess.tessedit_create_lstmbox.set_value(true); + tess.tessedit_create_boxfile.set_value(true); + tess.tessedit_create_wordstrbox.set_value(true); + tess.tessedit_create_txt.set_value(true); succeed &= !PreloadRenderers(api, renderers, pagesegmode, outputbase); } diff --git a/src/training/ambiguous_words.cpp b/src/training/ambiguous_words.cpp index 3867e64535..f996dbeb97 100644 --- a/src/training/ambiguous_words.cpp +++ b/src/training/ambiguous_words.cpp @@ -75,7 +75,7 @@ extern "C" TESS_API int tesseract_ambiguous_words_main(int argc, const char** ar vars_values.emplace_back(output_file_str); api.InitFull(tessdata_dir, lang.c_str(), tesseract::OEM_TESSERACT_ONLY, nullptr, 0, &vars_vec, &vars_values, false); - tesseract::Dict &dict = api.tesseract()->getDict(); + tesseract::Dict &dict = api.tesseract().getDict(); FILE *input_file = fopen(input_file_str, "rb"); if (input_file == nullptr) { tesseract::tprintError("Failed to open input wordlist file {}\n", input_file_str); diff --git a/src/training/classifier_tester.cpp b/src/training/classifier_tester.cpp index 3d4a5e85c7..3a65d38994 100644 --- a/src/training/classifier_tester.cpp +++ b/src/training/classifier_tester.cpp @@ -74,7 +74,7 @@ static tesseract::ShapeClassifier *InitializeClassifier(const char *classifer_na tprintError("Tesseract initialization failed!\n"); return nullptr; } - tesseract = tess->tesseract(); + tesseract = &tess->tesseract(); classify = static_cast(tesseract); if 
(classify->shape_table() == nullptr) { tprintError("Tesseract must contain a ShapeTable!\n"); From 140be939686a789c28bd42fa866678786cdadec6 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Fri, 26 Jul 2024 12:28:55 +0200 Subject: [PATCH 64/66] guaranteed init all members of the class: assign the default/start values in the class declaration code chunk. # Conflicts: # include/tesseract/baseapi.h # Conflicts: # include/tesseract/baseapi.h --- include/tesseract/baseapi.h | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/include/tesseract/baseapi.h b/include/tesseract/baseapi.h index 6e436b6f62..59447ccc0a 100644 --- a/include/tesseract/baseapi.h +++ b/include/tesseract/baseapi.h @@ -832,35 +832,36 @@ class TESS_API TessBaseAPI { } protected: - Tesseract *tesseract_; ///< The underlying data object. - Tesseract *osd_tesseract_; ///< For orientation & script detection. - EquationDetect *equ_detect_; ///< The equation detector. + mutable Tesseract *tesseract_ = nullptr; ///< The underlying data object. + Tesseract *osd_tesseract_ = nullptr; ///< For orientation & script detection. + EquationDetect *equ_detect_ = nullptr; ///< The equation detector. FileReader reader_; ///< Reads files from any filesystem. - ImageThresholder *thresholder_; ///< Image thresholding module. - std::vector *paragraph_models_; - BLOCK_LIST *block_list_; ///< The page layout. - PAGE_RES *page_res_; ///< The page-level data. + ImageThresholder *thresholder_ = nullptr; ///< Image thresholding module. + std::vector *paragraph_models_ = nullptr; + BLOCK_LIST *block_list_ = nullptr; ///< The page layout. + PAGE_RES *page_res_ = nullptr; ///< The page-level data. std::string input_file_; ///< Name used by training code. std::string output_file_; ///< Name used by debug code. std::string datapath_; ///< Current location of tessdata. std::string language_; ///< Last initialized language. 
- OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested. - bool recognition_done_; ///< page_res_ contains recognition data. + OcrEngineMode last_oem_requested_ = OEM_DEFAULT; ///< Last ocr language mode requested. + bool recognition_done_ = false; ///< page_res_ contains recognition data. /** * @defgroup ThresholderParams Thresholder Parameters * Parameters saved from the Thresholder. Needed to rebuild coordinates. */ /* @{ */ - int rect_left_; - int rect_top_; - int rect_width_; - int rect_height_; - int image_width_; - int image_height_; + int rect_left_ = 0; + int rect_top_ = 0; + int rect_width_ = 0; + int rect_height_ = 0; + + int image_width_ = 0; + int image_height_ = 0; /* @} */ -private: +protected: // A list of image filenames gets special consideration // // If global parameter `tessedit_page_number` is non-negative, will only process that From eb6404cfeb3c963cedbe6afbe4cd343a6c974a95 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Fri, 26 Jul 2024 14:07:09 +0200 Subject: [PATCH 65/66] remove superfluous use of tesseract namespace identifier # Conflicts: # src/ccstruct/pageres.h --- src/ccstruct/pageres.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ccstruct/pageres.h b/src/ccstruct/pageres.h index 62ba323346..c5c56533bb 100644 --- a/src/ccstruct/pageres.h +++ b/src/ccstruct/pageres.h @@ -190,7 +190,7 @@ class TESS_API WERD_RES : public ELIST_LINK { // match as they are both before any chopping. // TODO(rays) determine if docqual does anything useful and delete bln_boxes // if it doesn't. - tesseract::BoxWord *bln_boxes = nullptr; // BLN input bounding boxes. + BoxWord *bln_boxes = nullptr; // BLN input bounding boxes. // The ROW that this word sits in. NOT owned by the WERD_RES. 
ROW *blob_row = nullptr; // The denorm provides the transformation to get back to the rotated image @@ -267,7 +267,7 @@ class TESS_API WERD_RES : public ELIST_LINK { // The length of box_word matches rebuild_word, best_state (if set) and // correct_text (if set), as well as best_choice and represents the // number of classified units in the output. - tesseract::BoxWord *box_word = nullptr; // Denormalized output boxes. + BoxWord *box_word = nullptr; // Denormalized output boxes. // The Tesseract that was used to recognize this word. Just a borrowed // pointer. Note: Tesseract's class definition is in a higher-level library. // We avoid introducing a cyclic dependency by not using the Tesseract @@ -275,7 +275,7 @@ class TESS_API WERD_RES : public ELIST_LINK { // for the top-level multi-language controller, and maybe for output of // the recognized language. // tesseract points to data owned elsewhere. - tesseract::Tesseract *tesseract = nullptr; + Tesseract *tesseract = nullptr; // The best_state stores the relationship between chopped_word and // rebuild_word. Each blob[i] in rebuild_word is composed of best_state[i] // adjacent blobs in chopped_word. The seams in seam_array are hidden @@ -467,11 +467,11 @@ class TESS_API WERD_RES : public ELIST_LINK { // features on low resolution images. // // The norm_mode sets the default mode for normalization in absence - // of any of the above flags. It should really be a tesseract::OcrEngineMode + // of any of the above flags. It should really be a OcrEngineMode // but is declared as int for ease of use with tessedit_ocr_engine_mode. // Returns false if the word is empty and sets up fake results. 
bool SetupForRecognition(const UNICHARSET &unicharset_in, - tesseract::Tesseract *tesseract, Image pix, + Tesseract *tesseract, Image pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, bool use_body_size, bool allow_detailed_fx, ROW *row, @@ -606,7 +606,7 @@ class TESS_API WERD_RES : public ELIST_LINK { // the given position. (When a sub/superscript is recognized as a separate // word, it falls victim to the rule that a whole word cannot be sub or // superscript, so this function overrides that problem.) - void SetAllScriptPositions(tesseract::ScriptPos position); + void SetAllScriptPositions(ScriptPos position); // Classifies the word with some already-calculated BLOB_CHOICEs. // The choices are an array of blob_count pointers to BLOB_CHOICE, From cb9c9a7445c5477c2c84a2b574f8142b185f193d Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Fri, 26 Jul 2024 17:55:33 +0200 Subject: [PATCH 66/66] typo fix in comment --- src/ccmain/tfacepp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccmain/tfacepp.cpp b/src/ccmain/tfacepp.cpp index 62749450f4..3c7fb7f434 100644 --- a/src/ccmain/tfacepp.cpp +++ b/src/ccmain/tfacepp.cpp @@ -130,7 +130,7 @@ void Tesseract::recog_word_recursive(WERD_RES *word, int call_depth) { if (!restrict_recursion && owner_.Monitor().bump_progress().exec_progress_func().kick_watchdog_and_check_for_cancel()) { // deadline reached: as we don't check all the way down once we get a cancel signal, dial down the call depth limit to insane low values in order to stop the word recognizer in its tracks for the remainder of the run. 
// - // what we do to also keep the userland configured value is to flip its sign: that way we can flip that value back at the end of the run if its only this particular session's + // what we do to also keep the userland configured value is to flip its sign: that way we can flip that value back at the end of the run if it's only this particular session's // deadline that's expired, not just the entire session's -- this is us anticipating tesseract core readying for batch processing in a single session. recog_word_recursion_depth_limit.set_value(-recog_word_recursion_depth_limit.value());