Skip to content

Commit

Permalink
Merge branch 'master' into page-output-with-int-coordinates
Browse files Browse the repository at this point in the history
  • Loading branch information
GerHobbelt committed Jul 21, 2024
2 parents 9a3cb70 + 18e130a commit f286356
Show file tree
Hide file tree
Showing 140 changed files with 3,312 additions and 2,654 deletions.
118 changes: 53 additions & 65 deletions include/tesseract/baseapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@

#include <tesseract/version.h>
#include <tesseract/memcost_estimate.h> // for ImageCostEstimate
#include <tesseract/ocrclass.h>
#include <tesseract/image.h>
#include <tesseract/params.h>
#include <tesseract/filepath.h>

Expand Down Expand Up @@ -109,6 +111,20 @@ class TESS_API TessBaseAPI {
*/
void SetInputName(const char *name);

/**
* Register a user-defined monitor instance, whose lifetime must equal
* or exceed this TessBaseAPI instance's lifetime, i.e.
* the referenced monitor instance MUST remain valid for as long as
* this TessBaseAPI instance may still use it.
*/
void RegisterMonitor(ETEXT_DESC *monitor);
ETEXT_DESC &Monitor();
const ETEXT_DESC &Monitor() const;

/// Note the given command (argv[] set as vector) for later reporting
/// in the diagnostics output as part of the HTML log heading.
void DebugAddCommandline(const std::vector<std::string> &argv);

/**
* These functions are required for searchable PDF output.
* We need our hands on the input file so that we can include
Expand All @@ -121,10 +137,15 @@ class TESS_API TessBaseAPI {

const char *GetInputName();

// Takes ownership of the input pix.
// Does NOT take ownership of the input pix, but COPIES it instead.
void SetInputImage(Pix *pix);
// Takes ownership of the input pix.
void SetInputImage(Image &&pix);
// Does NOT take ownership of the input pix, but CLONES it instead.
void SetInputImage(const Image &pix);

Pix *GetInputImage();
Pix *GetInputImage() const;
Image GetInputImageClone();

int GetSourceYResolution();

Expand All @@ -134,9 +155,15 @@ class TESS_API TessBaseAPI {

const char *GetVisibleImageFilename();

// Does NOT take ownership of the input pix, but COPIES it instead.
void SetVisibleImage(Pix *pix);
// Takes ownership of the input pix.
void SetVisibleImage(Image &&pix);
// Does NOT take ownership of the input pix, but CLONES it instead.
void SetVisibleImage(const Image &pix);

Pix* GetVisibleImage();
Image GetVisibleImageClone();

/**
* @}
Expand Down Expand Up @@ -381,8 +408,16 @@ class TESS_API TessBaseAPI {
const std::vector<std::string> &vars_values,
const std::vector<std::string> &configs);

int Init(const char *datapath, const char *language, OcrEngineMode mode,
const char **configs, int configs_size,
const std::vector<std::string> *vars_vec,
const std::vector<std::string> *vars_values,
bool set_only_non_debug_params, FileReader reader);

int Init(const char *datapath, const char *language, OcrEngineMode oem);

int Init(const char *datapath, const char *language, OcrEngineMode oem, FileReader reader);

int Init(const char *datapath, const char *language, OcrEngineMode oem,
const std::vector<std::string> &configs);

Expand Down Expand Up @@ -413,10 +448,6 @@ class TESS_API TessBaseAPI {
const std::vector<std::string> &configs,
FileReader reader);

int Init(const char *datapath,
const char *language, OcrEngineMode oem,
FileReader reader);

// In-memory version reads the traineddata directly from the given
// data[data_size] array.
int InitFromMemory(const char *data, size_t data_size,
Expand Down Expand Up @@ -477,7 +508,7 @@ class TESS_API TessBaseAPI {
/**
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
* The mode is stored as an IntParam so it can also be modified by
* ReadConfigFile or SetVariable("tessedit_pageseg_mode").
* ReadConfigFile() or SetVariable("tessedit_pageseg_mode").
*/
void SetPageSegMode(PageSegMode mode);

Expand Down Expand Up @@ -568,7 +599,7 @@ class TESS_API TessBaseAPI {

/**
* Stores lstmf based on in-memory data for one line with pix and text
*/
*/
bool WriteLSTMFLineData(const char *name, const char *path, Pix *pix, const char *truth_text, bool vertical);

/**
Expand All @@ -577,15 +608,8 @@ class TESS_API TessBaseAPI {
* May be called any time after SetImage, or after TesseractRect.
*/
Pix *GetThresholdedImage();

/**
* Saves a .png image of the type specified by `type` to the file `filename`.
*
* Type 0 is the original image, type 1 is the greyscale (derivative) image
* and type 2 is the binary (thresholded) derivative image.
*/
void WriteImage(const int type);

Image GetThresholdedImageClone();

/**
* Return average gradient of lines on page.
*/
Expand Down Expand Up @@ -698,7 +722,7 @@ class TESS_API TessBaseAPI {
* Optional. The Get*Text functions below will call Recognize if needed.
* After Recognize, the output is kept internally until the next SetImage.
*/
int Recognize(ETEXT_DESC *monitor);
int Recognize();

/**
* Methods to retrieve information after SetAndThresholdImage(),
Expand All @@ -713,13 +737,6 @@ class TESS_API TessBaseAPI {
* filename can point to a single image, a multi-page TIFF,
* or a plain text list of image filenames.
*
* retry_config is useful for debugging. If not nullptr, you can fall
* back to an alternate configuration if a page fails for some
* reason.
*
* timeout_millisec terminates processing if any single page
* takes too long. Set to 0 for unlimited time.
*
* renderer is responsible for creating the output. For example,
* use the TessTextRenderer if you want plaintext output, or
* the TessPDFRenderer to produce searchable PDF.
Expand All @@ -729,13 +746,13 @@ class TESS_API TessBaseAPI {
*
* Returns true if successful, false on error.
*/
bool ProcessPages(const char *filename, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer);
bool ProcessPages(const char *filename,
TessResultRenderer *renderer);

protected:
// Does the real work of ProcessPages.
bool ProcessPagesInternal(const char *filename, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer);
bool ProcessPagesInternal(const char *filename,
TessResultRenderer *renderer);

public:
/**
Expand All @@ -748,7 +765,6 @@ class TESS_API TessBaseAPI {
* See ProcessPages for descriptions of other parameters.
*/
bool ProcessPage(Pix *pix, const char *filename,
const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer);

/**
Expand Down Expand Up @@ -804,30 +820,13 @@ class TESS_API TessBaseAPI {
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* monitor can be used to
* cancel the recognition
* receive progress callbacks
*
* Returned string must be freed with the delete [] operator.
*/
char *GetHOCRText(ETEXT_DESC *monitor, int page_number);

/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* - cancel the recognition
* - receive progress callbacks
*
* Returned string must be freed with the delete [] operator.
*/
char *GetHOCRText(int page_number);

/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
*
* Returned string must be freed with the delete [] operator.
*/
char *GetAltoText(ETEXT_DESC *monitor, int page_number);

/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
Expand All @@ -842,14 +841,6 @@ class TESS_API TessBaseAPI {
*
* Returned string must be freed with the delete [] operator.
*/
char *GetPAGEText(ETEXT_DESC *monitor, int page_number);

/**
* Make an XML-formatted string with PAGE markup from the internal
* data structures.
*
* Returned string must be freed with the delete [] operator.
*/
char *GetPAGEText(int page_number);

/**
Expand Down Expand Up @@ -1060,9 +1051,6 @@ class TESS_API TessBaseAPI {

void set_min_orientation_margin(double margin);

void SetupDebugAllPreset();
void SetupDefaultPreset();

void ReportDebugInfo();

/* @} */
Expand Down Expand Up @@ -1111,18 +1099,20 @@ class TESS_API TessBaseAPI {
}

protected:
mutable Tesseract *tesseract_; ///< The underlying data object.
Tesseract *tesseract_; ///< The underlying data object.
#if !DISABLED_LEGACY_ENGINE
Tesseract *osd_tesseract_; ///< For orientation & script detection.
EquationDetect *equ_detect_; ///< The equation detector.
#endif
ETEXT_DESC *monitor_ = nullptr;
ETEXT_DESC default_minimal_monitor_;
FileReader reader_; ///< Reads files from any filesystem.
ImageThresholder *thresholder_; ///< Image thresholding module.
std::vector<ParagraphModel *> *paragraph_models_;
BLOCK_LIST *block_list_; ///< The page layout.
PAGE_RES *page_res_; ///< The page-level data.
std::string visible_image_file_;
Pix* pix_visible_image_; ///< Image used in output PDF
Image pix_visible_image_; ///< Image used in output PDF
std::string output_file_; ///< Name used by debug code.
std::string datapath_; ///< Current location of tessdata.
std::string language_; ///< Last initialized language.
Expand All @@ -1148,15 +1138,13 @@ class TESS_API TessBaseAPI {
// If global parameter `tessedit_page_number` is non-negative, will only process that
// single page. Works for multi-page tiff file, or filelist.
bool ProcessPagesFileList(FILE *fp, std::string *buf,
const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer);
// TIFF supports multipage so gets special consideration.
//
// If global parameter `tessedit_page_number` is non-negative, will only process that
// single page. Works for multi-page tiff file, or filelist.
bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
const char *filename, const char *retry_config,
int timeout_millisec,
const char *filename,
TessResultRenderer *renderer);
}; // class TessBaseAPI.

Expand Down
11 changes: 3 additions & 8 deletions include/tesseract/capi.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,8 @@ typedef enum TessTextlineOrder {
typedef struct ETEXT_DESC ETEXT_DESC;
#endif

typedef bool (*TessCancelFunc)(void *cancel_this, int words);
typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
int bottom);
typedef bool (*TessCancelFunc)(ETEXT_DESC *cancel_this, int word_count);
typedef void (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top, int bottom);

struct Pix;
struct Boxa;
Expand Down Expand Up @@ -310,16 +309,12 @@ TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(

TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);

TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle);

TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer);
TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
int page_index, const char *filename,
const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer);

TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
Expand Down
39 changes: 31 additions & 8 deletions src/ccstruct/image.h → include/tesseract/image.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,24 +27,47 @@ class TESS_API Image {
Pix *pix_ = nullptr;

public:
Image() = default;
Image(Pix *pix) : pix_(pix) {}
Image(Pix *pix); // mark this one as 'explicit' to see all the places where the code transitions between old/leptonica/C and new/C++/Image class styles.
Image(bool take_ownership, Pix *pix);

// https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#c21-if-you-define-or-delete-any-copy-move-or-destructor-function-define-or-delete-them-all
// https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#note-106

~Image();
Image();
Image(const Image &src);
Image &operator=(const Image &src);
Image(Image &&src) noexcept;
Image &operator=(Image &&src) noexcept;

// service
bool operator==(decltype(nullptr)) const { return pix_ == nullptr; }
bool operator!=(decltype(nullptr)) const { return pix_ != nullptr; }
explicit operator bool() const { return pix_ != nullptr; }
operator Pix *() const { return pix_; }
explicit operator Pix **() { return &pix_; }
Pix *operator->() const { return pix_; }
Image& operator =(Pix* pix);
operator Pix *() noexcept;
operator const Pix *() const noexcept;
explicit operator Pix **() noexcept;
const Pix *operator->() const noexcept;
Pix *operator->() noexcept;
Image &operator=(Pix *pix);
Image &operator=(Pix **pix); // move semantics, C style
Image &operator=(decltype(nullptr));

// api
Image clone() const; // increases refcount
Image copy() const; // does full copy
Pix *clone2pix() const; // increases refcount
Image cccclone() const; // increases refcount
Image copy() const; // does full copy
void destroy();
bool isZero() const;
void replace(Pix* pix);
void replace(Pix *&pix);
Pix *ptr() noexcept;
const Pix *ptr() const noexcept;

// equivalent of `operator Pix**` i.e. hard cast to `Pix **` type.
Pix **obtains() noexcept;
// Relinquish control of the PIX: pass it on to someone else. This is the C-style analogue of move semantics.
Pix *relinquish() noexcept;

// ops
Image operator|(Image) const;
Expand Down
Loading

0 comments on commit f286356

Please sign in to comment.