Skip to content

Commit

Permalink
part 2 of: refactor/remove the tesseract->lstmrecognizer->ccutil internal chain for unicode char array access and parameter settings.
Browse files Browse the repository at this point in the history
  • Loading branch information
GerHobbelt committed Aug 3, 2024
1 parent 10d2966 commit 3ccbf01
Show file tree
Hide file tree
Showing 8 changed files with 27 additions and 25 deletions.
18 changes: 8 additions & 10 deletions src/training/classifier_tester.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ static const char *names[] = {"pruner", "full"};

FZ_HEAPDBG_TRACKER_SECTION_END_MARKER(_)

static tesseract::ShapeClassifier *InitializeClassifier(const char *classifer_name,
static tesseract::ShapeClassifier *InitializeClassifier(tesseract::TessBaseAPI &api, const char *classifer_name,
const UNICHARSET &unicharset, int argc,
const char **argv, tesseract::TessBaseAPI **api) {
const char **argv) {
// Decode the classifier string.
ClassifierName classifier = CN_COUNT;
for (int c = 0; c < CN_COUNT; ++c) {
Expand All @@ -64,17 +64,15 @@ static tesseract::ShapeClassifier *InitializeClassifier(const char *classifer_na
}

// We need to initialize tesseract to test.
*api = new tesseract::TessBaseAPI;
tesseract::OcrEngineMode engine_mode = tesseract::OEM_TESSERACT_ONLY;
tesseract::Tesseract *tesseract = nullptr;
tesseract::Classify *classify = nullptr;
if (classifier == CN_PRUNER || classifier == CN_FULL) {
tesseract::TessBaseAPI *tess = *api;
if (tess->InitOem(test_tessdata_dir.c_str(), test_lang.c_str(), engine_mode) < 0) {
if (api.InitOem(test_tessdata_dir.c_str(), test_lang.c_str(), engine_mode) < 0) {
tprintError("Tesseract initialization failed!\n");
return nullptr;
}
tesseract = &tess->tesseract();
tesseract = &api.tesseract();
classify = static_cast<tesseract::Classify *>(tesseract);
if (classify->shape_table() == nullptr) {
tprintError("Tesseract must contain a ShapeTable!\n");
Expand Down Expand Up @@ -120,16 +118,17 @@ extern "C" TESS_API int tesseract_classifier_tester_main(int argc, const char**
tesseract::CheckSharedLibraryVersion();
(void)tesseract::SetConsoleModeToUTF8();

int rv = ParseArguments(&argc, &argv);
tesseract::TessBaseAPI api;

int rv = ParseArguments(api, &argc, &argv);
if (rv >= 0) {
return rv;
}
std::string file_prefix;
auto trainer = tesseract::LoadTrainingData(argv + 1, false, nullptr, file_prefix);
tesseract::TessBaseAPI *api;
// Decode the classifier string.
tesseract::ShapeClassifier *shape_classifier =
InitializeClassifier(test_classifier.c_str(), trainer->unicharset(), argc, argv, &api);
InitializeClassifier(api, test_classifier.c_str(), trainer->unicharset(), argc, argv);
if (shape_classifier == nullptr) {
tprintError("Classifier init failed!:{}\n", test_classifier.c_str());
return EXIT_FAILURE;
Expand All @@ -144,7 +143,6 @@ extern "C" TESS_API int tesseract_classifier_tester_main(int argc, const char**
test_report_level, false,
shape_classifier, nullptr);
delete shape_classifier;
delete api;

return EXIT_SUCCESS;
} /* main */
Expand Down
4 changes: 3 additions & 1 deletion src/training/cntraining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ extern "C" TESS_API int tesseract_cn_training_main(int argc, const char** argv)
tesseract::CheckSharedLibraryVersion();
(void)tesseract::SetConsoleModeToUTF8();

tesseract::TessBaseAPI api;

// Set the global Config parameters before parsing the command line.
Config = CNConfig;

Expand All @@ -129,7 +131,7 @@ extern "C" TESS_API int tesseract_cn_training_main(int argc, const char** argv)
FEATURE_DEFS_STRUCT FeatureDefs;
InitFeatureDefs(&FeatureDefs);

rv = ParseArguments(&argc, &argv);
rv = ParseArguments(api, &argc, &argv);
if (rv >= 0) {
return rv;
}
Expand Down
11 changes: 3 additions & 8 deletions src/training/common/commontraining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ void ParseArguments(int* argc, const char ***argv) {
# include "tessdatamanager.h"
# include <tesseract/tprintf.h>
# include "unicity_table.h"
# include "tesseractclass.h"

#if defined(HAVE_MUPDF)
#include "mupdf/assertions.h" // for ASSERT
Expand All @@ -98,8 +99,6 @@ FZ_HEAPDBG_TRACKER_SECTION_START_MARKER(_)
CLUSTERCONFIG Config = {elliptical, 0.625, 0.05, 1.0, 1e-6, 0};
FEATURE_DEFS_STRUCT feature_defs;

static CCUtil *ccutil = nullptr;

INT_VAR(trainer_debug_level, 0, "Level of Trainer debugging");
INT_VAR(trainer_load_images, 0, "Load images with tr files");
STRING_VAR(trainer_configfile, "", "File to load more configs from");
Expand Down Expand Up @@ -134,10 +133,7 @@ FZ_HEAPDBG_TRACKER_SECTION_END_MARKER(_)
* @param argc number of command line arguments to parse
* @param argv command line arguments
*/
int ParseArguments(int *argc, const char ***argv) {
if (!ccutil)
ccutil = new CCUtil();

int ParseArguments(TessBaseAPI &api, int *argc, const char ***argv) {
std::string usage;
if (*argc) {
usage += (*argv)[0];
Expand All @@ -156,8 +152,7 @@ int ParseArguments(int *argc, const char ***argv) {
Config.Confidence = std::max(0.0, std::min(1.0, double(clusterconfig_confidence)));
// Set additional parameters from config file if specified.
if (!trainer_configfile.empty()) {
ASSERT0(ccutil != nullptr);
tesseract::ParamUtils::ReadParamsFile(trainer_configfile.c_str(), tesseract::SET_PARAM_CONSTRAINT_NON_INIT_ONLY, ccutil->params());
tesseract::ParamUtils::ReadParamsFile(trainer_configfile.c_str(), tesseract::SET_PARAM_CONSTRAINT_NON_INIT_ONLY, api.tesseract().params());
}
return rv;
}
Expand Down
2 changes: 1 addition & 1 deletion src/training/common/commontraining.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
namespace tesseract {

TESS_COMMON_TRAINING_API
int ParseArguments(int* argc, const char ***argv);
int ParseArguments(TessBaseAPI &api, int *argc, const char ***argv);

// Check whether the shared tesseract library is the right one.
// This function must be inline because otherwise it would be part of
Expand Down
4 changes: 3 additions & 1 deletion src/training/lstmeval.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ extern "C" int tesseract_lstm_eval_main(int argc, const char** argv)
tesseract::CheckSharedLibraryVersion();
(void)tesseract::SetConsoleModeToUTF8();

int rv = ParseArguments(&argc, &argv);
tesseract::TessBaseAPI api;

int rv = ParseArguments(api, &argc, &argv);
if (rv >= 0) {
return rv;
}
Expand Down
5 changes: 3 additions & 2 deletions src/training/lstmtraining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,9 @@ extern "C" int tesseract_lstm_training_main(int argc, const char** argv)
tesseract::CheckSharedLibraryVersion();
(void)tesseract::SetConsoleModeToUTF8();

int rv = ParseArguments(&argc, &argv);
tesseract::TessBaseAPI api;

int rv = ParseArguments(api, &argc, &argv);
if (rv >= 0) {
return rv;
}
Expand Down Expand Up @@ -139,7 +141,6 @@ extern "C" int tesseract_lstm_training_main(int argc, const char** argv)
}

// Setup the trainer.
tesseract::TessBaseAPI api;
std::string checkpoint_file = training_model_output;
checkpoint_file += "_checkpoint";
std::string checkpoint_bak = checkpoint_file + ".bak";
Expand Down
4 changes: 3 additions & 1 deletion src/training/mftraining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,9 @@ extern "C" TESS_API int tesseract_mf_training_main(int argc, const char** argv)
{
tesseract::CheckSharedLibraryVersion();

int rv = ParseArguments(&argc, &argv);
tesseract::TessBaseAPI api;

int rv = ParseArguments(api, &argc, &argv);
if (rv >= 0) {
return rv;
}
Expand Down
4 changes: 3 additions & 1 deletion src/training/shapeclustering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ extern "C" TESS_API int tesseract_shape_clustering_main(int argc, const char** a
{
tesseract::CheckSharedLibraryVersion();

ParseArguments(&argc, &argv);
tesseract::TessBaseAPI api;

ParseArguments(api, &argc, &argv);

std::string file_prefix;
auto trainer = tesseract::LoadTrainingData(argv + 1, false, nullptr, file_prefix);
Expand Down

0 comments on commit 3ccbf01

Please sign in to comment.