Skip to content
Open
1 change: 0 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,6 @@ libtesseract_la_SOURCES += src/ccstruct/blobs.cpp
libtesseract_la_SOURCES += src/ccstruct/blread.cpp
libtesseract_la_SOURCES += src/ccstruct/boxread.cpp
libtesseract_la_SOURCES += src/ccstruct/boxword.cpp
libtesseract_la_SOURCES += src/ccstruct/ccstruct.cpp
libtesseract_la_SOURCES += src/ccstruct/coutln.cpp
libtesseract_la_SOURCES += src/ccstruct/detlinefit.cpp
libtesseract_la_SOURCES += src/ccstruct/dppoint.cpp
Expand Down
1 change: 0 additions & 1 deletion cmake/SourceLists.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ set(TESSERACT_SRC_CCSTRUCT
src/ccstruct/blread.cpp
src/ccstruct/boxread.cpp
src/ccstruct/boxword.cpp
src/ccstruct/ccstruct.cpp
src/ccstruct/coutln.cpp
src/ccstruct/detlinefit.cpp
src/ccstruct/dppoint.cpp
Expand Down
26 changes: 15 additions & 11 deletions src/api/baseapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
#include <memory> // for std::unique_ptr
#include <set> // for std::pair
#include <sstream> // for std::stringstream
#include <string_view>
#include <vector> // for std::vector

#include <allheaders.h> // for pixDestroy, boxCreate, boxaAddBox, box...
Expand Down Expand Up @@ -125,18 +126,21 @@ static STRING_VAR(classify_font_name, kUnknownFontName,
// /path/to/dir/[lang].[fontname].exp[num]
// The [lang], [fontname] and [num] fields should not have '.' characters.
// If the global parameter classify_font_name is set, its value is used instead.
static void ExtractFontName(const char* filename, std::string* fontname) {
static void ExtractFontName(std::string_view filename, std::string* fontname) {
*fontname = classify_font_name;
if (*fontname == kUnknownFontName) {
// filename is expected to be of the form [lang].[fontname].exp[num]
// The [lang], [fontname] and [num] fields should not have '.' characters.
const char *basename = strrchr(filename, '/');
const char *firstdot = strchr(basename ? basename : filename, '.');
const char *lastdot = strrchr(filename, '.');
if (firstdot != lastdot && firstdot != nullptr && lastdot != nullptr) {
auto basename_pos = filename.find_last_of('/');
auto view = (basename_pos != std::string_view::npos)
? filename.substr(basename_pos + 1)
: filename;
auto firstdot = view.find_first_of('.');
auto lastdot = view.find_last_of('.');
if (firstdot != lastdot && firstdot != std::string_view::npos &&
lastdot != std::string_view::npos) {
++firstdot;
*fontname = firstdot;
fontname->resize(lastdot - firstdot);
*fontname = view.substr(firstdot, lastdot - firstdot);
}
}
}
Expand Down Expand Up @@ -225,7 +229,7 @@ bool TessBaseAPI::GetIntVariable(const char *name, int *value) const {
if (p == nullptr) {
return false;
}
*value = (int32_t)(*p);
*value = static_cast<int32_t>(*p);
return true;
}

Expand All @@ -251,7 +255,7 @@ bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const {
if (p == nullptr) {
return false;
}
*value = (double)(*p);
*value = static_cast<double>(*p);
return true;
}

Expand Down Expand Up @@ -527,7 +531,7 @@ void TessBaseAPI::SetImage(Pix *pix) {
// remove alpha channel from png
Pix *p1 = pixRemoveAlpha(pix);
pixSetSpp(p1, 3);
(void)pixCopy(pix, p1);
static_cast<void>(pixCopy(pix, p1));
pixDestroy(&p1);
}
thresholder_->SetImage(pix);
Expand Down Expand Up @@ -813,7 +817,7 @@ int TessBaseAPI::Recognize(ETEXT_DESC *monitor) {
#ifndef DISABLED_LEGACY_ENGINE
} else if (tesseract_->tessedit_train_from_boxes) {
std::string fontname;
ExtractFontName(output_file_.c_str(), &fontname);
ExtractFontName(output_file_, &fontname);
tesseract_->ApplyBoxTraining(fontname, page_res_);
} else if (tesseract_->tessedit_ambigs_training) {
FILE *training_output_file = tesseract_->init_recog_training(input_file_.c_str());
Expand Down
2 changes: 1 addition & 1 deletion src/ccmain/osdetect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ int OSResults::get_best_script(int orientation_id) const {
}

// Print the script scores for all possible orientations.
void OSResults::print_scores(void) const {
void OSResults::print_scores() const {
for (int i = 0; i < 4; ++i) {
tprintf("Orientation id #%d", i);
print_scores(i);
Expand Down
10 changes: 5 additions & 5 deletions src/ccmain/pgedit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -778,10 +778,10 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
// display bounding box
if (word->display_flag(DF_BOX)) {
word->bounding_box().plot(image_win,
static_cast<ScrollView::Color>((int32_t)editor_image_word_bb_color),
static_cast<ScrollView::Color>((int32_t)editor_image_word_bb_color));
static_cast<ScrollView::Color>(static_cast<int32_t>(editor_image_word_bb_color)),
static_cast<ScrollView::Color>(static_cast<int32_t>(editor_image_word_bb_color)));

auto c = static_cast<ScrollView::Color>((int32_t)editor_image_blob_bb_color);
auto c = static_cast<ScrollView::Color>(static_cast<int32_t>(editor_image_blob_bb_color));
image_win->Pen(c);
// cblob iterator
C_BLOB_IT c_it(word->cblob_list());
Expand Down Expand Up @@ -859,8 +859,8 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {

if (!displayed_something) { // display BBox anyway
word->bounding_box().plot(image_win,
static_cast<ScrollView::Color>((int32_t)editor_image_word_bb_color),
static_cast<ScrollView::Color>((int32_t)editor_image_word_bb_color));
static_cast<ScrollView::Color>(static_cast<int32_t>(editor_image_word_bb_color)),
static_cast<ScrollView::Color>(static_cast<int32_t>(editor_image_word_bb_color)));
}
return true;
}
Expand Down
4 changes: 2 additions & 2 deletions src/ccmain/resultiterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ ResultIterator::ResultIterator(const LTRResultIterator &resit) : LTRResultIterat
auto *p = ParamUtils::FindParam<BoolParam>(
"preserve_interword_spaces", GlobalParams()->bool_params, tesseract_->params()->bool_params);
if (p != nullptr) {
preserve_interword_spaces_ = (bool)(*p);
preserve_interword_spaces_ = static_cast<bool>(*p);
}

current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
Expand Down Expand Up @@ -781,7 +781,7 @@ bool ResultIterator::BidiDebug(int min_level) const {
auto *p = ParamUtils::FindParam<IntParam>("bidi_debug", GlobalParams()->int_params,
tesseract_->params()->int_params);
if (p != nullptr) {
debug_level = (int32_t)(*p);
debug_level = static_cast<int32_t>(*p);
}
return debug_level >= min_level;
}
Expand Down
8 changes: 4 additions & 4 deletions src/ccmain/thresholder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,8 @@ std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
}

r = pixSauvolaBinarizeTiled(pix_grey, half_window_size, kfactor, nx, ny,
(PIX**)pix_thresholds,
(PIX**)pix_binary);
static_cast<PIX **>(pix_thresholds),
static_cast<PIX **>(pix_binary));
} else { // if (method == ThresholdMethod::LeptonicaOtsu)
int tile_size;
double tile_size_factor;
Expand All @@ -269,8 +269,8 @@ std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
r = pixOtsuAdaptiveThreshold(pix_grey, tile_size, tile_size,
half_smooth_size, half_smooth_size,
score_fraction,
(PIX**)pix_thresholds,
(PIX**)pix_binary);
static_cast<PIX **>(pix_thresholds),
static_cast<PIX **>(pix_binary));
}

bool ok = (r == 0);
Expand Down
5 changes: 3 additions & 2 deletions src/ccstruct/blobbox.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "tprintf.h" // for tprintf
#include "werd.h" // for WERD_LIST

#include <array>
#include <cinttypes> // for PRId32
#include <cmath> // for std::sqrt
#include <cstdint> // for int16_t, int32_t
Expand Down Expand Up @@ -537,11 +538,11 @@ class BLOBNBOX : public ELIST<BLOBNBOX>::LINK {
int32_t line_crossings_; // Number of line intersections touched.
BLOBNBOX *base_char_blob_; // The blob that was the base char.
tesseract::ColPartition *owner_; // Who will delete me when I am not needed
BLOBNBOX *neighbours_[BND_COUNT];
std::array<BLOBNBOX *, BND_COUNT> neighbours_;
float horz_stroke_width_ = 0.0f; // Median horizontal stroke width
float vert_stroke_width_ = 0.0f; // Median vertical stroke width
float area_stroke_width_ = 0.0f; // Stroke width from area/perimeter ratio.
bool good_stroke_neighbours_[BND_COUNT];
std::array<bool, BND_COUNT> good_stroke_neighbours_;
bool horz_possible_; // Could be part of horizontal flow.
bool vert_possible_; // Could be part of vertical flow.
bool leader_on_left_; // There is a leader to the left.
Expand Down
31 changes: 0 additions & 31 deletions src/ccstruct/ccstruct.cpp

This file was deleted.

8 changes: 4 additions & 4 deletions src/ccstruct/ccstruct.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ class CCStruct : public CCUtil {
// Globally accessible constants.
// APPROXIMATIONS of the fractions of the character cell taken by
// the descenders, ascenders, and x-height.
static const double kDescenderFraction; // = 0.25;
static const double kXHeightFraction; // = 0.5;
static const double kAscenderFraction; // = 0.25;
static constexpr double kDescenderFraction = 0.25;
static constexpr double kXHeightFraction = 0.5;
static constexpr double kAscenderFraction = 0.25;
// Derived value giving the x-height as a fraction of cap-height.
static const double kXHeightCapRatio; // = XHeight/(XHeight + Ascender).
static constexpr double kXHeightCapRatio = kXHeightFraction / (kXHeightFraction + kAscenderFraction);
};

} // namespace tesseract
Expand Down
2 changes: 1 addition & 1 deletion src/ccstruct/coutln.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@

namespace tesseract {

ICOORD C_OUTLINE::step_coords[4] = {ICOORD(-1, 0), ICOORD(0, -1), ICOORD(1, 0), ICOORD(0, 1)};
std::array<ICOORD, 4> C_OUTLINE::step_coords = {ICOORD(-1, 0), ICOORD(0, -1), ICOORD(1, 0), ICOORD(0, 1)};

/**
* @name C_OUTLINE::C_OUTLINE
Expand Down
5 changes: 3 additions & 2 deletions src/ccstruct/coutln.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@

#include <tesseract/export.h> // for DLLSYM

#include <cstdint> // for int16_t, int32_t
#include <array>
#include <bitset> // for std::bitset<16>
#include <cstdint> // for int16_t, int32_t

struct Pix;

Expand Down Expand Up @@ -289,7 +290,7 @@ class C_OUTLINE : public ELIST<C_OUTLINE>::LINK {
std::vector<uint8_t> steps; // step array
EdgeOffset *offsets; // Higher precision edge.
C_OUTLINE_LIST children; // child elements
static ICOORD step_coords[4];
static std::array<ICOORD, 4> step_coords;
};

} // namespace tesseract
Expand Down
2 changes: 1 addition & 1 deletion src/ccstruct/debugpixa.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class DebugPixa {
DebugPixa() {
pixa_ = pixaCreate(0);
#ifdef TESSERACT_DISABLE_DEBUG_FONTS
fonts_ = NULL;
fonts_ = nullptr;
#else
fonts_ = bmfCreate(nullptr, 14);
#endif
Expand Down
6 changes: 3 additions & 3 deletions src/ccstruct/normalis.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ struct Pix;

namespace tesseract {

// Geometry of the baseline-normalized (BLN) character cell, in BLN units.
// Each constant is defined exactly once; constexpr makes the values usable in
// constant expressions such as array bounds and static_assert.
constexpr int kBlnCellHeight = 256;    // Full-height for baseline normalization.
constexpr int kBlnXHeight = 128;       // x-height for baseline normalization.
constexpr int kBlnBaselineOffset = 64; // offset for baseline normalization.

class BLOCK;
class FCOORD;
Expand Down
23 changes: 14 additions & 9 deletions src/ccstruct/pageres.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include <cassert> // for assert
#include <cstdint> // for INT32_MAX
#include <cstring> // for strlen
#include <string_view> // for std::string_view

struct Pix;

Expand Down Expand Up @@ -1021,23 +1022,27 @@ void WERD_RES::MergeAdjacentBlobs(unsigned index) {
// Utility function for fix_quotes.
// Returns non-zero if str, which must hold exactly one UTF-8 encoded
// character, is a simple quote: the ASCII apostrophe ('), the ASCII backtick
// (`), or one of the curved single quotes U+2018 / U+2019. Returns zero for
// anything else, including the empty string.
static int is_simple_quote(std::string_view str) {
  // Standard 1 byte quotes.
  if (str.size() == 1) {
    return str[0] == '\'' || str[0] == '`';
  }
  // UTF-8 3 bytes curved quotes: E2 80 98 (U+2018) and E2 80 99 (U+2019).
  // Compare as unsigned char because plain char may be signed, in which case
  // bytes >= 0x80 would never equal the integer literals below.
  if (str.size() == 3) {
    const auto lead = static_cast<unsigned char>(str[0]);
    const auto mid = static_cast<unsigned char>(str[1]);
    const auto tail = static_cast<unsigned char>(str[2]);
    return lead == 0xe2 && mid == 0x80 && (tail == 0x98 || tail == 0x99);
  }
  return false;
}

// Callback helper for fix_quotes returns a double quote if both
// arguments are quote, otherwise INVALID_UNICHAR_ID.
UNICHAR_ID WERD_RES::BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2) {
const char *ch = uch_set->id_to_unichar(id1);
const char *next_ch = uch_set->id_to_unichar(id2);
if (is_simple_quote(ch, strlen(ch)) &&
is_simple_quote(next_ch, strlen(next_ch))) {
if (is_simple_quote(ch) && is_simple_quote(next_ch)) {
return uch_set->unichar_to_id("\"");
}
return INVALID_UNICHAR_ID;
Expand Down
Loading
Loading