1 { lib, stdenv, fetchFromGitHub, autoreconfHook, autoconf-archive, pkg-config
2 , leptonica, libpng, libtiff, icu, pango, opencl-headers }:
4 stdenv.mkDerivation rec {
8 src = fetchFromGitHub {
9 owner = "tesseract-ocr";
12 hash = "sha256-sV3w53ky13ESc0dGPutMGQ4TcmOeWJkvUwBPIyzSTc8=";
15 # Leptonica 1.83 made its internal structures private. Using internal headers isn't
16 # great, but tesseract4's days are numbered anyway.
18 sed -i '/allheaders.h/a#include "pix_internal.h"' src/textord/devanagari_processing.cpp
20 # gcc-13 compat fix; this simulates the following upstream patches:
21 # https://github.com/tesseract-ocr/tesseract/commit/17e795aaae7d40dbcb7d3365835c2f55ecc6355d.patch
22 # https://github.com/tesseract-ocr/tesseract/commit/c0db7b7e930322826e09981360e39fdbd16cc9b0.patch
24 sed -i src/ccutil/helpers.h -e '1i #include <climits>'
25 sed -i src/ccutil/helpers.h -e '1i #include <cstdint>'
26 sed -i src/dict/matchdefs.h -e '1i #include <cstdint>'
29 enableParallelBuilding = true;
47 description = "OCR engine";
48 homepage = "https://github.com/tesseract-ocr/tesseract";
49 license = lib.licenses.asl20;
50 maintainers = with lib.maintainers; [ erikarvstedt ];
51 platforms = with lib.platforms; linux ++ darwin;
52 mainProgram = "tesseract";