1 commit cfc05af26b571e9ca09e9c709c0fb8934e9e46dd
2 Author: Guillaume Girol <symphorien+git@xlumurb.eu>
3 Date: Sat Aug 20 17:48:01 2022 +0200
7 diff --git a/src/pyocr/libtesseract/tesseract_raw.py b/src/pyocr/libtesseract/tesseract_raw.py
8 index 1edec8c..434a336 100644
9 --- a/src/pyocr/libtesseract/tesseract_raw.py
10 +++ b/src/pyocr/libtesseract/tesseract_raw.py
11 @@ -2,7 +2,6 @@ import ctypes
17 from ..error import TesseractError
19 @@ -10,51 +9,16 @@ from ..error import TesseractError
20 logger = logging.getLogger(__name__)
22 TESSDATA_PREFIX = os.getenv('TESSDATA_PREFIX', None)
24 +if TESSDATA_PREFIX is None:
25 + TESSDATA_PREFIX = '@tesseract@/share/tessdata'
26 + os.environ['TESSDATA_PREFIX'] = TESSDATA_PREFIX
29 # 70 is the minimum credible dpi for tesseract and force it to compute an
30 # estimate of the image dpi
34 -if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
35 - # Pyinstaller integration
36 - libnames += [os.path.join(sys._MEIPASS, "libtesseract-4.dll")]
37 - libnames += [os.path.join(sys._MEIPASS, "libtesseract-3.dll")]
38 - tessdata = os.path.join(sys._MEIPASS, "data")
39 - if not os.path.exists(os.path.join(tessdata, "tessdata")):
41 - "Running from container, but no tessdata ({}) found !".format(
46 - TESSDATA_PREFIX = os.path.join(tessdata, "tessdata")
49 -if sys.platform[:3] == "win": # pragma: no cover
51 - # Jflesch> Don't they have the equivalent of LD_LIBRARY_PATH on
53 - "../vs2010/DLL_Release/libtesseract302.dll",
54 - # prefer the most recent first
55 - "libtesseract305.dll",
56 - "libtesseract304.dll",
57 - "libtesseract303.dll",
58 - "libtesseract302.dll",
59 - "libtesseract400.dll", # Tesseract 4 is still in alpha stage
61 - "C:\\Program Files (x86)\\Tesseract-OCR\\libtesseract-4.dll",
62 - "C:\\Program Files (x86)\\Tesseract-OCR\\libtesseract-3.dll",
66 - "libtesseract.so.5",
67 - "libtesseract.so.4",
68 - "libtesseract.so.3",
69 - "libtesseract.5.dylib",
70 - "libtesseract.4.dylib",
73 +libnames = [ "@tesseractLibraryLocation@" ]
77 @@ -367,12 +331,12 @@ def init(lang=None):
80 lang = lang.encode("utf-8")
82 - if TESSDATA_PREFIX: # pragma: no cover
83 - prefix = TESSDATA_PREFIX.encode("utf-8")
85 + prefix = TESSDATA_PREFIX
87 g_libtesseract.TessBaseAPIInit3(
88 ctypes.c_void_p(handle),
89 - ctypes.c_char_p(prefix),
90 + ctypes.c_char_p(prefix.encode('utf-8')),
93 g_libtesseract.TessBaseAPISetVariable(
94 diff --git a/src/pyocr/tesseract.py b/src/pyocr/tesseract.py
95 index 0fe0d20..c1fdd27 100644
96 --- a/src/pyocr/tesseract.py
97 +++ b/src/pyocr/tesseract.py
98 @@ -28,8 +28,7 @@ from .builders import DigitBuilder # backward compatibility
99 from .error import TesseractError # backward compatibility
100 from .util import digits_only
102 -# CHANGE THIS IF TESSERACT IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY
103 -TESSERACT_CMD = 'tesseract.exe' if os.name == 'nt' else 'tesseract'
104 +TESSERACT_CMD = '@tesseract@/bin/tesseract'
106 TESSDATA_EXTENSION = ".traineddata"
108 diff --git a/tests/test_libtesseract.py b/tests/test_libtesseract.py
109 index cc31a50..890c02c 100644
110 --- a/tests/test_libtesseract.py
111 +++ b/tests/test_libtesseract.py
112 @@ -167,7 +167,8 @@ class TestLibTesseractRaw(BaseTest):
113 args = libtess.TessBaseAPIInit3.call_args[0]
114 self.assertEqual(len(args), 3)
115 self.assertEqual(args[0].value, self.handle)
116 - self.assertEqual(args[1].value, None)
117 + # we hardcode tesseract data, so we don't get None
118 + #self.assertEqual(args[1].value, None)
119 self.assertEqual(args[2].value, lang.encode() if lang else None)
122 @@ -203,7 +204,8 @@ class TestLibTesseractRaw(BaseTest):
123 args = libtess.TessBaseAPIInit3.call_args[0]
124 self.assertEqual(len(args), 3)
125 self.assertEqual(args[0].value, self.handle)
126 - self.assertEqual(args[1].value, None)
127 + # we hardcode tesseract data, so we don't get None
128 + #self.assertEqual(args[1].value, None)
129 self.assertEqual(args[2].value, lang.encode() if lang else None)
132 diff --git a/tests/test_tesseract.py b/tests/test_tesseract.py
133 index 823818f..2ee5fb4 100644
134 --- a/tests/test_tesseract.py
135 +++ b/tests/test_tesseract.py
136 @@ -37,7 +37,7 @@ class TestTesseract(BaseTest):
137 def test_available(self, which):
138 which.return_value = True
139 self.assertTrue(tesseract.is_available())
140 - which.assert_called_once_with("tesseract")
141 + which.assert_called_once_with("@tesseract@/bin/tesseract")
143 @patch("subprocess.Popen")
144 def test_version_error(self, popen):
145 @@ -163,7 +163,7 @@ class TestTesseract(BaseTest):
146 for lang in ("eng", "fra", "jpn", "osd"):
147 self.assertIn(lang, langs)
148 popen.assert_called_once_with(
149 - ["tesseract", "--list-langs"],
150 + ["@tesseract@/bin/tesseract", "--list-langs"],
151 startupinfo=None, creationflags=0,
152 stdout=subprocess.PIPE, stderr=subprocess.STDOUT
154 @@ -178,7 +178,7 @@ class TestTesseract(BaseTest):
155 self.assertEqual(te.exception.status, 1)
156 self.assertEqual("unable to get languages", te.exception.message)
157 popen.assert_called_once_with(
158 - ["tesseract", "--list-langs"],
159 + ["@tesseract@/bin/tesseract", "--list-langs"],
160 startupinfo=None, creationflags=0,
161 stdout=subprocess.PIPE, stderr=subprocess.STDOUT
163 @@ -255,7 +255,7 @@ class TestTesseract(BaseTest):
164 self.assertEqual(status, 0)
165 self.assertEqual(error, message)
166 popen.assert_called_once_with(
167 - ["tesseract", "input.bmp", "output"],
168 + ["@tesseract@/bin/tesseract", "input.bmp", "output"],
172 @@ -278,7 +278,7 @@ class TestTesseract(BaseTest):
173 self.assertEqual(status, 0)
174 self.assertEqual(error, message)
175 popen.assert_called_with(
176 - ["tesseract", "input2.bmp", "output2", "-l", "fra", "--psm", "3"],
177 + ["@tesseract@/bin/tesseract", "input2.bmp", "output2", "-l", "fra", "--psm", "3"],
181 @@ -309,7 +309,7 @@ class TestTesseract(BaseTest):
182 self.assertEqual(result["angle"], 90)
183 self.assertEqual(result["confidence"], 9.30)
184 popen.assert_called_once_with(
185 - ["tesseract", "input.bmp", "stdout", "--psm", "0"],
186 + ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"],
187 stdin=subprocess.PIPE,
190 @@ -345,7 +345,7 @@ class TestTesseract(BaseTest):
191 self.assertEqual(result["angle"], 90)
192 self.assertEqual(result["confidence"], 9.30)
193 popen.assert_called_once_with(
194 - ["tesseract", "input.bmp", "stdout", "--psm", "0"],
195 + ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"],
196 stdin=subprocess.PIPE,
199 @@ -378,7 +378,7 @@ class TestTesseract(BaseTest):
200 self.assertEqual(result["angle"], 90)
201 self.assertEqual(result["confidence"], 9.30)
202 popen.assert_called_once_with(
203 - ["tesseract", "input.bmp", "stdout",
204 + ["@tesseract@/bin/tesseract", "input.bmp", "stdout",
205 "--psm", "0", "-l", "osd"],
206 stdin=subprocess.PIPE,
208 @@ -406,7 +406,7 @@ class TestTesseract(BaseTest):
209 with self.assertRaises(tesseract.TesseractError) as te:
210 tesseract.detect_orientation(self.image)
211 popen.assert_called_once_with(
212 - ["tesseract", "input.bmp", "stdout", "--psm", "0"],
213 + ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"],
214 stdin=subprocess.PIPE,
217 @@ -440,7 +440,7 @@ class TestTesseract(BaseTest):
218 with self.assertRaises(tesseract.TesseractError) as te:
219 tesseract.detect_orientation(self.image)
220 popen.assert_called_once_with(
221 - ["tesseract", "input.bmp", "stdout", "--psm", "0"],
222 + ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"],
223 stdin=subprocess.PIPE,
226 @@ -474,7 +474,7 @@ class TestTesseract(BaseTest):
227 self.assertEqual(result["angle"], 90)
228 self.assertEqual(result["confidence"], 9.30)
229 popen.assert_called_once_with(
230 - ["tesseract", "input.bmp", "stdout", "-psm", "0"],
231 + ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0"],
232 stdin=subprocess.PIPE,
235 @@ -507,7 +507,7 @@ class TestTesseract(BaseTest):
236 self.assertEqual(result["angle"], 90)
237 self.assertEqual(result["confidence"], 9.30)
238 popen.assert_called_once_with(
239 - ["tesseract", "input.bmp", "stdout", "-psm", "0", "-l", "fra"],
240 + ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0", "-l", "fra"],
241 stdin=subprocess.PIPE,
244 @@ -534,7 +534,7 @@ class TestTesseract(BaseTest):
245 with self.assertRaises(tesseract.TesseractError) as te:
246 tesseract.detect_orientation(self.image)
247 popen.assert_called_once_with(
248 - ["tesseract", "input.bmp", "stdout", "-psm", "0"],
249 + ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0"],
250 stdin=subprocess.PIPE,
253 @@ -568,7 +568,7 @@ class TestTesseract(BaseTest):
254 with self.assertRaises(tesseract.TesseractError) as te:
255 tesseract.detect_orientation(self.image)
256 popen.assert_called_once_with(
257 - ["tesseract", "input.bmp", "stdout", "-psm", "0"],
258 + ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0"],
259 stdin=subprocess.PIPE,