summary refs log tree commit diff stats
path: root/tests/acceptance/tesseract_utils.py
diff options
context:
space:
mode:
author: Philippe Mathieu-Daudé <f4bug@amsat.org> 2020-10-21 12:35:30 +0200
committer: Philippe Mathieu-Daudé <philmd@redhat.com> 2021-02-08 12:37:33 +0100
commit: ca8224492854a2930d0cadc76e715bf59582bf66 (patch)
tree: bd3e0e89c42bf1f84bee8de4a862898d3496cdae /tests/acceptance/tesseract_utils.py
parent: 162127f29f2a5a628ecea79d4718d3a51b1bffac (diff)
download: focaccia-qemu-ca8224492854a2930d0cadc76e715bf59582bf66.tar.gz
focaccia-qemu-ca8224492854a2930d0cadc76e715bf59582bf66.zip
tests/acceptance: Introduce tesseract_ocr() helper
We are going to reuse the tesseract OCR code.
Create a new tesseract_ocr() helper and use it.

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20201021105035.2477784-5-f4bug@amsat.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Diffstat (limited to 'tests/acceptance/tesseract_utils.py')
-rw-r--r-- tests/acceptance/tesseract_utils.py 18
1 files changed, 18 insertions, 0 deletions
diff --git a/tests/acceptance/tesseract_utils.py b/tests/acceptance/tesseract_utils.py
index acd6e8c2fa..72cd9ab798 100644
--- a/tests/acceptance/tesseract_utils.py
+++ b/tests/acceptance/tesseract_utils.py
@@ -6,7 +6,9 @@
 # later. See the COPYING file in the top-level directory.
 
 import re
+import logging
 
+from avocado.utils import process
 from avocado.utils.path import find_command, CmdNotFoundError
 
 def tesseract_available(expected_version):
@@ -26,3 +28,19 @@ def tesseract_available(expected_version):
         return False
     # now this is guaranteed to be a digit
     return int(match.groups()[0]) == expected_version
+
+
+def tesseract_ocr(image_path, tesseract_args='', tesseract_version=3):
+    """OCR image_path with tesseract; return its stripped, non-empty lines."""
+    log = logging.getLogger('tesseract')
+    log.debug(image_path)
+    if tesseract_version == 4:
+        # Version 4 gets '--oem 1' appended after the caller-supplied args.
+        tesseract_args += ' --oem 1'
+    command = "tesseract {} {} stdout".format(tesseract_args, image_path)
+    result = process.run(command)
+    stripped = (raw.strip() for raw in result.stdout_text.split('\n'))
+    lines = [text for text in stripped if text]
+    for text in lines:
+        log.debug(text)
+    return lines