-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathocr_engine.py
More file actions
115 lines (94 loc) · 4.2 KB
/
ocr_engine.py
File metadata and controls
115 lines (94 loc) · 4.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import pytesseract
import mss
import threading
import time
import sys
import os
from PIL import Image, ImageEnhance, ImageOps
class OCREngine:
    """Periodically screen-capture a region and run Tesseract OCR on it.

    A background daemon thread repeatedly asks ``get_region_callback`` for the
    screen region to read, grabs it with ``mss``, preprocesses the image and
    hands any non-blank recognised text to ``callback``. Both callbacks are
    invoked on the worker thread, not on the thread that called ``start()``.
    """

    # Regions narrower or shorter than this many pixels are skipped.
    MIN_REGION_SIZE = 10
    # Preprocessing parameters (override on a subclass or instance to tune).
    UPSCALE_FACTOR = 3
    CONTRAST_FACTOR = 2.0
    SHARPNESS_FACTOR = 2.0
    BINARIZE_THRESHOLD = 100
    # --psm 6 tells Tesseract to assume a single uniform block of text.
    TESSERACT_CONFIG = '--psm 6'

    def __init__(self, callback, get_region_callback):
        """Store the callbacks and locate the Tesseract executable.

        Args:
            callback: Called with the recognised text (a ``str``) whenever a
                capture yields non-blank text.
            get_region_callback: Called with no arguments; returns a mapping
                ``{'top': y, 'left': x, 'width': w, 'height': h}`` or a falsy
                value when there is nothing to capture.
        """
        self.callback = callback
        self.get_region_callback = get_region_callback
        self.running = False
        self.thread = None
        self.interval = 1.0  # seconds between captures
        # Each worker gets its own stop event (replaced in start()), so a
        # stopped worker can never be revived by a later start().
        self._stop_event = threading.Event()
        # If tesseract is on PATH this is a no-op; on Windows it usually
        # lives in one of a few standard install directories instead.
        self._configure_tesseract()

    @staticmethod
    def _candidate_tesseract_paths():
        """Return common Windows install locations of ``tesseract.exe``.

        The per-user entries are derived from ``%LOCALAPPDATA%`` (falling back
        to the home directory) rather than ``os.getlogin()``, which raises
        ``OSError`` when the process has no controlling terminal.
        """
        local_app_data = os.environ.get('LOCALAPPDATA') or os.path.join(
            os.path.expanduser('~'), 'AppData', 'Local'
        )
        return [
            r'C:\Program Files\Tesseract-OCR\tesseract.exe',
            r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe',
            os.path.join(local_app_data, 'Tesseract-OCR', 'tesseract.exe'),
            # Some per-user installs end up under the "Programs" subfolder.
            os.path.join(
                local_app_data, 'Programs', 'Tesseract-OCR', 'tesseract.exe'
            ),
        ]

    def _configure_tesseract(self):
        """Point pytesseract at a Tesseract binary if it is not on PATH.

        Prints a warning (and leaves pytesseract unconfigured) when no binary
        is found in any of the candidate locations.
        """
        try:
            pytesseract.get_tesseract_version()
            return  # Already reachable through PATH.
        except pytesseract.TesseractNotFoundError:
            pass

        for candidate in self._candidate_tesseract_paths():
            if os.path.exists(candidate):
                pytesseract.pytesseract.tesseract_cmd = candidate
                return

        print("Warning: Tesseract not found in standard paths. OCR may fail.")

    def start(self):
        """Start the capture loop on a daemon thread.

        Does nothing if a worker is already running. If the previous worker
        died while ``running`` was still True, a new one is started.
        """
        if self.running and self.thread is not None and self.thread.is_alive():
            return
        self.running = True
        self._stop_event = threading.Event()
        self.thread = threading.Thread(
            target=self._run_loop, args=(self._stop_event,), daemon=True
        )
        self.thread.start()

    def stop(self):
        """Ask the capture loop to stop.

        Returns immediately without joining: the worker exits once its current
        iteration finishes, and joining here could block the caller for a full
        OCR pass.
        """
        self.running = False
        self._stop_event.set()

    def _preprocess(self, img):
        """Return an upscaled, binarised, black-on-white copy of ``img``.

        Assumes light text on a dark background in the capture; the output is
        a mode ``'L'`` image containing only 0 and 255.
        """
        gray = img.convert('L')

        # Upscale for better clarity. Pillow >= 9.1 exposes the filters on
        # Image.Resampling; older releases only have them on Image itself.
        lanczos = getattr(Image, 'Resampling', Image).LANCZOS
        factor = self.UPSCALE_FACTOR
        gray = gray.resize((gray.width * factor, gray.height * factor), lanczos)

        gray = ImageEnhance.Contrast(gray).enhance(self.CONTRAST_FACTOR)
        gray = ImageEnhance.Sharpness(gray).enhance(self.SHARPNESS_FACTOR)

        # Threshold and invert in one lookup: pixels brighter than the
        # threshold (the text) become black, everything else white, which is
        # the polarity Tesseract handles best.
        threshold = self.BINARIZE_THRESHOLD
        return gray.point(lambda px: 0 if px > threshold else 255)

    def _read_region(self, sct):
        """Capture the current region with ``sct`` and return the OCR text.

        Returns ``None`` when the region callback yields nothing or a region
        smaller than ``MIN_REGION_SIZE`` in either dimension.
        """
        region = self.get_region_callback()
        if (
            not region
            or region['width'] < self.MIN_REGION_SIZE
            or region['height'] < self.MIN_REGION_SIZE
        ):
            # Box might be hidden or too small.
            return None

        # mss requires integer coordinates.
        monitor = {
            key: int(region[key]) for key in ('top', 'left', 'width', 'height')
        }
        shot = sct.grab(monitor)
        img = Image.frombytes("RGB", shot.size, shot.bgra, "raw", "BGRX")
        return pytesseract.image_to_string(
            self._preprocess(img), config=self.TESSERACT_CONFIG
        )

    def _run_loop(self, stop_event=None):
        """Worker-thread body: capture, OCR and report until asked to stop.

        Args:
            stop_event: Event that ends this particular worker. Defaults to
                the engine's current stop event.
        """
        if stop_event is None:
            stop_event = self._stop_event

        # The mss instance is created on the thread that uses it.
        with mss.mss() as sct:
            # ``self.running`` is still honoured so that code which clears the
            # flag directly keeps working.
            while self.running and not stop_event.is_set():
                try:
                    text = self._read_region(sct)
                    if text and text.strip():
                        self.callback(text)
                except Exception as e:
                    # Best effort: one bad frame must not kill the loop.
                    print(f"OCR Loop Error: {e}")
                # Sleeps for the interval but wakes immediately on stop().
                stop_event.wait(self.interval)