-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathimage_recognition.py
More file actions
329 lines (278 loc) · 12.7 KB
/
Copy pathimage_recognition.py
File metadata and controls
329 lines (278 loc) · 12.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
"""
圖像識別模組:從遊戲截圖中識別4x4網格中的數字
"""
import os
import re

import cv2
import numpy as np
import pytesseract
from PIL import Image
# Location of the Tesseract executable used by pytesseract.
# The TESSERACT_CMD environment variable, when set, overrides the built-in
# Windows install path so the module also works where Tesseract lives elsewhere.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    "TESSERACT_CMD",
    r'D:\Program Files\Tesseract-OCR\tesseract.exe',
)
def preprocess_image(img):
    """
    Prepare an image for OCR and return a binarised single-channel image.

    Pipeline: grayscale conversion, 3x cubic upscaling, CLAHE contrast
    enhancement, Otsu thresholding, polarity normalisation, 3x3 median blur.

    Args:
        img: Image array. A 3-dimensional array is treated as a BGR colour
            image (the channel order used elsewhere in this module); any
            other array is used as an already-grayscale image.

    Returns:
        The thresholded image, three times the input size along each axis.
        If the thresholded image is mostly dark (mean below 128) it is
        inverted before the final blur.
    """
    # Images in this module come from cv2.imread / an explicit RGB->BGR
    # conversion, so the colour-to-gray step must use the BGR code.
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img.copy()

    # Upscale before thresholding; small glyphs are harder for OCR.
    scale = 3
    gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)

    # Local contrast enhancement.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(gray)

    # Global Otsu threshold (the threshold value argument 0 is ignored by Otsu).
    _, binary = cv2.threshold(enhanced, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Normalise polarity: a mostly-dark result is flipped so the dominant
    # (background) area ends up white.
    if np.mean(binary) < 128:
        binary = cv2.bitwise_not(binary)

    # Remove isolated speckles left by thresholding.
    return cv2.medianBlur(binary, 3)
def extract_grid_region(img, grid_x, grid_y, grid_width, grid_height, rows=4, cols=4):
    """
    Split a rectangular grid area of an image into per-cell crops.

    The grid rectangle is divided into ``rows`` x ``cols`` equal cells (integer
    division), each cell is shrunk by a small margin on every side, and the
    resulting rectangle is clipped to the image bounds before cropping.

    Args:
        img: Image array indexed as ``img[y, x]`` (2-D, or 3-D with a trailing
            channel axis).
        grid_x: X coordinate of the grid's top-left corner, in pixels.
        grid_y: Y coordinate of the grid's top-left corner, in pixels.
        grid_width: Total grid width in pixels.
        grid_height: Total grid height in pixels.
        rows: Number of grid rows (default 4).
        cols: Number of grid columns (default 4).

    Returns:
        A list of ``rows`` lists, each holding ``cols`` arrays. A cell is a
        slice of ``img``; when its clipped rectangle is empty, a zero-filled
        10x10 ``uint8`` placeholder is returned instead (with 3 channels when
        ``img`` is 3-dimensional).

    Raises:
        ValueError: If ``rows`` or ``cols`` is smaller than 1.
    """
    if rows < 1 or cols < 1:
        raise ValueError("rows and cols must both be at least 1")

    cell_width = grid_width // cols
    cell_height = grid_height // rows

    # Trim only a thin border (at least 2 px) so digits are not clipped.
    margin_x = max(2, cell_width // 30)
    margin_y = max(2, cell_height // 30)

    img_height, img_width = img.shape[:2]
    placeholder_shape = (10, 10, 3) if len(img.shape) == 3 else (10, 10)

    grid = []
    for row in range(rows):
        # Vertical extent is shared by every cell in the row.
        y1 = max(0, int(grid_y + row * cell_height + margin_y))
        y2 = min(img_height, int(grid_y + (row + 1) * cell_height - margin_y))

        grid_row = []
        for col in range(cols):
            x1 = max(0, int(grid_x + col * cell_width + margin_x))
            x2 = min(img_width, int(grid_x + (col + 1) * cell_width - margin_x))

            if x2 > x1 and y2 > y1:
                cell = img[y1:y2, x1:x2]
            else:
                # Rectangle fell outside the image: substitute a blank cell.
                cell = np.zeros(placeholder_shape, dtype=np.uint8)
            grid_row.append(cell)
        grid.append(grid_row)
    return grid
# Tesseract configurations tried in order; each restricts output to digits and
# differs only in the page segmentation mode (--psm).
_OCR_CONFIGS = (
    (r'--oem 3 --psm 10 -c tessedit_char_whitelist=0123456789', 'single_char'),
    (r'--oem 3 --psm 8 -c tessedit_char_whitelist=0123456789', 'single_word'),
    (r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789', 'single_line'),
    (r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789', 'single_block'),
    (r'--oem 3 --psm 13 -c tessedit_char_whitelist=0123456789', 'raw_line'),
)

_DIGIT_RUN_RE = re.compile(r'\d+')


def _parse_confidence(raw):
    """Convert an OCR confidence (number or numeric string) to float; 0.0 if unparsable."""
    try:
        return float(raw)
    except (TypeError, ValueError):
        return 0.0


def _first_single_digit(text):
    """Return the first run of digits in ``text`` as an int if it is 1-9, else None."""
    match = _DIGIT_RUN_RE.search(text)
    if match is None:
        return None
    value = int(match.group())
    return value if 1 <= value <= 9 else None


def recognize_number_in_cell(cell_img, debug=False, cell_name=""):
    """
    Recognise the single digit (1-9) shown in one grid cell via Tesseract OCR.

    Args:
        cell_img: Cell image array, or None. A 3-dimensional array is
            converted to grayscale with the BGR colour code.
        debug: When true, OCR errors are printed and, if ``cell_name`` is
            non-empty, the preprocessed image is written to
            ``debug_processed/<cell_name>_processed.png``.
        cell_name: Label used in the debug file name and error messages.

    Returns:
        The recognised digit (1-9), or 0 when the cell is missing, empty,
        too dark (mean gray level below 30), or no digit could be read.
    """
    if cell_img is None or cell_img.size == 0:
        return 0

    # Cheap emptiness test on the raw cell: very dark cells are treated as empty.
    if len(cell_img.shape) == 3:
        gray_original = cv2.cvtColor(cell_img, cv2.COLOR_BGR2GRAY)
    else:
        gray_original = cell_img
    if np.mean(gray_original) < 30:
        return 0

    processed = preprocess_image(cell_img)

    if debug and cell_name:
        import os
        debug_dir = "debug_processed"
        os.makedirs(debug_dir, exist_ok=True)
        cv2.imwrite(f"{debug_dir}/{cell_name}_processed.png", processed)

    best_result = None
    best_confidence = 0
    for config, config_name in _OCR_CONFIGS:
        # Best effort: a failure of one configuration must not prevent the
        # remaining configurations from being tried.
        try:
            data = pytesseract.image_to_data(
                processed, config=config, output_type=pytesseract.Output.DICT
            )
            for text, raw_conf in zip(data['text'], data['conf']):
                digit = _first_single_digit(text)
                if digit is None:
                    continue
                # Confidence may arrive as int, float, or a string such as
                # "96.4"; parse it numerically so valid hits are not zeroed.
                confidence = _parse_confidence(raw_conf)
                if confidence > best_confidence:
                    best_result = digit
                    best_confidence = confidence

            # Nothing accepted so far: fall back to plain text output and
            # give the hit a fixed, low confidence.
            if best_result is None:
                text = pytesseract.image_to_string(processed, config=config)
                digit = _first_single_digit(text)
                if digit is not None:
                    best_result = digit
                    best_confidence = 25
        except Exception as e:
            if debug:
                print(f"OCR 錯誤 ({cell_name}, {config_name}): {e}")
            continue

    # No digit from any configuration means the cell is reported as empty.
    return best_result if best_result is not None else 0
# Fallback grid rectangle (x, y, width, height) in pixels, used whenever the
# caller does not supply all four grid parameters.
_DEFAULT_GRID_RECT = (438, 128, 742, 741)

# Pixels added on every side of a manually specified cell rectangle.
_MANUAL_CELL_PADDING = 10


def _load_bgr_image(img_path):
    """Read an image file as a BGR array, falling back to Pillow if OpenCV returns None."""
    img = cv2.imread(img_path)
    if img is not None:
        return img
    # Close the file handle deterministically and normalise the mode so the
    # RGB->BGR conversion below also works for grayscale/palette/alpha images.
    with Image.open(img_path) as pil_img:
        rgb = np.array(pil_img.convert("RGB"))
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)


def _crop_or_blank(img, x1, y1, x2, y2):
    """Return ``img[y1:y2, x1:x2]``, or a zero-filled 10x10 placeholder if the rectangle is empty."""
    if x2 > x1 and y2 > y1:
        return img[y1:y2, x1:x2]
    shape = (10, 10, 3) if len(img.shape) == 3 else (10, 10)
    return np.zeros(shape, dtype=np.uint8)


def recognize_grid(img_path, grid_x=None, grid_y=None, grid_width=None, grid_height=None,
                   manual_positions=None, debug=False):
    """
    Recognise the digits of the whole 4x4 grid in a screenshot file.

    Args:
        img_path: Path of the screenshot image.
        grid_x, grid_y, grid_width, grid_height: Grid rectangle in pixels.
            If any of the four is None, all four are replaced by the built-in
            default rectangle.
        manual_positions: Optional dict ``{(row, col): (x1, y1, x2, y2)}`` of
            explicit cell rectangles. Each is padded by 10 px per side and
            clipped to the image. Cells missing from the dict are cut from the
            grid rectangle instead.
        debug: When true, print progress, save every raw cell image under
            ``debug_cells/`` and pass ``debug`` on to the per-cell recogniser.

    Returns:
        A 4x4 list of lists of ints; 0 marks an empty or unreadable cell.

    Raises:
        Whatever ``PIL.Image.open`` raises when the file cannot be opened by
        either OpenCV or Pillow (for example a missing file).
    """
    img = _load_bgr_image(img_path)
    img_height, img_width = img.shape[:2]

    rect_incomplete = any(
        value is None for value in (grid_x, grid_y, grid_width, grid_height)
    )

    if manual_positions:
        if debug:
            print(f"使用手動指定的格子位置(共 {len(manual_positions)} 個)")

        auto_cells = None  # computed lazily, only if some cell has no manual position
        grid_cells = []
        for row in range(4):
            grid_row = []
            for col in range(4):
                if (row, col) in manual_positions:
                    x1, y1, x2, y2 = (int(v) for v in manual_positions[(row, col)])
                    # Pad the rectangle so the box comfortably encloses the
                    # digit; empty cells are detected later by brightness.
                    cell = _crop_or_blank(
                        img,
                        max(0, x1 - _MANUAL_CELL_PADDING),
                        max(0, y1 - _MANUAL_CELL_PADDING),
                        min(img_width, x2 + _MANUAL_CELL_PADDING),
                        min(img_height, y2 + _MANUAL_CELL_PADDING),
                    )
                else:
                    if auto_cells is None:
                        if rect_incomplete:
                            grid_x, grid_y, grid_width, grid_height = _DEFAULT_GRID_RECT
                        auto_cells = extract_grid_region(
                            img, grid_x, grid_y, grid_width, grid_height
                        )
                    cell = auto_cells[row][col]
                grid_row.append(cell)
            grid_cells.append(grid_row)
    else:
        if rect_incomplete:
            grid_x, grid_y, grid_width, grid_height = _DEFAULT_GRID_RECT
        if debug:
            print(f"使用網格位置: x={grid_x}, y={grid_y}, w={grid_width}, h={grid_height}")
        grid_cells = extract_grid_region(img, grid_x, grid_y, grid_width, grid_height)

    # Debug aid: dump the raw (unprocessed) crop of every cell.
    if debug:
        import os
        debug_cells_dir = "debug_cells"
        os.makedirs(debug_cells_dir, exist_ok=True)
        for row in range(4):
            for col in range(4):
                cell_name = f"cell_{row}_{col}"
                cell_img = grid_cells[row][col]
                if cell_img is not None and cell_img.size > 0:
                    cv2.imwrite(f"{debug_cells_dir}/{cell_name}.png", cell_img)

    # Run the per-cell recogniser over the grid.
    result = []
    for row in range(4):
        result_row = []
        for col in range(4):
            number = recognize_number_in_cell(
                grid_cells[row][col], debug=debug, cell_name=f"cell_{row}_{col}"
            )
            result_row.append(number if number is not None else 0)
        result.append(result_row)
    return result
def visualize_grid(grid, save_path=None):
    """
    Print the recognised grid to stdout as a bordered text table (debug aid).

    Every cell is right-aligned in a 5-character column followed by one
    space; a value equal to 0 is shown as blank. The border width is derived
    from the widest row so that borders and rows line up.

    Args:
        grid: Iterable of rows, each an iterable of cell values.
        save_path: Accepted for interface compatibility; currently unused
            (no image is written).
    """
    cell_width = 5
    rows = [list(row) for row in grid]
    column_count = max((len(row) for row in rows), default=0)
    border = "+" + "-" * (column_count * (cell_width + 1)) + "+"

    print("識別到的網格狀態:")
    print(border)
    for row in rows:
        rendered = []
        for cell in row:
            shown = " " if cell == 0 else cell
            rendered.append(f"{shown:>{cell_width}} ")
        print("|" + "".join(rendered) + "|")
    print(border)
if __name__ == "__main__":
    # Manual smoke test: recognise the board in a sample screenshot and print it.
    board = recognize_grid("game_screenshot.png")
    visualize_grid(board)