Files
dnf-auto-cloud/server/image_processor.py
Your Name 55b1caf1f2 INIT
2025-03-26 18:08:05 +00:00

404 lines
12 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
图像处理模块,负责对接收到的图像进行预处理和后处理
优化版 - 增强图像处理性能和准确性
"""
import logging
import numpy as np
import cv2
from PIL import Image, ImageEnhance, ImageFilter
logger = logging.getLogger("DNFAutoCloud")
def process_image(yolo_model, image):
"""
处理图像并进行目标检测
参数:
yolo_model (YOLOModel): YOLO模型实例
image (PIL.Image): 输入图像
返回:
list: 检测结果
"""
try:
# 图像预处理
processed_image = preprocess_image(image)
# 使用YOLO模型进行检测
detections = yolo_model.detect(processed_image)
# 后处理检测结果
processed_detections = postprocess_detections(detections, image.size)
# 计算额外特征
enhanced_detections = calculate_extra_features(processed_detections, image)
return enhanced_detections
except Exception as e:
logger.error(f"图像处理出错: {e}")
return []
def preprocess_image(image):
"""
图像预处理 - 优化版
参数:
image (PIL.Image): 输入图像
返回:
PIL.Image: 预处理后的图像
"""
try:
# 确保图像是RGB模式
image = image.convert("RGB")
# 应用自适应直方图均衡化 (对于UI元素检测很有帮助)
# 将PIL图像转换为numpy数组/OpenCV格式
img_cv = np.array(image)
img_cv = img_cv[:, :, ::-1].copy() # RGB -> BGR
# 转换为LAB色彩空间并对L通道应用CLAHE
lab = cv2.cvtColor(img_cv, cv2.COLOR_BGR2LAB)
l, a, b = cv2.split(lab)
# 创建CLAHE对象
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
cl = clahe.apply(l)
# 合并处理后的通道
limg = cv2.merge((cl, a, b))
enhanced_cv = cv2.cvtColor(limg, cv2.COLOR_LAB2BGR)
# 将OpenCV图像转换回PIL格式
enhanced = Image.fromarray(cv2.cvtColor(enhanced_cv, cv2.COLOR_BGR2RGB))
# 适度锐化
enhancer = ImageEnhance.Sharpness(enhanced)
enhanced = enhancer.enhance(1.3)
# 稍微增加对比度
enhancer = ImageEnhance.Contrast(enhanced)
enhanced = enhancer.enhance(1.2)
return enhanced
except Exception as e:
logger.error(f"图像预处理出错: {e}")
return image # 返回原始图像
def postprocess_detections(detections, image_size):
"""
后处理检测结果 - 优化版
参数:
detections (list): 检测结果
image_size (tuple): 原始图像大小
返回:
list: 处理后的检测结果
"""
try:
# 过滤或合并重叠的检测框
filtered_detections = []
# 按类别和置信度排序
detections.sort(key=lambda x: (x['class_name'], -x['confidence']))
# 按类别分组
class_groups = {}
for det in detections:
class_name = det['class_name']
if class_name not in class_groups:
class_groups[class_name] = []
class_groups[class_name].append(det)
# 处理每个类别内的重叠框
for class_name, dets in class_groups.items():
# 使用非极大值抑制
remaining = non_max_suppression(dets, iou_threshold=0.5)
# 处理每个保留的检测
for det in remaining:
# 例如,过滤掉边缘的检测结果
bbox = det['bbox']
if is_valid_detection(bbox, image_size):
# 添加相对位置信息
det['center'] = [
(bbox[0] + bbox[2]) / 2, # x中心
(bbox[1] + bbox[3]) / 2 # y中心
]
det['size'] = [
bbox[2] - bbox[0], # 宽度
bbox[3] - bbox[1] # 高度
]
det['relative_position'] = [
det['center'][0] / image_size[0], # 相对x位置
det['center'][1] / image_size[1] # 相对y位置
]
# 添加一个唯一ID
det['id'] = f"{class_name}_{len(filtered_detections)}"
filtered_detections.append(det)
return filtered_detections
except Exception as e:
logger.error(f"检测结果后处理出错: {e}")
return detections # 返回原始检测结果
def non_max_suppression(detections, iou_threshold=0.5):
"""
非极大值抑制实现
参数:
detections (list): 检测列表
iou_threshold (float): IOU阈值
返回:
list: 抑制后的检测列表
"""
# 如果列表为空,直接返回
if not detections:
return []
# 获取所有边界框和置信度
boxes = [d['bbox'] for d in detections]
scores = [d['confidence'] for d in detections]
# 初始化保留的框索引列表
keep = []
# 按置信度降序排序
idxs = np.argsort(scores)[::-1]
while len(idxs) > 0:
# 取最高置信度的框
current = idxs[0]
keep.append(current)
# 计算当前框与其他框的IoU
ious = []
current_box = boxes[current]
for i in idxs[1:]:
iou = calculate_iou(current_box, boxes[i])
ious.append(iou)
# 保留IoU小于阈值的框
idxs = idxs[1:][np.array(ious) < iou_threshold]
# 返回保留的检测
return [detections[i] for i in keep]
def calculate_iou(box1, box2):
"""计算两个边界框的IoU"""
# 交集
x1 = max(box1[0], box2[0])
y1 = max(box1[1], box2[1])
x2 = min(box1[2], box2[2])
y2 = min(box1[3], box2[3])
if x2 < x1 or y2 < y1:
return 0.0
intersection = (x2 - x1) * (y2 - y1)
# 两个边界框的面积
box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
# IoU
iou = intersection / float(box1_area + box2_area - intersection)
return iou
def is_valid_detection(bbox, image_size):
"""
检查检测框是否有效
参数:
bbox (list): 边界框 [x1, y1, x2, y2]
image_size (tuple): 图像大小 (width, height)
返回:
bool: 是否是有效的检测
"""
# 检查框是否太小
width = bbox[2] - bbox[0]
height = bbox[3] - bbox[1]
if width < 5 or height < 5:
return False
# 检查框是否大部分超出图像
if bbox[0] < -width/2 or bbox[1] < -height/2 or bbox[2] > image_size[0] + width/2 or bbox[3] > image_size[1] + height/2:
return False
return True
def calculate_extra_features(detections, image):
"""
计算额外特征,例如血条/蓝条百分比
参数:
detections (list): 检测结果
image (PIL.Image): 原始图像
返回:
list: 增强的检测结果
"""
try:
# 转换图像为numpy数组
img_array = np.array(image)
# 增强检测结果的额外信息
for det in detections:
# 处理血条
if det["class_name"] == "hp_bar":
det["percent"] = estimate_bar_percent(img_array, det["bbox"], "hp")
# 处理蓝条
elif det["class_name"] == "mp_bar":
det["percent"] = estimate_bar_percent(img_array, det["bbox"], "mp")
# 处理技能冷却
elif det["class_name"] == "cooldown":
# 如果有文本尝试提取技能ID和剩余时间
if "text" in det:
# 简单示例真实情况可能需要OCR
det["skill_id"] = det.get("text", "unknown").split("_")[0]
det["remaining_time"] = 1.0 # 假设值
# 处理物品
elif det["class_name"] == "item":
# 尝试识别物品稀有度(通过颜色)
det["rarity"] = estimate_item_rarity(img_array, det["bbox"])
return detections
except Exception as e:
logger.error(f"计算额外特征时出错: {e}")
return detections
def estimate_bar_percent(img_array, bbox, bar_type):
"""
估计血条/蓝条的百分比 - 优化版
参数:
img_array (numpy.ndarray): 图像数组
bbox (list): 边界框 [x1, y1, x2, y2]
bar_type (str): 条形类型 ("hp""mp")
返回:
float: 百分比值(0-100)
"""
try:
# 确保边界框在图像范围内
x1, y1, x2, y2 = [int(v) for v in bbox]
height, width = img_array.shape[:2]
x1 = max(0, x1)
y1 = max(0, y1)
x2 = min(width - 1, x2)
y2 = min(height - 1, y2)
if x1 >= x2 or y1 >= y2:
return 50.0 # 默认值
# 提取条形区域
bar_region = img_array[y1:y2, x1:x2]
# 根据条形类型设置颜色阈值
if bar_type == "hp":
# 红色血条,查找红色像素
lower_threshold = np.array([150, 0, 0])
upper_threshold = np.array([255, 70, 70])
elif bar_type == "mp":
# 蓝色蓝条,查找蓝色像素
lower_threshold = np.array([0, 0, 150])
upper_threshold = np.array([70, 70, 255])
else:
return 50.0 # 默认值
# 创建掩码
mask = np.all((bar_region >= lower_threshold) & (bar_region <= upper_threshold), axis=2)
# 计算比例(假设条形从左到右填充)
bar_width = x2 - x1
# 找出最右边的填充像素
filled_width = 0
for col in range(bar_width):
if np.any(mask[:, col]):
filled_width = col + 1
# 计算百分比
percent = (filled_width / bar_width) * 100
return max(0, min(100, percent)) # 确保在0-100范围内
except Exception as e:
logger.error(f"估计条形百分比时出错: {e}")
return 50.0 # 出错时返回默认值
def estimate_item_rarity(img_array, bbox):
"""
估计物品稀有度(通过颜色)
参数:
img_array (numpy.ndarray): 图像数组
bbox (list): 边界框 [x1, y1, x2, y2]
返回:
str: 稀有度描述
"""
try:
# 提取物品区域
x1, y1, x2, y2 = [int(v) for v in bbox]
height, width = img_array.shape[:2]
x1 = max(0, x1)
y1 = max(0, y1)
x2 = min(width - 1, x2)
y2 = min(height - 1, y2)
# 提取边框区域(只取几个像素宽的边框)
border_size = max(2, min(5, int((x2 - x1) * 0.1)))
top_border = img_array[y1:y1+border_size, x1:x2]
bottom_border = img_array[y2-border_size:y2, x1:x2]
left_border = img_array[y1:y2, x1:x1+border_size]
right_border = img_array[y1:y2, x2-border_size:x2]
# 合并所有边框像素
borders = np.vstack([
top_border.reshape(-1, 3),
bottom_border.reshape(-1, 3),
left_border.reshape(-1, 3),
right_border.reshape(-1, 3)
])
# 计算平均颜色
avg_color = np.mean(borders, axis=0)
# 根据颜色判断稀有度
r, g, b = avg_color
# 简单启发式规则
if r > 200 and g < 100 and b < 100:
return "legendary" # 红色:传说
elif r > 200 and g > 150 and b < 100:
return "epic" # 橙色:史诗
elif r < 100 and g < 100 and b > 180:
return "rare" # 蓝色:稀有
elif r < 100 and g > 180 and b < 100:
return "uncommon" # 绿色:优秀
else:
return "common" # 白色/灰色:普通
except Exception as e:
logger.error(f"估计物品稀有度时出错: {e}")
return "unknown"