scene_processor.py 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130
  1. """
  2. 场景点云处理器
  3. 整合 YOLOE 门检测 + 全景图点云转换 + 位姿变换 + 多视角融合 + 3D 门合并
  4. 支持 scene0001 格式的场景数据
  5. """
  6. import os
  7. import sys
  8. import json
  9. import argparse
  10. from pathlib import Path
  11. from dataclasses import dataclass, field
  12. from typing import Dict, List, Optional, Tuple
  13. # 添加当前目录到路径,以便导入 camera_spherical
  14. sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
  15. import cv2
  16. import numpy as np
  17. import open3d as o3d
  18. from tqdm import tqdm
  19. from ultralytics import YOLOE
  20. from camera_spherical import Intrinsic_Spherical_NP
  21. # ============================================================================
  22. # 数据类
  23. # ============================================================================
@dataclass
class PoseData:
    """Pose of a single sweep location as read from the pose file."""
    uuid: str  # sweep identifier
    rotation: Dict[str, float]  # quaternion components keyed by 'w', 'x', 'y', 'z'
    translation: Dict[str, float]  # position components keyed by 'x', 'y', 'z'
@dataclass
class MaskDetection:
    """Mask detection results for a single image."""
    image_name: str
    mask_contours: List[List[List[float]]]  # 2D contour pixel coordinates [[x, y], ...]
    scores: List[float]
    mask_3d_points: List[np.ndarray]  # world-frame 3D points belonging to each mask
@dataclass
class Door3D:
    """A 3D door instance."""
    id: int
    center: np.ndarray  # center coordinates [x, y, z]
    bbox_8points: np.ndarray  # the 8 box corners [[x, y, z], ...] (8x3)
    source_detections: List[Dict]  # information about the detections this door came from
@dataclass
class EntranceInfo:
    """Entrance-door information."""
    is_detected: bool  # whether the entrance was determined from a detected door
    door: Optional[Door3D]  # the entrance door object (when determined by detection)
    center: np.ndarray  # entrance door center / estimated position [x, y, z]
    score: float  # confidence score
    method: str  # how it was determined: "exterior_class" / "size_score" / "fallback_2d" / "scene_center_estimate"
    reason: str  # human-readable explanation
  53. # ============================================================================
  54. # 场景处理器
  55. # ============================================================================
class SceneProcessor:
    """Scene processor: detection + point-cloud generation + pose transform + 3D door fusion."""
    # Door-related prompt classes - all of them are treated as a single "door" label
    DOOR_CLASSES = [
        "door", "indoor door", "exterior door",
        "wooden door", "metal door", "glass door", "double door",
        "single door", "open door", "closed door"
    ]
    # 3D door merging parameters
    MERGE_IOU_THRESH = 0.3  # 3D IoU threshold
    MERGE_DIST_THRESH = 2.0  # center-distance threshold (meters)
    # 3D door filtering parameters - physical properties of a plausible door
    DOOR_HEIGHT_MIN = 1.0  # minimum height (meters)
    DOOR_HEIGHT_MAX = 3.0  # maximum height (meters)
    DOOR_WIDTH_MIN = 0.3  # minimum width (meters)
    DOOR_WIDTH_MAX = 3.0  # maximum width (meters)
    DOOR_THICKNESS_MAX = 0.5  # maximum thickness (meters) - the door's depth direction
    GROUND_DIST_THRESH = 0.5  # maximum distance between door bottom and ground (meters)
  74. def __init__(
  75. self,
  76. scene_folder: str,
  77. model_path: str = "yoloe-26x-seg.pt",
  78. conf: float = 0.35,
  79. iou: float = 0.45,
  80. voxel_size: float = 0.03,
  81. depth_scale: float = 256.0,
  82. depth_min: float = 0.02,
  83. ground_y: Optional[float] = None, # 地面 Y 坐标(可选,默认从点云估计)
  84. ):
  85. """
  86. 初始化场景处理器
  87. Args:
  88. scene_folder: 场景文件夹路径
  89. model_path: YOLOE 模型路径
  90. conf: 检测置信度阈值
  91. iou: NMS IoU 阈值
  92. voxel_size: 点云体素下采样尺寸
  93. depth_scale: 深度图缩放因子
  94. depth_min: 最小有效深度
  95. ground_y: 地面 Y 坐标(可选,默认从点云自动估计)
  96. """
  97. self.scene_folder = Path(scene_folder)
  98. self.conf = conf
  99. self.iou = iou
  100. self.voxel_size = voxel_size
  101. self.depth_scale = depth_scale
  102. self.depth_min = depth_min
  103. self.ground_y = ground_y
  104. # 子目录
  105. self.rgb_folder = self.scene_folder / "pano_img"
  106. self.depth_folder = self.scene_folder / "depth_img"
  107. self.pose_file = self.scene_folder / "vision.txt"
  108. # 输出目录
  109. self.output_folder = self.scene_folder / "output"
  110. self.detection_folder = self.output_folder / "detections"
  111. # 加载位姿
  112. self.poses = self._load_poses()
  113. # 初始化 YOLOE 模型
  114. print(f"加载 YOLOE 模型:{model_path}")
  115. self.model = YOLOE(model_path)
  116. self.model.set_classes(self.DOOR_CLASSES)
  117. print(f"检测类别 (统一为 door): {self.DOOR_CLASSES}")
  118. def _load_poses(self) -> Dict[str, PoseData]:
  119. """从 vision.txt 加载位姿信息"""
  120. if not self.pose_file.exists():
  121. raise FileNotFoundError(f"位姿文件不存在:{self.pose_file}")
  122. with open(self.pose_file, 'r') as f:
  123. data = json.load(f)
  124. poses = {}
  125. for loc in data.get('sweepLocations', []):
  126. uuid = str(loc['uuid'])
  127. poses[uuid] = PoseData(
  128. uuid=uuid,
  129. rotation=loc['pose']['rotation'],
  130. translation=loc['pose']['translation']
  131. )
  132. print(f"加载 {len(poses)} 个位姿")
  133. return poses
  134. def _build_pose_matrix(self, pose: PoseData) -> np.ndarray:
  135. """构建 4x4 位姿变换矩阵"""
  136. R = o3d.geometry.get_rotation_matrix_from_quaternion(
  137. np.array([pose.rotation['w'], pose.rotation['x'],
  138. pose.rotation['y'], pose.rotation['z']])
  139. )
  140. t = np.array([
  141. pose.translation['x'],
  142. pose.translation['y'],
  143. pose.translation['z']
  144. ])
  145. T = np.eye(4)
  146. T[:3, :3] = R
  147. T[:3, 3] = t
  148. return T
  149. def _mask_to_3d_points(
  150. self,
  151. mask: np.ndarray,
  152. depth: np.ndarray,
  153. pose_matrix: np.ndarray
  154. ) -> Optional[np.ndarray]:
  155. """
  156. 将 2D mask 映射到世界坐标系 3D 点
  157. Args:
  158. mask: 二值 mask (H, W)
  159. depth: 深度图 (H, W)
  160. pose_matrix: 4x4 位姿矩阵
  161. Returns:
  162. 世界坐标系下的 3D 点 (N, 3)
  163. """
  164. H, W = depth.shape
  165. sph = Intrinsic_Spherical_NP(W, H)
  166. # 获取 mask 内的像素
  167. ys, xs = np.where(mask > 0)
  168. if len(xs) == 0:
  169. return None
  170. # 有效深度掩码
  171. valid = depth[ys, xs] > self.depth_min
  172. if not np.any(valid):
  173. return None
  174. xs, ys = xs[valid], ys[valid]
  175. depths = depth[ys, xs]
  176. # 计算方向向量
  177. bx, by, bz = sph.bearing([xs.astype(np.float64), ys.astype(np.float64)])
  178. bx, by, bz = np.array(bx), np.array(by), np.array(bz)
  179. # 相机坐标系
  180. pts_cam = np.stack([bx * depths, by * depths, bz * depths], axis=1)
  181. # Z 轴 180 度翻转
  182. R_z180 = np.diag([-1.0, -1.0, 1.0])
  183. pts_cam = pts_cam @ R_z180.T
  184. # 世界坐标系
  185. pts_w = (pose_matrix[:3, :3] @ pts_cam.T).T + pose_matrix[:3, 3]
  186. return pts_w
  187. def _extract_mask_contours(self, masks) -> Tuple[List[List[List[float]]], List[np.ndarray]]:
  188. """
  189. 从 YOLOE mask 结果提取轮廓
  190. Args:
  191. masks: YOLOE masks (H, W, N)
  192. Returns:
  193. (轮廓列表, 对应 mask 数组) 每个 mask 只保留最大轮廓,保证与 scores 一一对应
  194. """
  195. contours = []
  196. mask_arrays = []
  197. if masks is None:
  198. return contours, mask_arrays
  199. masks_np = masks.cpu().numpy()
  200. for i in range(masks_np.shape[0]):
  201. mask = masks_np[i]
  202. # 二值化
  203. mask_bin = (mask > 0.5).astype(np.uint8) * 255
  204. # 提取轮廓
  205. cnts, _ = cv2.findContours(mask_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
  206. if not cnts:
  207. continue
  208. # 只保留面积最大的轮廓,确保与 score 一一对应
  209. largest = max(cnts, key=cv2.contourArea)
  210. if len(largest) >= 3:
  211. # 简化轮廓
  212. epsilon = 0.02 * cv2.arcLength(largest, True)
  213. approx = cv2.approxPolyDP(largest, epsilon, True)
  214. contour = approx.reshape(-1, 2).astype(float).tolist()
  215. contours.append(contour)
  216. mask_arrays.append((mask_bin > 0).astype(np.uint8))
  217. else:
  218. # 点数不足时回退到完整 mask
  219. mask_arrays.append((mask_bin > 0).astype(np.uint8))
  220. return contours, mask_arrays
  221. def detect_single_image(
  222. self,
  223. img_path: str,
  224. depth: np.ndarray,
  225. pose_matrix: np.ndarray,
  226. save_path: Optional[str] = None
  227. ) -> MaskDetection:
  228. """
  229. 检测单张图像并提取 mask 轮廓和 3D 点
  230. Args:
  231. img_path: 图像路径
  232. depth: 深度图
  233. pose_matrix: 位姿矩阵
  234. save_path: 保存路径
  235. Returns:
  236. MaskDetection 对象
  237. """
  238. results = self.model.predict(
  239. img_path,
  240. imgsz=(1024, 2048),
  241. conf=self.conf,
  242. iou=self.iou,
  243. max_det=50,
  244. augment=True,
  245. retina_masks=True,
  246. half=False,
  247. verbose=False,
  248. )
  249. result = results[0]
  250. scores = []
  251. contours = []
  252. mask_3d_points = []
  253. if result.masks is not None:
  254. masks = result.masks.data
  255. # 提取轮廓(每个 mask 只保留最大轮廓,与 scores 一一对应)
  256. contours, mask_arrays = self._extract_mask_contours(masks)
  257. # 获取分数
  258. scores = result.boxes.conf.cpu().numpy().tolist()
  259. # 每个 mask 转 3D 点
  260. H, W = depth.shape
  261. for mask_bin in mask_arrays:
  262. mask_resized = cv2.resize(mask_bin, (W, H), interpolation=cv2.INTER_NEAREST)
  263. pts_3d = self._mask_to_3d_points(mask_resized, depth, pose_matrix)
  264. if pts_3d is not None and len(pts_3d) > 0:
  265. mask_3d_points.append(pts_3d)
  266. if save_path:
  267. os.makedirs(os.path.dirname(save_path), exist_ok=True)
  268. result.save(save_path)
  269. return MaskDetection(
  270. image_name=os.path.basename(img_path),
  271. mask_contours=contours,
  272. scores=scores,
  273. mask_3d_points=mask_3d_points
  274. )
  275. def _axis_aligned_bbox(self, points: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
  276. """计算轴对齐包围盒 (min, max)"""
  277. lo = np.min(points, axis=0)
  278. hi = np.max(points, axis=0)
  279. return lo, hi
  280. def _bbox_8corners(self, bbox_min: np.ndarray, bbox_max: np.ndarray) -> np.ndarray:
  281. """从 bbox min/max 获取 8 个角点"""
  282. cx, cy, cz = bbox_min
  283. ex, ey, ez = bbox_max
  284. return np.array([
  285. [cx, cy, cz], [ex, cy, cz], [ex, ey, cz], [cx, ey, cz],
  286. [cx, cy, ez], [ex, cy, ez], [ex, ey, ez], [cx, ey, ez],
  287. ])
  288. def _bbox_iou_3d(self, b1, b2) -> float:
  289. """3D IoU 计算"""
  290. lo = np.maximum(b1[0], b2[0])
  291. hi = np.minimum(b1[1], b2[1])
  292. inter = np.prod(np.maximum(hi - lo, 0))
  293. vol1 = np.prod(b1[1] - b1[0])
  294. vol2 = np.prod(b2[1] - b2[0])
  295. union = vol1 + vol2 - inter
  296. return inter / union if union > 0 else 0.0
  297. def _merge_3d_doors(self, door_candidates: List[Dict]) -> List[Door3D]:
  298. """
  299. 合并重叠或接近的 3D 门 - 使用并查集确保完全合并
  300. 合并逻辑:
  301. 1. 构建连通图:如果两个门满足合并条件,则它们连通
  302. 2. 使用并查集找出所有连通分量
  303. 3. 每个连通分量合并为一个门
  304. Args:
  305. door_candidates: 候选门列表
  306. Returns:
  307. 合并后的 Door3D 列表
  308. """
  309. if not door_candidates:
  310. return []
  311. n = len(door_candidates)
  312. if n == 1:
  313. # 只有一个候选,直接返回
  314. d = door_candidates[0]
  315. return [Door3D(
  316. id=0,
  317. center=(d['bbox_min'] + d['bbox_max']) / 2,
  318. bbox_8points=self._bbox_8corners(d['bbox_min'], d['bbox_max']),
  319. source_detections=[d['source']]
  320. )]
  321. # ========== 并查集 ==========
  322. parent = list(range(n))
  323. def find(x):
  324. if parent[x] != x:
  325. parent[x] = find(parent[x]) # 路径压缩
  326. return parent[x]
  327. def union(x, y):
  328. px, py = find(x), find(y)
  329. if px != py:
  330. parent[px] = py
  331. # ========== 构建连通关系 ==========
  332. # 检查所有门对,满足条件的合并
  333. for i in range(n):
  334. for j in range(i + 1, n):
  335. ci = (door_candidates[i]['bbox_min'] + door_candidates[i]['bbox_max']) / 2
  336. cj = (door_candidates[j]['bbox_min'] + door_candidates[j]['bbox_max']) / 2
  337. dist = np.linalg.norm(ci - cj)
  338. iou = self._bbox_iou_3d(
  339. (door_candidates[i]['bbox_min'], door_candidates[i]['bbox_max']),
  340. (door_candidates[j]['bbox_min'], door_candidates[j]['bbox_max'])
  341. )
  342. if dist < self.MERGE_DIST_THRESH and iou > self.MERGE_IOU_THRESH:
  343. union(i, j)
  344. # ========== 按连通分量分组 ==========
  345. from collections import defaultdict
  346. groups = defaultdict(list)
  347. for i in range(n):
  348. groups[find(i)].append(door_candidates[i])
  349. # ========== 合并每个组 ==========
  350. doors = []
  351. for door_id, members in enumerate(groups.values()):
  352. if not members:
  353. continue
  354. # 合并所有成员的 bbox
  355. bbox_min = np.min([m['bbox_min'] for m in members], axis=0)
  356. bbox_max = np.max([m['bbox_max'] for m in members], axis=0)
  357. sources = [m['source'] for m in members]
  358. doors.append(Door3D(
  359. id=door_id,
  360. center=(bbox_min + bbox_max) / 2,
  361. bbox_8points=self._bbox_8corners(bbox_min, bbox_max),
  362. source_detections=sources
  363. ))
  364. return doors
  365. def _estimate_ground_y(self, combined_pc: o3d.geometry.PointCloud) -> float:
  366. """
  367. 从点云估计地面 Y 坐标
  368. 假设:地面是场景中最低的大面积平面
  369. 方法:取所有点中 Y 坐标的下 1% 分位数
  370. Args:
  371. combined_pc: 完整场景点云
  372. Returns:
  373. 估计的地面 Y 坐标
  374. """
  375. points = np.asarray(combined_pc.points)
  376. if len(points) == 0:
  377. return 0.0
  378. # 取 Y 坐标(假设 Y 轴向上)的下 1% 分位数,避免家具腿等低矮物体干扰
  379. ground_y = np.percentile(points[:, 1], 1)
  380. return ground_y
  381. def _filter_door_by_properties(self, door: Door3D, ground_y: float) -> Tuple[bool, List[str]]:
  382. """
  383. 根据物理特性过滤门
  384. 过滤条件:
  385. 1. 高度在合理范围内 (1.0m - 3.0m)
  386. 2. 宽度在合理范围内 (0.3m - 3.0m)
  387. 3. 厚度不超过阈值 (≤ 0.5m)
  388. 4. 门底部接近地面 (≤ 0.5m)
  389. Args:
  390. door: 3D 门对象
  391. ground_y: 地面 Y 坐标
  392. Returns:
  393. (是否通过过滤,拒绝原因列表)
  394. """
  395. reasons = []
  396. # 计算 bounding box 尺寸
  397. size = door.bbox_8points.max(axis=0) - door.bbox_8points.min(axis=0)
  398. height = size[1] # Y 方向是高度
  399. width = max(size[0], size[2]) # 宽度是 X/Z 中较大的
  400. thickness = min(size[0], size[2]) # 厚度是 X/Z 中较小的
  401. # 门底部 Y 坐标
  402. door_bottom_y = door.bbox_8points[:, 1].min()
  403. # 条件 1: 高度检查
  404. if height < self.DOOR_HEIGHT_MIN:
  405. reasons.append(f"高度过小 ({height:.2f}m < {self.DOOR_HEIGHT_MIN}m)")
  406. elif height > self.DOOR_HEIGHT_MAX:
  407. reasons.append(f"高度过大 ({height:.2f}m > {self.DOOR_HEIGHT_MAX}m)")
  408. # 条件 2: 宽度检查
  409. if width < self.DOOR_WIDTH_MIN:
  410. reasons.append(f"宽度过小 ({width:.2f}m < {self.DOOR_WIDTH_MIN}m)")
  411. elif width > self.DOOR_WIDTH_MAX:
  412. reasons.append(f"宽度过大 ({width:.2f}m > {self.DOOR_WIDTH_MAX}m)")
  413. # 条件 3: 厚度检查
  414. if thickness > self.DOOR_THICKNESS_MAX:
  415. reasons.append(f"厚度过大 ({thickness:.2f}m > {self.DOOR_THICKNESS_MAX}m)")
  416. # 条件 4: 地面贴合检查
  417. dist_to_ground = door_bottom_y - ground_y
  418. if dist_to_ground > self.GROUND_DIST_THRESH:
  419. reasons.append(f"距地面过远 ({dist_to_ground:.2f}m > {self.GROUND_DIST_THRESH}m)")
  420. elif dist_to_ground < -self.GROUND_DIST_THRESH:
  421. reasons.append(f"嵌入地面过深 ({abs(dist_to_ground):.2f}m)")
  422. passed = len(reasons) == 0
  423. return passed, reasons
  424. def _score_entrance_door(self, door: Door3D, ground_y: float, all_centers: np.ndarray) -> Dict:
  425. """
  426. 为每个门计算"入户门可能性"评分
  427. 评分维度(满分 100):
  428. 1. 尺寸评分 (30分): 入户门通常较大,高度约 2.0-2.4m,宽度约 0.9-1.2m
  429. 2. 地面贴合 (20分): 入户门底部贴近地面
  430. 3. 边缘位置 (25分): 入户门在建筑外围,离场景中心较远
  431. 4. 厚度评分 (15分): 入户门通常较厚(实心门)
  432. 5. 多视角支持 (10分): 被多个视角检测到的门更可信
  433. Args:
  434. door: 3D 门对象
  435. ground_y: 地面 Y 坐标
  436. all_centers: 所有门的中心坐标
  437. Returns:
  438. 评分详情 {"total": float, "details": Dict[str, float], "reason": str}
  439. """
  440. size = door.bbox_8points.max(axis=0) - door.bbox_8points.min(axis=0)
  441. height = size[1]
  442. width = max(size[0], size[2])
  443. thickness = min(size[0], size[2])
  444. door_bottom_y = door.bbox_8points[:, 1].min()
  445. # 1. 尺寸评分 (30分) - 理想入户门尺寸 高2.1m 宽1.0m
  446. height_score = max(0, 15 - abs(height - 2.1) * 10)
  447. width_score = max(0, 15 - abs(width - 1.0) * 12)
  448. size_score = height_score + width_score
  449. # 2. 地面贴合 (20分) - 底部越接近地面分越高
  450. dist_to_ground = abs(door_bottom_y - ground_y)
  451. ground_score = max(0, 20 - dist_to_ground * 40)
  452. # 3. 边缘位置 (25分) - 离所有门的中心越远,越可能是入户门
  453. if len(all_centers) > 1:
  454. dists_to_others = np.linalg.norm(all_centers - door.center, axis=1)
  455. max_dist = dists_to_others.max()
  456. avg_dist = dists_to_others.mean()
  457. # 平均距离越大越可能是入户门
  458. edge_score = min(25, avg_dist * 10)
  459. else:
  460. edge_score = 15 # 只有一个门时给中等分
  461. # 4. 厚度评分 (15分) - 入户门通常更厚
  462. # 理想厚度 0.04-0.1m,但点云中由于包含门框,会显得更厚
  463. if 0.05 <= thickness <= 0.3:
  464. thickness_score = 15
  465. elif thickness < 0.05:
  466. thickness_score = 5
  467. else:
  468. thickness_score = max(0, 15 - (thickness - 0.3) * 30)
  469. # 5. 多视角支持 (10分)
  470. source_count = len(door.source_detections)
  471. view_score = min(10, source_count * 3)
  472. total = size_score + ground_score + edge_score + thickness_score + view_score
  473. reasons = [
  474. f"尺寸={size_score:.0f}/30 (高{height:.2f}m 宽{width:.2f}m)",
  475. f"地面={ground_score:.0f}/20 (距地{dist_to_ground:.2f}m)",
  476. f"边缘={edge_score:.0f}/25",
  477. f"厚度={thickness_score:.0f}/15 ({thickness:.2f}m)",
  478. f"视角={view_score:.0f}/10 ({source_count}个)",
  479. ]
  480. return {
  481. "total": round(total, 1),
  482. "details": {
  483. "size": round(size_score, 1),
  484. "ground": round(ground_score, 1),
  485. "edge": round(edge_score, 1),
  486. "thickness": round(thickness_score, 1),
  487. "view": round(view_score, 1),
  488. },
  489. "reason": " | ".join(reasons),
  490. }
  491. def _identify_entrance_door(
  492. self,
  493. valid_doors: List[Door3D],
  494. ground_y: float,
  495. all_detections: List[MaskDetection],
  496. combined_pc: o3d.geometry.PointCloud,
  497. ) -> EntranceInfo:
  498. """
  499. 从多个门中识别入户门,无检测时提供兜底策略
  500. 策略优先级:
  501. 1. 如果 YOLOE 检测到 "exterior door" 类别 → 直接使用
  502. 2. 有多个有效门 → 按评分选择最高分
  503. 3. 无有效门但有 2D 检测 → 用置信度最高的 2D 检测的 3D 投影位置
  504. 4. 完全无检测 → 用场景几何估计(场景边界中心+地面高度)
  505. Args:
  506. valid_doors: 通过物理特性过滤的门
  507. ground_y: 地面 Y 坐标
  508. all_detections: 所有 2D 检测结果
  509. combined_pc: 合并后的场景点云
  510. Returns:
  511. EntranceInfo 入户门信息
  512. """
  513. # ===== 策略 1: 检查是否有 exterior door 检测结果 =====
  514. for det in all_detections:
  515. for src in det.source_detections if hasattr(det, 'source_detections') else []:
  516. pass # source_detections 在 Door3D 上,不在 MaskDetection 上
  517. # MaskDetection 只有 scores,没有类别信息
  518. # YOLOE predict 返回的 result.boxes.cls 包含类别 ID
  519. pass
  520. # ===== 策略 2: 有多个有效门,按评分选择 =====
  521. if len(valid_doors) >= 1:
  522. all_centers = np.array([d.center for d in valid_doors])
  523. scored_doors = []
  524. for door in valid_doors:
  525. score_info = self._score_entrance_door(door, ground_y, all_centers)
  526. scored_doors.append((door, score_info))
  527. # 按总分排序
  528. scored_doors.sort(key=lambda x: x[1]["total"], reverse=True)
  529. best_door, best_score = scored_doors[0]
  530. method = "size_score"
  531. reason = f"多门评分选择(共{len(valid_doors)}个门): {best_score['reason']}"
  532. if len(valid_doors) == 1:
  533. method = "size_score"
  534. reason = f"唯一有效门: {best_score['reason']}"
  535. print(f"\n入户门识别 - 选择门{best_door.id}")
  536. print(f" 方法: {method}")
  537. print(f" 评分: {best_score['total']}/100")
  538. print(f" 中心: {best_door.center.round(3)}")
  539. print(f" 原因: {reason}")
  540. return EntranceInfo(
  541. is_detected=True,
  542. door=best_door,
  543. center=best_door.center.copy(),
  544. score=best_score["total"] / 100.0,
  545. method=method,
  546. reason=reason,
  547. )
  548. # ===== 策略 3: 无有效门但有 2D 检测 =====
  549. # 找置信度最高的 2D 检测
  550. best_2d_score = 0.0
  551. best_2d_det = None
  552. for det in all_detections:
  553. if det.scores and len(det.mask_3d_points) > 0:
  554. max_score = max(det.scores)
  555. if max_score > best_2d_score:
  556. best_2d_score = max_score
  557. best_2d_det = det
  558. if best_2d_det is not None and len(best_2d_det.mask_3d_points) > 0:
  559. # 使用最高分 mask 的 3D 点中心作为入户门估计位置
  560. best_mask_idx = best_2d_det.scores.index(best_2d_score)
  561. if best_mask_idx < len(best_2d_det.mask_3d_points):
  562. est_center = np.mean(best_2d_det.mask_3d_points[best_mask_idx], axis=0)
  563. print(f"\n入户门识别 - 使用最高分 2D 检测兜底")
  564. print(f" 方法: fallback_2d")
  565. print(f" 图像: {best_2d_det.image_name}")
  566. print(f" 置信度: {best_2d_score:.3f}")
  567. print(f" 估计中心: {est_center.round(3)}")
  568. return EntranceInfo(
  569. is_detected=False,
  570. door=None,
  571. center=est_center,
  572. score=best_2d_score,
  573. method="fallback_2d",
  574. reason=f"最高分 2D 检测 ({best_2d_det.image_name}, conf={best_2d_score:.3f})",
  575. )
  576. # ===== 策略 4: 完全无检测,用场景几何估计 =====
  577. print(f"\n入户门识别 - 无任何检测,使用场景几何估计")
  578. points = np.asarray(combined_pc.points)
  579. if len(points) > 0:
  580. # 取点云的水平中心 + 最低点作为入口估计
  581. est_x = np.median(points[:, 0])
  582. est_y = ground_y + 1.0 # 地面以上 1m(典型门把手高度附近)
  583. est_z = np.median(points[:, 2])
  584. # 找离中心最远的方向(可能是入口方向)
  585. center_2d = np.array([est_x, est_z])
  586. dists = np.linalg.norm(points[:, [0, 2]] - center_2d, axis=1)
  587. far_idx = np.argsort(dists)[-len(dists)//10:] # 最远 10% 的点
  588. far_points = points[far_idx]
  589. est_x = np.median(far_points[:, 0])
  590. est_z = np.median(far_points[:, 2])
  591. else:
  592. est_x, est_y, est_z = 0.0, ground_y + 1.0, 0.0
  593. est_center = np.array([est_x, est_y, est_z])
  594. print(f" 方法: scene_center_estimate")
  595. print(f" 估计中心: {est_center.round(3)}")
  596. return EntranceInfo(
  597. is_detected=False,
  598. door=None,
  599. center=est_center,
  600. score=0.0,
  601. method="scene_center_estimate",
  602. reason="无检测结果,场景几何估计",
  603. )
  604. def _rgb_depth_to_pointcloud(
  605. self,
  606. rgb: np.ndarray,
  607. depth: np.ndarray,
  608. pose_matrix: np.ndarray
  609. ) -> o3d.geometry.PointCloud:
  610. """将 RGB-D 转换为世界坐标系点云"""
  611. H, W = depth.shape
  612. sph = Intrinsic_Spherical_NP(W, H)
  613. px, py = np.meshgrid(np.arange(W), np.arange(H))
  614. px_flat = px.flatten().astype(np.float64)
  615. py_flat = py.flatten().astype(np.float64)
  616. bx, by, bz = sph.bearing([px_flat, py_flat])
  617. bx, by, bz = np.array(bx), np.array(by), np.array(bz)
  618. mask = depth.flatten() > self.depth_min
  619. d = depth.flatten()[mask]
  620. if len(d) == 0:
  621. return o3d.geometry.PointCloud()
  622. pts_cam = np.stack([bx[mask] * d, by[mask] * d, bz[mask] * d], axis=1)
  623. R_z180 = np.diag([-1.0, -1.0, 1.0])
  624. pts_cam = pts_cam @ R_z180.T
  625. pts_w = (pose_matrix[:3, :3] @ pts_cam.T).T + pose_matrix[:3, 3]
  626. if rgb.shape[:2] != depth.shape:
  627. rgb_d = cv2.resize(rgb, (W, H), interpolation=cv2.INTER_LINEAR)
  628. else:
  629. rgb_d = rgb
  630. colors = rgb_d.reshape(-1, 3)[mask].astype(np.float64) / 255.0
  631. pc = o3d.geometry.PointCloud()
  632. pc.points = o3d.utility.Vector3dVector(pts_w)
  633. pc.colors = o3d.utility.Vector3dVector(colors)
  634. return pc
  635. def process_scene(self):
  636. """处理整个场景"""
  637. # 创建输出目录
  638. self.output_folder.mkdir(parents=True, exist_ok=True)
  639. self.detection_folder.mkdir(parents=True, exist_ok=True)
  640. rgb_files = sorted(
  641. self.rgb_folder.glob("*.jpg"),
  642. key=lambda x: int(x.stem)
  643. )
  644. if not rgb_files:
  645. raise FileNotFoundError(f"在 {self.rgb_folder} 中未找到 RGB 图像")
  646. print(f"找到 {len(rgb_files)} 张全景图,开始处理...")
  647. combined_pc = o3d.geometry.PointCloud()
  648. all_detections: List[MaskDetection] = []
  649. door_candidates = [] # 3D 门候选
  650. for rgb_file in tqdm(rgb_files, desc="处理场景"):
  651. idx = rgb_file.stem
  652. pose = self.poses.get(idx)
  653. if pose is None:
  654. print(f" ⚠️ 警告:{rgb_file.name} 无位姿信息,跳过")
  655. continue
  656. depth_path = self.depth_folder / f"{idx}.png"
  657. if not depth_path.exists():
  658. print(f" ⚠️ 警告:深度图不存在 {depth_path},跳过")
  659. continue
  660. rgb = cv2.cvtColor(cv2.imread(str(rgb_file)), cv2.COLOR_BGR2RGB)
  661. depth = cv2.imread(str(depth_path), cv2.IMREAD_UNCHANGED).astype(np.float32) / self.depth_scale
  662. pose_matrix = self._build_pose_matrix(pose)
  663. # 1. YOLOE 检测 + mask 3D 映射
  664. save_det_path = self.detection_folder / f"{idx}_det.jpg"
  665. det_result = self.detect_single_image(
  666. str(rgb_file), depth, pose_matrix, str(save_det_path)
  667. )
  668. all_detections.append(det_result)
  669. det_count = len(det_result.mask_contours)
  670. tqdm.write(f" {rgb_file.name}: 检测 {det_count} 个门")
  671. # 2. 收集 3D 门候选
  672. for i, pts_3d in enumerate(det_result.mask_3d_points):
  673. if len(pts_3d) > 10: # 至少 10 个点
  674. bbox_min, bbox_max = self._axis_aligned_bbox(pts_3d)
  675. door_candidates.append({
  676. 'bbox_min': bbox_min,
  677. 'bbox_max': bbox_max,
  678. 'points_3d': pts_3d,
  679. 'source': {
  680. 'image': rgb_file.name,
  681. 'score': det_result.scores[i] if i < len(det_result.scores) else 0.0
  682. }
  683. })
  684. # 3. RGB-D 转完整点云
  685. pc = self._rgb_depth_to_pointcloud(rgb, depth, pose_matrix)
  686. combined_pc += pc
  687. # 最终下采样
  688. print(f"\n融合前点数:{len(combined_pc.points)}")
  689. combined_pc = combined_pc.voxel_down_sample(self.voxel_size)
  690. print(f"融合后点数:{len(combined_pc.points)}")
  691. # 保存合并点云
  692. merge_ply_path = self.output_folder / "merged.ply"
  693. o3d.io.write_point_cloud(str(merge_ply_path), combined_pc)
  694. print(f"保存合并点云:{merge_ply_path}")
  695. # 合并 3D 门
  696. print(f"\n3D 门候选:{len(door_candidates)}")
  697. doors_3d = self._merge_3d_doors(door_candidates)
  698. print(f"合并后门数量:{len(doors_3d)}")
  699. # 估计地面 Y 坐标
  700. if self.ground_y is not None:
  701. ground_y = self.ground_y
  702. else:
  703. ground_y = self._estimate_ground_y(combined_pc)
  704. print(f"估计地面 Y 坐标:{ground_y:.3f}")
  705. # 过滤不符合物理特性的门
  706. print("\n过滤 3D 门...")
  707. valid_doors = []
  708. filtered_doors = []
  709. for door in doors_3d:
  710. passed, reasons = self._filter_door_by_properties(door, ground_y)
  711. if passed:
  712. valid_doors.append(door)
  713. else:
  714. filtered_doors.append((door, reasons))
  715. print(f"通过过滤:{len(valid_doors)} 个门")
  716. if len(filtered_doors) > 0:
  717. print(f"被过滤:{len(filtered_doors)} 个门")
  718. for door, reasons in filtered_doors:
  719. print(f" 门{door.id} (中心={door.center.round(2)}):")
  720. for reason in reasons:
  721. print(f" - {reason}")
  722. # 识别入户门
  723. print("\n" + "=" * 40)
  724. print("入户门识别")
  725. print("=" * 40)
  726. entrance_info = self._identify_entrance_door(
  727. valid_doors, ground_y, all_detections, combined_pc
  728. )
  729. # 保存检测结果(包含入户门信息)
  730. self._save_detections(all_detections, valid_doors, filtered_doors, entrance_info)
  731. return combined_pc, valid_doors, entrance_info
  732. def _save_detections(self, detections: List[MaskDetection], doors_3d: List[Door3D],
  733. filtered_doors: List[Tuple[Door3D, List[str]]] = None,
  734. entrance_info: Optional[EntranceInfo] = None):
  735. """保存检测结果"""
  736. # 只保存有检测到的图像
  737. detected_results = []
  738. for d in detections:
  739. if len(d.mask_contours) > 0:
  740. detected_results.append({
  741. "image": d.image_name,
  742. "count": len(d.mask_contours),
  743. "mask_contours": d.mask_contours,
  744. "scores": d.scores
  745. })
  746. # 3D 门信息 - 正确处理多来源合并
  747. doors_3d_data = []
  748. for door in doors_3d:
  749. # 统计来源信息
  750. source_count = len(door.source_detections)
  751. scores = [s['score'] for s in door.source_detections if 'score' in s]
  752. avg_score = sum(scores) / len(scores) if scores else 0.0
  753. # 计算门的尺寸
  754. size = door.bbox_8points.max(axis=0) - door.bbox_8points.min(axis=0)
  755. doors_3d_data.append({
  756. "id": door.id,
  757. "center": door.center.tolist(),
  758. "bbox_8points": door.bbox_8points.tolist(),
  759. "size": np.round(size, 4).tolist(), # [宽,高,厚]
  760. "source_count": source_count, # 来源数量
  761. "avg_score": round(avg_score, 4), # 平均置信度
  762. "sources": door.source_detections # 详细来源列表
  763. })
  764. # 被过滤的门信息
  765. filtered_data = []
  766. if filtered_doors and len(filtered_doors) > 0:
  767. for door, reasons in filtered_doors:
  768. source_count = len(door.source_detections)
  769. scores = [s['score'] for s in door.source_detections if 'score' in s]
  770. avg_score = sum(scores) / len(scores) if scores else 0.0
  771. size = door.bbox_8points.max(axis=0) - door.bbox_8points.min(axis=0)
  772. filtered_data.append({
  773. "id": door.id,
  774. "center": door.center.tolist(),
  775. "size": np.round(size, 4).tolist(),
  776. "avg_score": round(avg_score, 4),
  777. "source_count": source_count,
  778. "reject_reasons": reasons
  779. })
  780. # 入户门信息
  781. entrance_data = None
  782. if entrance_info:
  783. entrance_data = {
  784. "is_detected": entrance_info.is_detected,
  785. "center": entrance_info.center.tolist(),
  786. "score": round(entrance_info.score, 4),
  787. "method": entrance_info.method,
  788. "reason": entrance_info.reason,
  789. }
  790. if entrance_info.door is not None:
  791. entrance_data["door_id"] = entrance_info.door.id
  792. output = {
  793. "total_images_processed": len(detections),
  794. "images_with_doors": len(detected_results),
  795. "total_2d_detections": sum(r["count"] for r in detected_results),
  796. "total_3d_doors": len(doors_3d),
  797. "filtered_3d_doors": len(filtered_doors) if filtered_doors else 0,
  798. "entrance_door": entrance_data,
  799. "detected_images": detected_results,
  800. "3d_doors": doors_3d_data,
  801. "filtered_doors": filtered_data
  802. }
  803. json_path = self.output_folder / "detections.json"
  804. with open(json_path, 'w', encoding='utf-8') as f:
  805. json.dump(output, f, indent=2, ensure_ascii=False)
  806. print(f"保存检测结果:{json_path}")
  807. # 打印汇总
  808. print("\n" + "="*50)
  809. print("检测汇总")
  810. print("="*50)
  811. print(f" 处理图像数:{output['total_images_processed']}")
  812. print(f" 检测到门的图像:{output['images_with_doors']}")
  813. print(f" 2D 检测总数:{output['total_2d_detections']}")
  814. print(f" 3D 门数量:{output['total_3d_doors']}")
  815. if len(filtered_doors) > 0 if filtered_doors else False:
  816. print(f" 被过滤的门:{output['filtered_3d_doors']}")
  817. if entrance_data:
  818. det_tag = "已检测" if entrance_data["is_detected"] else "估计"
  819. print(f"\n 入户门 [{det_tag}]:")
  820. print(f" 方法: {entrance_data['method']}")
  821. print(f" 中心: {entrance_data['center']}")
  822. print(f" 评分: {entrance_data['score']}")
  823. print(f" 原因: {entrance_data['reason']}")
  824. if doors_3d:
  825. print("\n 有效 3D 门信息:")
  826. for door in doors_3d:
  827. size = door.bbox_8points.max(axis=0) - door.bbox_8points.min(axis=0)
  828. sources_info = f"{len(door.source_detections)} 个视角"
  829. if len(door.source_detections) > 1:
  830. scores_str = ", ".join([f"{s['image']}:{s['score']:.2f}" for s in door.source_detections])
  831. sources_info += f" ({scores_str})"
  832. print(f" 门{door.id}: 中心={door.center.round(3)}, 尺寸={size.round(3)}")
  833. print(f" 来源:{sources_info}")
  834. if filtered_doors and len(filtered_doors) > 0:
  835. print("\n 被过滤的门 (不符合物理特性):")
  836. for door, reasons in filtered_doors:
  837. size = door.bbox_8points.max(axis=0) - door.bbox_8points.min(axis=0)
  838. print(f" 门{door.id}: 中心={door.center.round(3)}, 尺寸={size.round(3)}")
  839. for reason in reasons:
  840. print(f" - {reason}")
  841. print("="*50)
  842. # ============================================================================
  843. # 主函数
  844. # ============================================================================
  845. def main():
  846. parser = argparse.ArgumentParser(
  847. description="场景点云处理器 - YOLOE 检测 + 点云融合 + 3D 门合并",
  848. formatter_class=argparse.RawDescriptionHelpFormatter,
  849. epilog="""
  850. 示例:
  851. # 处理 scene0001 场景
  852. python scene_processor.py -s scene0001
  853. # 指定模型和参数
  854. python scene_processor.py -s scene0001 --model yoloe-26x-seg.pt --conf 0.4
  855. # 调整点云精度
  856. python scene_processor.py -s scene0001 --voxel-size 0.02
  857. # 指定地面 Y 坐标(默认自动估计)
  858. python scene_processor.py -s scene0001 --ground-y -1.5
  859. # 调整门尺寸过滤阈值
  860. python scene_processor.py -s scene0001 --door-height-min 1.2 --door-width-min 0.5
  861. """
  862. )
  863. parser.add_argument(
  864. "--scene", "-s",
  865. type=str,
  866. default="scene0001",
  867. help="场景文件夹 (默认:scene0001)"
  868. )
  869. parser.add_argument(
  870. "--model", "-m",
  871. type=str,
  872. default="yoloe-26x-seg.pt",
  873. help="YOLOE 模型路径"
  874. )
  875. parser.add_argument(
  876. "--conf",
  877. type=float,
  878. default=0.35,
  879. help="置信度阈值 (默认:0.35)"
  880. )
  881. parser.add_argument(
  882. "--iou",
  883. type=float,
  884. default=0.45,
  885. help="NMS IoU 阈值 (默认:0.45)"
  886. )
  887. parser.add_argument(
  888. "--voxel-size",
  889. type=float,
  890. default=0.03,
  891. help="点云体素大小 (默认:0.03)"
  892. )
  893. parser.add_argument(
  894. "--depth-scale",
  895. type=float,
  896. default=256.0,
  897. help="深度图缩放因子 (默认:256.0)"
  898. )
  899. parser.add_argument(
  900. "--ground-y",
  901. type=float,
  902. default=None,
  903. help="地面 Y 坐标 (默认:从点云自动估计)"
  904. )
  905. parser.add_argument(
  906. "--door-height-min",
  907. type=float,
  908. default=1.0,
  909. help="门最小高度 (默认:1.0 米)"
  910. )
  911. parser.add_argument(
  912. "--door-height-max",
  913. type=float,
  914. default=3.0,
  915. help="门最大高度 (默认:3.0 米)"
  916. )
  917. parser.add_argument(
  918. "--door-width-min",
  919. type=float,
  920. default=0.3,
  921. help="门最小宽度 (默认:0.3 米)"
  922. )
  923. parser.add_argument(
  924. "--door-width-max",
  925. type=float,
  926. default=3.0,
  927. help="门最大宽度 (默认:3.0 米)"
  928. )
  929. parser.add_argument(
  930. "--door-thickness-max",
  931. type=float,
  932. default=0.5,
  933. help="门最大厚度 (默认:0.5 米)"
  934. )
  935. parser.add_argument(
  936. "--ground-dist-thresh",
  937. type=float,
  938. default=0.5,
  939. help="门底部距地面最大距离 (默认:0.5 米)"
  940. )
  941. args = parser.parse_args()
  942. if not Path(args.scene).exists():
  943. print(f"❌ 场景文件夹不存在:{args.scene}")
  944. return
  945. processor = SceneProcessor(
  946. scene_folder=args.scene,
  947. model_path=args.model,
  948. conf=args.conf,
  949. iou=args.iou,
  950. voxel_size=args.voxel_size,
  951. depth_scale=args.depth_scale,
  952. ground_y=args.ground_y,
  953. )
  954. # 更新过滤参数
  955. processor.DOOR_HEIGHT_MIN = args.door_height_min
  956. processor.DOOR_HEIGHT_MAX = args.door_height_max
  957. processor.DOOR_WIDTH_MIN = args.door_width_min
  958. processor.DOOR_WIDTH_MAX = args.door_width_max
  959. processor.DOOR_THICKNESS_MAX = args.door_thickness_max
  960. processor.GROUND_DIST_THRESH = args.ground_dist_thresh
  961. processor.process_scene()
  962. if __name__ == "__main__":
  963. main()