Skip to content

Commit 554fd97

Browse files
kew6688 and wangyukai authored
[Feature] Support vis_debug option for Habitat evaluation (#265)
* add vis debug in habitat
* fix draw pixel bug
* update default config setting

---------

Co-authored-by: wangyukai <wangyukai@pjlab.org.cn>
1 parent b8d36a9 commit 554fd97

File tree

3 files changed

+66
-0
lines changed

3 files changed

+66
-0
lines changed

internnav/habitat_extensions/vln/habitat_vln_evaluator.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
import cv2
1515
import habitat
16+
import imageio
1617
import numpy as np
1718
import quaternion
1819
import torch
@@ -105,6 +106,10 @@ def __init__(self, cfg: EvalCfg):
105106

106107
# ------------------------------------- model ------------------------------------------
107108
self.model_args = argparse.Namespace(**cfg.agent.model_settings)
109+
self.vis_debug = bool(getattr(self.model_args, "vis_debug", False))
110+
self.vis_debug_path = getattr(
111+
self.model_args, "vis_debug_path", os.path.join(self.output_path, "vis_debug")
112+
)
108113

109114
processor = AutoProcessor.from_pretrained(self.model_args.model_path)
110115
processor.tokenizer.padding_side = 'left'
@@ -288,9 +293,17 @@ def _run_eval_dual_system(self) -> tuple:
288293

289294
vis_frames = []
290295
step_id = 0
296+
vis_writer = None
291297

292298
if self.save_video:
293299
os.makedirs(os.path.join(self.output_path, f'vis_{self.epoch}', f'{scene_id}'), exist_ok=True)
300+
if self.vis_debug:
301+
debug_dir = os.path.join(self.vis_debug_path, f'epoch_{self.epoch}')
302+
os.makedirs(debug_dir, exist_ok=True)
303+
vis_writer = imageio.get_writer(
304+
os.path.join(debug_dir, f'{scene_id}_{episode_id:04d}.mp4'),
305+
fps=5,
306+
)
294307

295308
rgb_list = []
296309
action_seq = []
@@ -307,6 +320,7 @@ def _run_eval_dual_system(self) -> tuple:
307320

308321
# ---------- 2. Episode step loop -----------
309322
while (not done) and (step_id <= self.max_steps_per_episode):
323+
draw_pixel_goal = False
310324
# refactor agent get action
311325
rgb = observations["rgb"]
312326
depth = observations["depth"]
@@ -422,6 +436,7 @@ def _run_eval_dual_system(self) -> tuple:
422436
coord = [int(c) for c in re.findall(r'\d+', llm_outputs)]
423437

424438
pixel_goal = [int(coord[1]), int(coord[0])]
439+
draw_pixel_goal = True
425440

426441
# look down --> horizontal
427442
self.env.step(action_code.LOOKUP)
@@ -526,6 +541,24 @@ def _run_eval_dual_system(self) -> tuple:
526541

527542
print("step_id", step_id, "action", action)
528543

544+
if vis_writer is not None:
545+
vis = np.asarray(save_raw_image).copy()
546+
vis = cv2.putText(
547+
vis,
548+
f"step {step_id} action {int(action)}",
549+
(20, 40),
550+
cv2.FONT_HERSHEY_SIMPLEX,
551+
1,
552+
(0, 255, 0),
553+
2,
554+
)
555+
if pixel_goal is not None:
556+
if draw_pixel_goal:
557+
cv2.circle(
558+
vis, (pixel_goal[0], pixel_goal[1]), radius=8, color=(255, 0, 0), thickness=-1
559+
)
560+
vis_writer.append_data(vis)
561+
529562
if action == action_code.LOOKDOWN:
530563
self.env.step(action)
531564
observations, _, done, _ = self.env.step(action)
@@ -586,6 +619,8 @@ def _run_eval_dual_system(self) -> tuple:
586619
quality=9,
587620
)
588621
vis_frames.clear()
622+
if vis_writer is not None:
623+
vis_writer.close()
589624

590625
self.env.close()
591626

@@ -643,9 +678,17 @@ def _run_eval_system2(self) -> tuple:
643678

644679
vis_frames = []
645680
step_id = 0
681+
vis_writer = None
646682

647683
if self.save_video:
648684
os.makedirs(os.path.join(self.output_path, f'vis_{self.epoch}', f'{scene_id}'), exist_ok=True)
685+
if self.vis_debug:
686+
debug_dir = os.path.join(self.vis_debug_path, f'epoch_{self.epoch}')
687+
os.makedirs(debug_dir, exist_ok=True)
688+
vis_writer = imageio.get_writer(
689+
os.path.join(debug_dir, f'{scene_id}_{episode_id:04d}.mp4'),
690+
fps=5,
691+
)
649692
initial_height = self.env._env.sim.get_agent_state().position[1]
650693

651694
rgb_list = []
@@ -662,6 +705,7 @@ def _run_eval_system2(self) -> tuple:
662705

663706
# ---------- 2. Episode step loop -----------
664707
while (not done) and (step_id <= self.max_steps_per_episode):
708+
draw_pixel_goal = False
665709
# refactor agent get action
666710
rgb = observations["rgb"]
667711
depth = observations["depth"]
@@ -755,6 +799,7 @@ def _run_eval_system2(self) -> tuple:
755799
coord = [int(c) for c in re.findall(r'\d+', llm_outputs)]
756800

757801
pixel_goal = [int(coord[1]), int(coord[0])]
802+
draw_pixel_goal = True
758803

759804
# look down --> horizontal
760805
self.env.step(action_code.LOOKUP)
@@ -818,6 +863,21 @@ def _run_eval_system2(self) -> tuple:
818863

819864
print("step_id", step_id, "action", action)
820865

866+
if vis_writer is not None:
867+
vis = np.asarray(save_raw_image).copy()
868+
vis = cv2.putText(
869+
vis,
870+
f"step {step_id} action {int(action)}",
871+
(20, 40),
872+
cv2.FONT_HERSHEY_SIMPLEX,
873+
1,
874+
(0, 255, 0),
875+
2,
876+
)
877+
if draw_pixel_goal:
878+
cv2.circle(vis, (pixel_goal[0], pixel_goal[1]), radius=8, color=(255, 0, 0), thickness=-1)
879+
vis_writer.append_data(vis)
880+
821881
if action == action_code.LOOKDOWN:
822882
self.env.step(action)
823883
observations, _, done, _ = self.env.step(action)
@@ -875,6 +935,8 @@ def _run_eval_system2(self) -> tuple:
875935
quality=9,
876936
)
877937
vis_frames.clear()
938+
if vis_writer is not None:
939+
vis_writer.close()
878940

879941
self.env.close()
880942

scripts/eval/configs/habitat_dual_system_cfg.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
"resize_w": 384, # image resize width
1212
"resize_h": 384, # image resize height
1313
"max_new_tokens": 1024, # maximum number of tokens for generation
14+
"vis_debug": False, # If vis_debug=True, save debug videos per episode
15+
"vis_debug_path": "./logs/habitat/vis_debug",
1416
},
1517
),
1618
env=EnvCfg(

scripts/eval/configs/habitat_s2_cfg.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
"resize_w": 384, # image resize width
1212
"resize_h": 384, # image resize height
1313
"max_new_tokens": 1024, # maximum number of tokens for generation
14+
"vis_debug": False, # If vis_debug=True, save debug videos per episode
15+
"vis_debug_path": "./logs/habitat/vis_debug",
1416
},
1517
),
1618
env=EnvCfg(

0 commit comments

Comments
 (0)