From 86ceb5d666c4388db3f8215e58b37c35120485e7 Mon Sep 17 00:00:00 2001
From: Brian Roland <broland@nvidia.com>
Date: Sun, 31 Aug 2025 21:22:52 +0000
Subject: [PATCH 01/12] br: emacs

Signed-off-by: Brian Roland <broland@nvidia.com>
---
 Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Dockerfile b/Dockerfile
index 59ef90009..fba97c10a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -49,6 +49,7 @@ apt-get install -qyy \
   curl \
   pre-commit \
   sudo \
+  emacs-nox \
   gnupg \
   unzip \
   libsqlite3-dev

From 3d9e8b615ac6d14ac6e5b5f2f26c5ae386b40347 Mon Sep 17 00:00:00 2001
From: Brian Roland <broland@nvidia.com>
Date: Sun, 31 Aug 2025 22:51:45 +0000
Subject: [PATCH 02/12] br: printing output

Signed-off-by: Brian Roland <broland@nvidia.com>
---
 3rdparty/NeMo                                           | 2 +-
 sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/3rdparty/NeMo b/3rdparty/NeMo
index 7ccb0d4c5..654437102 160000
--- a/3rdparty/NeMo
+++ b/3rdparty/NeMo
@@ -1 +1 @@
-Subproject commit 7ccb0d4c5544dbcc454930acb3a1fe29d9db5090
+Subproject commit 654437102f3ac09cd0ba69ae78d8f5c0576d8239
diff --git a/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py b/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
index 4be3a8d0b..f04725669 100644
--- a/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
+++ b/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
@@ -630,7 +630,7 @@ def train(args: argparse.Namespace) -> nl.Trainer:
         lora_transform = None
         if args.lora_finetune:
             lora_transform = Evo2LoRA(peft_ckpt_path=args.lora_checkpoint_path)
-
+        print("********************train: init llm.HyenaModel*******")
         model = llm.HyenaModel(model_config, tokenizer=data_module.tokenizer, model_transform=lora_transform)
     else:  # mamba
         if args.no_weight_decay_embeddings:
@@ -855,6 +855,7 @@ def train(args: argparse.Namespace) -> nl.Trainer:
         ),
         val_check_interval=args.val_check_interval,
         enable_checkpointing=args.create_checkpoint_callback,
+        enable_progress_bar=True,
     )
 
     # Logger setup
@@ -892,15 +893,18 @@ def train(args: argparse.Namespace) -> nl.Trainer:
     opt = MegatronOptimizerModule(opt_config, sched, no_weight_decay_cond=model_config.hyena_no_weight_decay_cond_fn)
     opt.connect(model)
     # Start training
+    print("*******************train: before trainer.fit")
     trainer.fit(model, data_module)
+    print("*******************train: after trainer.fit")
     return trainer
 
 
 def main():
     """Parsing args and running evo2 training."""
     args = parse_args()
+    print("*******************main: before train")
     train(args=args)
-
+    print("*******************main: after train")
 
 if __name__ == "__main__":
     main()

From 46b299059fced9f5a689e3c250b8cadb5cab89bb Mon Sep 17 00:00:00 2001
From: Brian Roland <broland@nvidia.com>
Date: Mon, 1 Sep 2025 17:34:52 +0000
Subject: [PATCH 03/12] br: added scripts [skip ci]

Signed-off-by: Brian Roland <broland@nvidia.com>
---
 internal/scripts_br/build_dev_image_br.sh     |  37 +++++
 internal/scripts_br/install_tap.sh            |  14 ++
 internal/scripts_br/run_dev_br.sh             | 146 ++++++++++++++++++
 internal/scripts_br/run_evo2_train.sh         |  89 +++++++++++
 internal/scripts_br/run_mig_br.sh             |  46 ++++++
 .../scripts_br/run_nsys_with_evo2_train.sh    | 108 +++++++++++++
 internal/scripts_br/run_precommit.sh          |  43 ++++++
 internal/scripts_br/run_pytest.sh             |  51 ++++++
 internal/scripts_br/run_simple_torch_app.py   |  45 ++++++
 .../scripts_br/run_tap_with_evo2_train.sh     | 122 +++++++++++++++
 .../scripts_br/run_update_git_submodules.sh   |   2 +
 .../scripts_br/show_git_submodule_config.sh   |   2 +
 internal/scripts_br/venv_create.sh            |  67 ++++++++
 13 files changed, 772 insertions(+)
 create mode 100755 internal/scripts_br/build_dev_image_br.sh
 create mode 100755 internal/scripts_br/install_tap.sh
 create mode 100755 internal/scripts_br/run_dev_br.sh
 create mode 100755 internal/scripts_br/run_evo2_train.sh
 create mode 100755 internal/scripts_br/run_mig_br.sh
 create mode 100755 internal/scripts_br/run_nsys_with_evo2_train.sh
 create mode 100755 internal/scripts_br/run_precommit.sh
 create mode 100755 internal/scripts_br/run_pytest.sh
 create mode 100644 internal/scripts_br/run_simple_torch_app.py
 create mode 100755 internal/scripts_br/run_tap_with_evo2_train.sh
 create mode 100755 internal/scripts_br/run_update_git_submodules.sh
 create mode 100755 internal/scripts_br/show_git_submodule_config.sh
 create mode 100755 internal/scripts_br/venv_create.sh

diff --git a/internal/scripts_br/build_dev_image_br.sh b/internal/scripts_br/build_dev_image_br.sh
new file mode 100755
index 000000000..03eaa9aae
--- /dev/null
+++ b/internal/scripts_br/build_dev_image_br.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+# ------------------------------------------------------------------------
+# (0) preamble
+# ------------------------------------------------------------------------
+MESSAGE_TEMPLATE='********build_dev_image_br.sh: %s\n'
+DATE_OF_SCRIPT=$(date +'%Y%m%dT%H%M%S')
+SCRIPT_DIR="$(dirname "$(realpath "$BASH_SOURCE")")"
+printf "${MESSAGE_TEMPLATE}" "SCRIPT_DIR=${SCRIPT_DIR}"
+printf "${MESSAGE_TEMPLATE}" "hostname=$(hostname)"
+printf "${MESSAGE_TEMPLATE}" "whoami=$(whoami)"
+printf "${MESSAGE_TEMPLATE}" "uid=$(id -u)"
+printf "${MESSAGE_TEMPLATE}" "gid=$(id -g)"
+
+set -euo pipefail
+# Image tags cannot contain '/', so branch names like "user/topic" are flattened with '-'.
+BRANCH=$(git rev-parse --abbrev-ref HEAD | tr '/' '-')
+COMMIT=$(git rev-parse --short HEAD)
+DATE=$(date --iso-8601=seconds -u)   # UTC timestamp stored in the created_at image label
+
+set -x   # echo the full build command before it runs
+DOCKER_BUILDKIT=1 docker buildx build \
+  -t "nvcr.io/nvidian/cvai_bnmo_trng/bionemo:dev-${BRANCH}-${DATE_OF_SCRIPT}-${COMMIT}" \
+  --ulimit 'nofile=65535:65535' \
+  --target="development" \
+  --load \
+  --cache-from nvcr.io/nvidia/clara/bionemo-framework:nightly \
+  --cache-to type=inline \
+  --label "com.nvidia.bionemo.git_sha=${COMMIT}" \
+  --label "com.nvidia.bionemo.created_at=${DATE}" \
+  -f ./Dockerfile \
+  .
+
+# ----------------------
+# (-1) post-amble
+# --------------------------
+printf "${MESSAGE_TEMPLATE}" "end script"
\ No newline at end of file
diff --git a/internal/scripts_br/install_tap.sh b/internal/scripts_br/install_tap.sh
new file mode 100755
index 000000000..b8771128f
--- /dev/null
+++ b/internal/scripts_br/install_tap.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+# Prerequisite for the release version of TAP: ensure the user site-packages dir and a usercustomize.py file exist
+mkdir -p /workspace/bionemo2/.local/lib/python3.12/site-packages/
+touch /workspace/bionemo2/.local/lib/python3.12/site-packages/usercustomize.py
+
+# Install TAP (torch_automated_profiler) from the internal GitLab server, at the "release" ref
+pip install git+https://gitlab-master.nvidia.com/dl/gwe/torch_automated_profiler@release
+# Alternative kept for reference: Dockerfile step doing an editable install of the br_max_depth_1 branch
+# RUN --mount=type=ssh cd /opt && git clone ssh://git@gitlab-master.nvidia.com:12051/dl/gwe/torch_automated_profiler.git\
+#     && cd torch_automated_profiler\
+#     && git fetch origin br_max_depth_1\
+#     && git checkout -b br_max_depth_1 origin/br_max_depth_1\
+#     && pip install -e . -v
\ No newline at end of file
diff --git a/internal/scripts_br/run_dev_br.sh b/internal/scripts_br/run_dev_br.sh
new file mode 100755
index 000000000..3e66b1995
--- /dev/null
+++ b/internal/scripts_br/run_dev_br.sh
@@ -0,0 +1,146 @@
+#!/usr/bin/env bash
+
+# ------------------------------------------------------------------------
+# (0) preamble
+# ------------------------------------------------------------------------
+MESSAGE_TEMPLATE='********run_dev_br.sh: %s\n'
+DATE_OF_SCRIPT=$(date +'%Y%m%dT%H%M%S')
+SCRIPT_DIR="$(dirname "$(realpath "$BASH_SOURCE")")"
+printf "${MESSAGE_TEMPLATE}" "SCRIPT_DIR=${SCRIPT_DIR}"
+printf "${MESSAGE_TEMPLATE}" "hostname=$(hostname)"
+printf "${MESSAGE_TEMPLATE}" "whoami=$(whoami)"
+printf "${MESSAGE_TEMPLATE}" "uid=$(id -u)"
+printf "${MESSAGE_TEMPLATE}" "gid=$(id -g)"
+
+
+#set -euo pipefail
+
+source .env
+
+
+# -----------------------------------------------------
+# (1) user parameters
+# -----------------------------------------------------
+USER_IN_CTR=root                    # if profiling, run as root
+HOME_IN_CTR=/opt/${USER_IN_CTR}
+
+#GPU_ARG='--gpus "\"device=0,1,2,3,4,5,6,7\""'
+GPU_ARG='--gpus all'
+LOCAL_RESULTS_PATH="/home/scratch.broland_sw_1/data_for_projects/evo2/results/bionemo2_results"
+LOCAL_DATA_PATH="./data"
+LOCAL_MODELS_PATH="./models"
+
+COMMIT_AT_START=$(git rev-parse --short HEAD)
+BRANCH_AT_START=$(git rev-parse --abbrev-ref HEAD)
+IMAGE_REPO='nvcr.io/nvidian/cvai_bnmo_trng/bionemo'
+IMAGE_TAG='dev-br_bnm2532_dlsim_val_in_fw_a-20250831T164028-a29272f1'
+IMAGE_NAME="${IMAGE_REPO}:${IMAGE_TAG}"
+
+DOCKER_REPO_PATH="/workspace/bionemo2"
+DOCKER_RESULTS_PATH="/workspace/bionemo2/results"
+DOCKER_MODELS_PATH="/workspace/bionemo2/models"
+DOCKER_DATA_PATH="/workspace/bionemo2/data"
+
+# -----------------------------------------------------
+# (2) sanity checks
+# ----------------------------------------------------
+LOCAL_REPO_PATH="$(realpath "$(pwd)")"   # quoted: the checkout path may contain spaces
+if [[ "$(basename "${LOCAL_REPO_PATH}")" != *"bionemo-framework"* ]]; then
+    echo "ERROR: must run this script from the bionemo repository root!" >&2
+    exit 1
+fi
+
+# ---------------------------------------------------------------------
+# (3) make expected directories in external filesystem as user, not as docker
+# ----------------------------------------------------
+expected_local_dirs=("${LOCAL_RESULTS_PATH}" "${LOCAL_DATA_PATH}" "${LOCAL_MODELS_PATH}" "./htmlcov")
+for expected_local_dir in "${expected_local_dirs[@]}"; do
+    printf "${MESSAGE_TEMPLATE}" "expected_local_dir=${expected_local_dir}"
+    mkdir -p "${expected_local_dir}"
+    chmod -R a+rw "${expected_local_dir}"
+done
+
+# ---------------------------------------------------------------------
+# (4) delete external directories with state; abort if the base path is empty so this can never become "rm -rf /evo2"
+# ----------------------------------------------------
+sudo rm -rf -- "${LOCAL_RESULTS_PATH:?LOCAL_RESULTS_PATH must be set}/evo2"
+
+# -----------------------------------------------------
+# (5) assemble docker run command
+# ----------------------------------------------------
+
+printf "${MESSAGE_TEMPLATE}" "create DOCKER_RUN_COMMAND"
+
+read -r -d '' DOCKER_RUN_OPTIONS_FOR_PROFILING <<EOF
+    --user ${USER_IN_CTR} \\
+    --cap-add=SYS_ADMIN \\
+    --cap-add=SYS_PTRACE \\
+    --cap-add=PERFMON \\
+    --security-opt seccomp=unconfined \\
+    --privileged
+EOF
+
+read -r -d '' SECRETS<<EOF
+    -e WANDB_API_KEY=$WANDB_API_KEY
+EOF
+
+read -r -d '' DOCKER_RUN_OPTIONS <<EOF
+    -u $(id -u):$(id -g) \\
+    --rm \\
+    -it \\
+    ${DOCKER_RUN_OPTIONS_FOR_PROFILING} \\
+    --network host \\
+    ${GPU_ARG} \\
+    -p ${JUPYTER_PORT}:8888 \\
+    --shm-size=64g \\
+    -e TMPDIR=/tmp/ \\
+    -e BRANCH_AT_START=${BRANCH_AT_START} \\
+    -e COMMIT_AT_START=${COMMIT_AT_START} \\
+    -e NUMBA_CACHE_DIR=/tmp/ \\
+    -e HOME=${DOCKER_REPO_PATH} \\
+    -v ${HOME}/.bash_aliases:${HOME_IN_CTR}/.bash_aliases \\
+    -w ${DOCKER_REPO_PATH} \\
+    -v ${LOCAL_RESULTS_PATH}:${DOCKER_RESULTS_PATH} \\
+    -v ${LOCAL_DATA_PATH}:${DOCKER_DATA_PATH} \\
+    -v ${LOCAL_MODELS_PATH}:${DOCKER_MODELS_PATH} \\
+    -v /etc/passwd:/etc/passwd:ro \\
+    -v /etc/group:/etc/group:ro \\
+    -v /etc/shadow:/etc/shadow:ro \\
+    -v ${HOME}/.ssh:${DOCKER_REPO_PATH}/.ssh:ro \\
+    -v ${LOCAL_REPO_PATH}/htmlcov:/${DOCKER_REPO_PATH}/htmlcov \\
+    -v ${LOCAL_REPO_PATH}:${DOCKER_REPO_PATH} \\
+    -e NGC_CLI_ORG \\
+    -e NGC_CLI_TEAM \\
+    -e NGC_CLI_FORMAT_TYPE \\
+    -e NGC_CLI_API_KEY \\
+    -e AWS_ENDPOINT_URL \\
+    -e AWS_REGION \\
+    -e AWS_ACCESS_KEY_ID \\
+    -e AWS_SECRET_ACCESS_KEY
+EOF
+read -r -d '' DOCKER_RUN_WITHOUT_SECRETS <<EOF
+docker run \\
+    ${DOCKER_RUN_OPTIONS} \\
+    ${IMAGE_NAME} \\
+    bash --rcfile ${HOME_IN_CTR}/.bash_aliases
+EOF
+
+read -r -d '' DOCKER_RUN_WITH_SECRETS <<EOF
+docker run \\
+    ${DOCKER_RUN_OPTIONS} \\
+    ${SECRETS} \\
+    ${IMAGE_NAME} \\
+    bash --rcfile ${HOME_IN_CTR}/.bash_aliases
+EOF
+
+# -----------------------------------------------------
+# (6) run docker run command
+# ----------------------------------------------------
+printf "${MESSAGE_TEMPLATE}" "DOCKER_RUN_WITHOUT_SECRETS=${DOCKER_RUN_WITHOUT_SECRETS}"
+eval "$DOCKER_RUN_WITH_SECRETS"
+
+# -----------------------------------------------------
+# (-1) summarize
+# ----------------------------------------------------
+
+printf "${MESSAGE_TEMPLATE}" "run_dev_br.sh: end script----"
diff --git a/internal/scripts_br/run_evo2_train.sh b/internal/scripts_br/run_evo2_train.sh
new file mode 100755
index 000000000..93b057f20
--- /dev/null
+++ b/internal/scripts_br/run_evo2_train.sh
@@ -0,0 +1,89 @@
+#!/bin/bash
+
+# ----------------------------------------
+# (0) preamble
+# ----------------------------------------
+MESSAGE_TEMPLATE='********run_evo2_train.sh: %s\n'
+DATE_OF_SCRIPT=$(date +'%Y%m%dT%H%M')
+WHOAMI="$(whoami)"
+SCRIPT_DIR="$(dirname "$(realpath "$BASH_SOURCE")")"
+printf "${MESSAGE_TEMPLATE}" "begin"
+printf "${MESSAGE_TEMPLATE}" "DATE_OF_SCRIPT=${DATE_OF_SCRIPT}"
+printf "${MESSAGE_TEMPLATE}" "WHOAMI=${WHOAMI}"
+
+# ----------------------------------------
+# (1) set some user parameters
+# ----------------------------------------
+RESULTS_DIR="./results"  # i.e. /workspace/bionemo2/results
+RESULTS_THIS_APP_DIR="${RESULTS_DIR}/run_evo2_train"
+
+RUN_LABEL_PREFIX="bionemo_evo2_train"
+PYTHON_SCRIPT_PATH=sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
+
+TRAIN_ARGS_ARRAY=(
+    "--mock-data" 
+    "--seq-length"
+    "256" 
+    "--micro-batch-size"
+    "1" 
+    "--model-size"
+    "test"
+    "--max-steps"
+    "40" 
+    "--context-parallel-size"
+    "1"
+    "--devices"
+    "1"
+)
+
+# ----------------------------------------
+# (2) dump parameters
+# ----------------------------------------
+printf "${MESSAGE_TEMPLATE}" "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}"
+
+# ---------------------------------------------------
+# (3) purge training app state
+#   - delete the previous backup in tmp/, then move the current evo2 state dir into tmp/
+# ----------------------------------------------------
+if [[ -d "${RESULTS_DIR}/tmp/evo2" ]]; then
+    rm -rf "${RESULTS_DIR}/tmp/evo2"
+    printf "${MESSAGE_TEMPLATE}" "rm -rf ${RESULTS_DIR}/tmp/evo2, end"
+fi
+if [[ -d "${RESULTS_DIR}/evo2" ]]; then
+    mkdir -p "${RESULTS_DIR}/tmp" && mv "${RESULTS_DIR}/evo2" "${RESULTS_DIR}/tmp/"  # tmp/ may not exist yet
+    printf "${MESSAGE_TEMPLATE}" "mv ${RESULTS_DIR}/evo2 ${RESULTS_DIR}/tmp/, end"
+fi
+# ----------------------------------------
+# (4) create output dirs and file names
+# ----------------------------------------
+run_label_arr=(
+    ${RUN_LABEL_PREFIX}
+    ${BRANCH_AT_START}
+    ${DATE_OF_SCRIPT}
+    ${COMMIT_AT_START}
+)
+RUN_LABEL=$(IFS='_'; echo "${run_label_arr[*]}")
+printf "${MESSAGE_TEMPLATE}" "RUN_LABEL=${RUN_LABEL}"
+
+RESULTS_THIS_APP_THIS_RUN_DIR="${RESULTS_THIS_APP_DIR}/${RUN_LABEL}"
+mkdir -p ${RESULTS_THIS_APP_THIS_RUN_DIR}
+chmod a+rw ${RESULTS_THIS_APP_THIS_RUN_DIR}
+
+LOG_FILE="${RESULTS_THIS_APP_THIS_RUN_DIR}/${RUN_LABEL}.log"
+
+
+# ----------------------------------------
+# (5) create python training script command
+# ---------------------------------------
+read -r -d '' PY_COMMAND <<EOF
+python -u ${PYTHON_SCRIPT_PATH} ${TRAIN_ARGS_ARRAY[@]} 2>&1 | tee -a ${LOG_FILE}
+EOF
+
+printf "${MESSAGE_TEMPLATE}" "PY_COMMAND=${PY_COMMAND}"
+eval "${PY_COMMAND}"
+
+# ----------------------------------------
+# (-1) post-amble
+# ----------------------------------------
+printf "${MESSAGE_TEMPLATE}" "LOG_FILE=${LOG_FILE}"
+printf "${MESSAGE_TEMPLATE}" "end with success"
\ No newline at end of file
diff --git a/internal/scripts_br/run_mig_br.sh b/internal/scripts_br/run_mig_br.sh
new file mode 100755
index 000000000..446615479
--- /dev/null
+++ b/internal/scripts_br/run_mig_br.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+
+# profiles are specified to each GPU, e.g. profile 15 can be used to divide into 4 devices of size 20gb
+# NVIDIA H100 80GB HBM3
+#| => sudo nvidia-smi mig -i 3 -cgi 15 -C
+#Successfully created GPU instance ID  5 on GPU  3 using profile MIG 1g.20gb (ID 15)
+#Successfully created compute instance ID  0 on GPU  3 GPU instance ID  5 using profile MIG 1g.20gb (ID  7)
+
+
+
+
+# GPU 0: NVIDIA H100 80GB HBM3 (UUID: GPU-afddd1b4-4464-96c8-a712-aaeb0acf1170)  # cuda 0 on torch
+# GPU 1: NVIDIA H100 80GB HBM3 (UUID: GPU-6faf0136-7870-5767-10be-a0827a158829)
+# GPU 2: NVIDIA H100 80GB HBM3 (UUID: GPU-20d20fc3-bcc7-e715-32d6-ffd646ea062f)
+# GPU 3: NVIDIA H100 80GB HBM3 (UUID: GPU-182e6bd5-b7ac-e0a6-48cf-96e198063dd3)
+#   MIG 1g.20gb     Device  0: (UUID: MIG-56679450-0984-50db-83a3-7e549eb60883)  # cuda 4 on torch
+#   MIG 1g.20gb     Device  1: (UUID: MIG-a155b8d5-2484-52fc-a2ed-e47dc89996cd)
+#   MIG 1g.20gb     Device  2: (UUID: MIG-9dc27b3c-b567-5802-a2a7-27ad657ab079)
+#   MIG 1g.20gb     Device  3: (UUID: MIG-f6102e7f-bbf5-5db4-abea-156619dd4ce2)
+
+
+
+# Split into two 40gb devices: sudo nvidia-smi mig -i 5 -cgi 5,5
+
+# (0) choose a device
+DEVICE_INDEX_FOR_MIG=1
+PROFILE=15
+PROFILE=9 # 
+
+# (1) show all gpu indices, uuids,  and product names
+nvidia-smi -L
+
+# (2) list all MIG instances
+sudo nvidia-smi mig -lgi
+
+# (3) activate multi-instance gpu (MIG) mode on the chosen device
+sudo nvidia-smi --id ${DEVICE_INDEX_FOR_MIG} -mig 1
+
+# create MIG instances on device ${DEVICE_INDEX_FOR_MIG}: the loop makes 4 creation attempts with profile ${PROFILE}
+for i in {0..3}; do
+    sudo nvidia-smi mig --id ${DEVICE_INDEX_FOR_MIG} -cgi ${PROFILE} -C
+done
+
+# show all gpu indices, uuids, and product names
+nvidia-smi -L
diff --git a/internal/scripts_br/run_nsys_with_evo2_train.sh b/internal/scripts_br/run_nsys_with_evo2_train.sh
new file mode 100755
index 000000000..e59cf0fb8
--- /dev/null
+++ b/internal/scripts_br/run_nsys_with_evo2_train.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+#
+#
+# nsys option like --pytorch functions-trace:
+#   nvtx markers for common torch operations at the pytorch level like torch.Tensor.to
+#
+# nsys option like --pytorch autograd-shapes-nvtx: 
+#   nvtx markers for common torch operations at the kernel level like "to", "to_copy"
+#
+
+
+# ----------------------------------------
+# (0) preamble
+# ----------------------------------------
+MESSAGE_TEMPLATE='********run_nsys_with_evo2_train.sh: %s\n'  # log prefix names this script
+DATE_OF_SCRIPT=$(date +'%Y%m%dT%H%M')                         # minute-resolution timestamp used in the run label
+SCRIPT_DIR="$(dirname "$(realpath "${BASH_SOURCE[0]}")")"
+printf "${MESSAGE_TEMPLATE}" "begin"
+printf "${MESSAGE_TEMPLATE}" "DATE_OF_SCRIPT=${DATE_OF_SCRIPT}"
+
+# ----------------------------------------
+# (1) set some user parameters
+# ----------------------------------------
+RESULTS_DIR="./results/run_nsys_with_evo2_train"
+
+read -r -d '' NSYS_PROFILE_OPTIONS <<EOF
+    -s none \\
+    --trace=cuda,nvtx \\
+    --pytorch autograd-shapes-nvtx,functions-trace \\
+    --force-overwrite true
+EOF
+
+TRAIN_ARGS_ARRAY=(
+    "--nsys-profiling"
+    "--nsys-start-step"
+    "20"
+    "--nsys-end-step"
+    "28"
+    "--mock-data"
+    "--seq-length"
+    "256"
+    "--micro-batch-size"
+    "1"
+    "--model-size"
+    "test"
+    "--max-steps"
+    "30"
+    "--context-parallel-size"
+    "1"
+    "--devices"
+    "1"
+)
+
+RUN_LABEL_PREFIX="nsys_bionemo_evo2_train"
+PYTHON_SCRIPT_PATH=sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
+
+# ----------------------------------------
+# (2) dump parameters
+# ----------------------------------------
+printf "${MESSAGE_TEMPLATE}" "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}"
+
+# ----------------------------------------
+# (3) create log file name and report filename
+# ----------------------------------------
+run_label_arr=(
+    ${RUN_LABEL_PREFIX}
+    "mock-data"
+    ${BRANCH_AT_START}
+    ${DATE_OF_SCRIPT}
+    ${COMMIT_AT_START}
+)
+RUN_LABEL="$(IFS='_'; echo "${run_label_arr[*]}")"
+
+RESULTS_SUBDIR="${RESULTS_DIR}/"nsys"/${RUN_LABEL}"
+mkdir -p "${RESULTS_SUBDIR}"
+chmod a+r "${RESULTS_SUBDIR}"
+
+LOG_FILE="${RESULTS_SUBDIR}/${RUN_LABEL}.log"
+REPORT_FILE="${RESULTS_SUBDIR}/${RUN_LABEL}.nsys-rep"
+
+# ----------------------------------------
+# (4) create command
+# ----------------------------------------
+APPLICATION_TO_PROFILE="python ${PYTHON_SCRIPT_PATH} ${TRAIN_ARGS_ARRAY[*]}"
+# stderr is merged into stdout so the log file also captures errors and tracebacks
+read -r -d '' NSYS_PROFILE_CMD <<EOF
+nsys profile \\
+    -o ${REPORT_FILE} \\
+    ${NSYS_PROFILE_OPTIONS} \\
+    ${APPLICATION_TO_PROFILE} 2>&1 | tee -a ${LOG_FILE}
+EOF
+# ----------------------------------------
+# (5) run command
+# ----------------------------------------
+printf "\n"
+printf "${MESSAGE_TEMPLATE}" "nsys version: $(nsys --version)"
+
+printf "\n"
+printf "${MESSAGE_TEMPLATE}" "APPLICATION_TO_PROFILE=${APPLICATION_TO_PROFILE}"
+printf "\n"
+printf "${MESSAGE_TEMPLATE}" "NSYS_PROFILE_CMD"
+echo "${NSYS_PROFILE_CMD}"
+eval "${NSYS_PROFILE_CMD}"
+
+# ----------------------------------------
+# (-1) post-amble
+# ----------------------------------------
+printf "${MESSAGE_TEMPLATE}" "end with success"
\ No newline at end of file
diff --git a/internal/scripts_br/run_precommit.sh b/internal/scripts_br/run_precommit.sh
new file mode 100755
index 000000000..639d4c193
--- /dev/null
+++ b/internal/scripts_br/run_precommit.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# comment
+
+
+# files_to_check=(
+#     src/boltz/distributed/model/layers/distribute_module_tools.py
+#     src/boltz/distributed/model/layers/swiglu.py
+#     src/boltz/distributed/model/layers/mult_for_same_placement_and_shape.py
+#     tests/model/layers/test_dtensor_swiglu.py
+#     src/boltz/testing/utils.py
+# )
+# files_to_check=(
+#     src/boltz/distributed/model/layers/distribute_module_tools.py
+#     src/boltz/distributed/model/layers/layernorm.py
+#     tests/distributed/test_dtensor_layernorm.py
+# )
+
+# files_to_check=(
+#     src/boltz/distributed/model/layers/attention.py
+#     src/boltz/distributed/model/layers/attention_impl.py
+#     src/boltz/distributed/model/layers/distribute_module_tools.py
+#     src/boltz/distributed/model/layers/dtensor_metadata_tools.py
+#     tests/distributed/model/layers/test_attention_with_dtensor_for_pairformer_use_case.py
+# )
+
+# files_to_check=(
+#     src/boltz/distributed/model/layers/dtensor_metadata_tools.py
+#     tests/distributed/test_dtensor_metadata_tools.py
+# )
+
+files_to_check=(
+    sub-packages/bionemo-testing/src/bionemo/testing/torch.py
+    sub-packages/bionemo-evo2/tests/bionemo/evo2/test_evo2.py
+    sub-packages/bionemo-evo2/tests/bionemo/evo2/conftest.py
+    sub-packages/bionemo-testing/tests/bionemo/testing/test_torch.py
+)
+
+# Run the hooks one file at a time; "$file" is quoted so paths with spaces or glob characters stay intact.
+for file in "${files_to_check[@]}"; do
+    echo "Checking $file"
+    pre-commit run --files "$file"
+done
+
diff --git a/internal/scripts_br/run_pytest.sh b/internal/scripts_br/run_pytest.sh
new file mode 100755
index 000000000..bd10c53a9
--- /dev/null
+++ b/internal/scripts_br/run_pytest.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# ----------------------------------------
+# (0) preamble
+# ----------------------------------------
+MESSAGE_TEMPLATE='********run_pytest.sh: %s\n'
+DATE_OF_SCRIPT=$(date +'%Y%m%dT%H%M')
+SCRIPT_DIR="$(dirname "$(realpath "$BASH_SOURCE")")"
+printf "${MESSAGE_TEMPLATE}" "begin"
+printf "${MESSAGE_TEMPLATE}" "DATE_OF_SCRIPT=${DATE_OF_SCRIPT}"
+
+# ----------------------------------------
+# (1) set some user parameters
+# ----------------------------------------
+#CUDA_VISIBLE_DEVICES=MIG-0e9a0f4b-dfee-5517-a54e-a73d5c450f24 # 20gb
+#CUDA_VISIBLE_DEVICES=MIG-08fb5198-a9d8-5984-b31a-a8e7044320d0 # 40gb
+export CUDA_VISIBLE_DEVICES=GPU-6f9dcb23-36a0-81a9-8942-78e5f07e3817 # gpu 0 with 80gb; CUDA only honors this variable name
+PYTEST_LOG_FILE_PREFIX="pytests_pr1058_unskip_evo2_tests"
+PYTEST_MARKERS=("not slow")
+#PYTEST_MARKERS=("not slow" "slow")
+#TEST_PATH=$(pwd)
+TEST_PATH=sub-packages/bionemo-evo2/tests/bionemo/evo2/test_evo2.py
+#TEST_PATH=sub-packages/bionemo-evo2/tests/bionemo/evo2/test_evo2.py::test_golden_values_top_k_logits_and_cosine_similarity_7b
+
+# ----------------------------------------
+# (2) dump parameters
+# ----------------------------------------
+printf "${MESSAGE_TEMPLATE}" "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}"
+
+# ----------------------------------------
+# (3) create log file name
+# ----------------------------------------
+TEST_PATH_LABEL="${TEST_PATH//\//-}"  # replace forward slashes with hyphens
+TEST_PATH_LABEL="${TEST_PATH_LABEL//::/__}"  # replace pytest node-id separators (::) with double underscores
+TEST_PATH_LABEL="${TEST_PATH_LABEL//.py/}"  # remove .py (a space here would split the tee target below)
+PYTEST_LOG_DIR="test_logs_for_evo2"
+mkdir -p "${PYTEST_LOG_DIR}"  # tee does not create missing parent directories
+for PYTEST_MARKER in "${PYTEST_MARKERS[@]}"; do
+    PYTEST_MARKER_LABEL="${PYTEST_MARKER// /}"
+    PYTEST_LOG_FILE="${PYTEST_LOG_DIR}/${PYTEST_LOG_FILE_PREFIX}_${TEST_PATH_LABEL}_${PYTEST_MARKER_LABEL}_${BRANCH_AT_START}_${DATE_OF_SCRIPT}_${COMMIT_AT_START}.log"
+    PYTEST_COMMAND="pytest -s -v -m '${PYTEST_MARKER}' ${TEST_PATH} | tee -a ${PYTEST_LOG_FILE}"
+    printf "${MESSAGE_TEMPLATE}" "PYTEST_COMMAND=${PYTEST_COMMAND}"
+    eval "${PYTEST_COMMAND}"
+
+done
+
+
+# ----------------------------------------
+# (-1) post-amble
+# ----------------------------------------
+printf "${MESSAGE_TEMPLATE}" "end with success"
\ No newline at end of file
diff --git a/internal/scripts_br/run_simple_torch_app.py b/internal/scripts_br/run_simple_torch_app.py
new file mode 100644
index 000000000..6af0a7317
--- /dev/null
+++ b/internal/scripts_br/run_simple_torch_app.py
@@ -0,0 +1,45 @@
+import torch
+import torch.profiler
+
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# Define a simple module: Linear -> ReLU -> Linear
+class SimpleModel(nn.Module):
+    """Two-layer network: Linear(input, hidden) -> ReLU -> Linear(hidden, output)."""
+    def __init__(self, input_size=10, hidden_size=20, output_size=5):
+        super().__init__()
+        self.fc1 = nn.Linear(input_size, hidden_size)
+        self.fc2 = nn.Linear(hidden_size, output_size)
+
+    def forward(self, x):
+        """Apply fc1, ReLU, then fc2 to x and return the result."""
+        return self.fc2(F.relu(self.fc1(x)))
+
+# Run on the GPU when one is present so the CUDA activity requested below has kernels to record
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = SimpleModel().to(device)
+
+# Generate random input data (batch_size=4, input_size=10) on the same device as the model
+x = torch.randn(4, 10, device=device)
+
+with torch.profiler.profile(
+    activities=[torch.profiler.ProfilerActivity.CPU,
+                torch.profiler.ProfilerActivity.CUDA],
+    on_trace_ready=torch.profiler.tensorboard_trace_handler("./log"),
+    record_shapes=True,
+    with_stack=True
+) as prof:
+    for _ in range(5):
+        output = model(x)
+
+        print("Input:", x)
+        print("Output:", output)
+
+print(
+    prof.key_averages(group_by_input_shape=True).table(
+        sort_by="cuda_time_total", row_limit=10
+    )
+)
diff --git a/internal/scripts_br/run_tap_with_evo2_train.sh b/internal/scripts_br/run_tap_with_evo2_train.sh
new file mode 100755
index 000000000..f22a8fb36
--- /dev/null
+++ b/internal/scripts_br/run_tap_with_evo2_train.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+#
+#
+# nsys option like --pytorch functions-trace:
+#   nvtx markers for common torch operations at the pytorch level like torch.Tensor.to
+#
+# nsys option like --pytorch autograd-shapes-nvtx: 
+#   nvtx markers for common torch operations at the kernel level like "to", "to_copy"
+#
+
+
+# ----------------------------------------
+# (0) preamble
+# ----------------------------------------
+MESSAGE_TEMPLATE='********run_tap_with_evo2_train.sh: %s\n'  # log prefix names this script
+DATE_OF_SCRIPT=$(date +'%Y%m%dT%H%M')                        # minute-resolution timestamp used in the run label
+SCRIPT_DIR="$(dirname "$(realpath "${BASH_SOURCE[0]}")")"
+printf "${MESSAGE_TEMPLATE}" "begin"
+printf "${MESSAGE_TEMPLATE}" "DATE_OF_SCRIPT=${DATE_OF_SCRIPT}"
+
+# ----------------------------------------
+# (1) set some user parameters
+# ----------------------------------------
+export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+export TAP_NSIGHT_LOCATION='/usr/local/cuda/bin/nsys'
+export TAP_LOG_LEVEL='debug'
+export TAP_MODE='nsight'     # '', nsight, or anna..... '' means ignore all profiling
+export TAP_NVTX='pytorch'       # pytorch, apex, python
+export TAP_BACKWARD_NVTX='false'   # true or false
+export TAP_PROFILE_MEMORY='false'
+export TAP_WAIT_STEPS='1'       # 2 is my default
+export TAP_WARMUP_STEPS='1'    # 12 is my default, 
+export TAP_ACTIVE_STEPS='4'     # 1 is my default
+export TAP_WAIT_EPOCHS='1'
+
+
+
+export TAP_EXIT_ON_STOP=true
+
+#export APP_NVTX_CATEGORIES='main,lit_module,dataset'
+export TAP_NSIGHT_FLAGS='--trace nvtx,cuda'
+#export TAP_MAX_DEPTH=14    # minimum is 4, since torch compile adds a level; default is 14
+
+
+RESULTS_DIR="./results/run_tap_with_evo2_train"
+
+TRAIN_ARGS_ARRAY=(
+    "--mock-data"
+    "--seq-length"
+    "256"
+    "--micro-batch-size"
+    "1"
+    "--model-size"
+    "test"
+    "--max-steps"
+    "30"
+    "--context-parallel-size"
+    "1"
+    "--devices"
+    "1"
+    "--val-check-interval" "0"
+)
+
+RUN_LABEL_PREFIX="tap_bionemo_evo2_train"
+PYTHON_SCRIPT_PATH=sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
+
+# ----------------------------------------
+# (2) dump parameters
+# ----------------------------------------
+printf "${MESSAGE_TEMPLATE}" "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}"
+
+# ----------------------------------------
+# (3) create output dir names and file names
+#       - create log file name and report filename
+# ----------------------------------------
+run_label_arr=(
+    ${RUN_LABEL_PREFIX}
+    "mock-data"
+    ${BRANCH_AT_START}
+    ${DATE_OF_SCRIPT}
+    ${COMMIT_AT_START}
+)
+RUN_LABEL="$(IFS='_'; echo "${run_label_arr[*]}")"
+
+RESULTS_SUBDIR="${RESULTS_DIR}/${RUN_LABEL}"
+export TAP_SAVE_DIR="${RESULTS_SUBDIR}"
+mkdir -p "${RESULTS_SUBDIR}"
+chmod a+r "${RESULTS_SUBDIR}"
+
+LOG_FILE="${RESULTS_SUBDIR}/${RUN_LABEL}.log"
+REPORT_FILE="${RESULTS_SUBDIR}/${RUN_LABEL}.nsys-rep"
+
+# ----------------------------------------
+# (4) create command
+# ----------------------------------------
+APPLICATION_TO_PROFILE="python ${PYTHON_SCRIPT_PATH} ${TRAIN_ARGS_ARRAY[@]}"
+#APPLICATION_TO_PROFILE="python -c 'import torch; x= torch.ones(500)'"
+
+read -r -d '' TAP_PROFILE_CMD <<EOF
+${APPLICATION_TO_PROFILE} 2>&1 | tee -a ${LOG_FILE}
+EOF
+# ----------------------------------------
+# (5) run command
+# ----------------------------------------
+printf "\n"
+printf "${MESSAGE_TEMPLATE}" "nsys version: $(nsys --version)"
+
+printf "\n"
+printf "${MESSAGE_TEMPLATE}" "APPLICATION_TO_PROFILE=${APPLICATION_TO_PROFILE}"
+printf "\n"
+printf "${MESSAGE_TEMPLATE}" "TAP_PROFILE_CMD"
+echo "${TAP_PROFILE_CMD}"
+eval "${TAP_PROFILE_CMD}"
+
+if [[ -f '/tmp/.tap_dummy_nsight_report.nsys-rep' ]]; then
+    cp /tmp/.tap_dummy_nsight_report.nsys-rep ${RESULTS_SUBDIR}/tap_dummy_nsight_report.nsys-rep
+fi 
+# ----------------------------------------
+# (-1) post-amble
+# ----------------------------------------
+printf "${MESSAGE_TEMPLATE}" "TAP_SAVE_DIR=${TAP_SAVE_DIR}"
+printf "${MESSAGE_TEMPLATE}" "end script"
\ No newline at end of file
diff --git a/internal/scripts_br/run_update_git_submodules.sh b/internal/scripts_br/run_update_git_submodules.sh
new file mode 100755
index 000000000..1b0c74c98
--- /dev/null
+++ b/internal/scripts_br/run_update_git_submodules.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+git submodule update --init --recursive
\ No newline at end of file
diff --git a/internal/scripts_br/show_git_submodule_config.sh b/internal/scripts_br/show_git_submodule_config.sh
new file mode 100755
index 000000000..1289082cd
--- /dev/null
+++ b/internal/scripts_br/show_git_submodule_config.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+git config --local --get-regexp submodule
\ No newline at end of file
diff --git a/internal/scripts_br/venv_create.sh b/internal/scripts_br/venv_create.sh
new file mode 100755
index 000000000..80a20d63b
--- /dev/null
+++ b/internal/scripts_br/venv_create.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+#
+# title: virtual_env_create.sh
+# usage:
+#   cd <repo root>; ./scripts/virtual_env_create.sh
+#
+#   create a virtual environment for the benchmarking repo
+#
+MESSAGE_TEMPLATE='********virtual_env_create.sh: %s\n'
+DATE_OF_SCRIPT=$(date +'%Y%m%dT%H%M%S')
+SCRIPT_DIR="$(dirname "$(realpath "$BASH_SOURCE")")"
+GIT_BRANCH=$(git branch --show-current)
+
+# -----------------------------------------------
+# (1) set script-level parameters
+# ------------------------------------------------
+ENV_DIR=./venv_bionemo_fw
+
+# -----------------------------------------------
+# (2) create venv
+# ------------------------------------------------
+printf "${MESSAGE_TEMPLATE}" "attempt to created a virtual env in directory ${ENV_DIR}"
+
+# --------------------------------------------------
+# on computelab run
+#   (1) cannot run as sudo
+# -------------------------------------------------
+#apt update
+#apt install -y python3 python3-pip python3.10-venv
+
+
+sudo add-apt-repository ppa:deadsnakes/ppa -y
+sudo apt update
+sudo apt install python3.13 python3.13-venv python3.13-dev
+
+
+python3 -m venv ${ENV_DIR}
+
+
+# -----------------------------------------------
+# (3) enter venv
+# ------------------------------------------------
+source ${ENV_DIR}/bin/activate
+printf "${MESSAGE_TEMPLATE}" "you are in virtual env in directory ${ENV_DIR}"
+
+# -----------------------------------------------
+# (3) install pip to virtual environment
+# ------------------------------------------------
+if [[ "$(hostname)" == *viking-prod* ]]; then
+    printf "${MESSAGE_TEMPLATE}" "installing pip inside virtual-environment on viking host"
+    sudo apt update
+    sudo apt install -y python3-pip
+else
+    printf "${MESSAGE_TEMPLATE}" "installing pip inside virtual-environment"
+    apt update python3-pip python3.10-venv
+    apt install -y python3-pip
+fi
+
+
+# -----------------------------------------------
+# (4) pip install
+# ------------------------------------------------
+pip install pre-commit==4.1.0
+
+printf "${MESSAGE_TEMPLATE}" "to enter this virtual env, source ${ENV_DIR}/bin/activate"
+printf "${MESSAGE_TEMPLATE}" "to exit this virtual env, deactivate"
+printf "${MESSAGE_TEMPLATE}" "exiting"
\ No newline at end of file

From 860b557b0a2bf2c36d7380b6239e5e587b3661df Mon Sep 17 00:00:00 2001
From: Brian Roland <broland@nvidia.com>
Date: Mon, 1 Sep 2025 21:19:50 +0000
Subject: [PATCH 04/12] br: [skip ci]

Signed-off-by: Brian Roland <broland@nvidia.com>
---
 3rdparty/NeMo                                 |  2 +-
 ..._train.sh => run_evo2_train_to_profile.sh} | 20 +++++++++++++++----
 .../src/bionemo/evo2/run/train.py             |  6 ++++++
 3 files changed, 23 insertions(+), 5 deletions(-)
 rename internal/scripts_br/{run_evo2_train.sh => run_evo2_train_to_profile.sh} (77%)

diff --git a/3rdparty/NeMo b/3rdparty/NeMo
index 654437102..ea54a221a 160000
--- a/3rdparty/NeMo
+++ b/3rdparty/NeMo
@@ -1 +1 @@
-Subproject commit 654437102f3ac09cd0ba69ae78d8f5c0576d8239
+Subproject commit ea54a221a495b4dd8afae3993a18167bac42ba3f
diff --git a/internal/scripts_br/run_evo2_train.sh b/internal/scripts_br/run_evo2_train_to_profile.sh
similarity index 77%
rename from internal/scripts_br/run_evo2_train.sh
rename to internal/scripts_br/run_evo2_train_to_profile.sh
index 93b057f20..1edbd1365 100755
--- a/internal/scripts_br/run_evo2_train.sh
+++ b/internal/scripts_br/run_evo2_train_to_profile.sh
@@ -7,15 +7,21 @@ MESSAGE_TEMPLATE='********run_evo2_train.sh: %s\n'
 DATE_OF_SCRIPT=$(date +'%Y%m%dT%H%M')
 WHOAMI="$(whoami)"
 SCRIPT_DIR="$(dirname "$(realpath "$BASH_SOURCE")")"
+LIT_VERSION=$(pip show lightning | grep Version)
+TORCH_VERSION=$(pip show torch | grep Version)
+PYTHON_VERSION=$(python --version | grep Python)
 printf "${MESSAGE_TEMPLATE}" "begin"
 printf "${MESSAGE_TEMPLATE}" "DATE_OF_SCRIPT=${DATE_OF_SCRIPT}"
 printf "${MESSAGE_TEMPLATE}" "WHOAMI=${WHOAMI}"
+printf "${MESSAGE_TEMPLATE}" "LIT_VERSION=${LIT_VERSION}"
+printf "${MESSAGE_TEMPLATE}" "TORCH_VERSION=${TORCH_VERSION}"
+printf "${MESSAGE_TEMPLATE}" "PYTHON_VERSION=${PYTHON_VERSION}"
 
 # ----------------------------------------
 # (1) set some user parameters
 # ----------------------------------------
 RESULTS_DIR="./results"  # i.e. /workspace/bionemo2/results
-RESULTS_THIS_APP_DIR="${RESULTS_DIR}/run_evo2_train"
+RESULTS_THIS_APP_DIR="${RESULTS_DIR}/run_evo2_train_to_profile"
 
 RUN_LABEL_PREFIX="bionemo_evo2_train"
 PYTHON_SCRIPT_PATH=sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
@@ -29,7 +35,7 @@ TRAIN_ARGS_ARRAY=(
     "--model-size"
     "test"
     "--max-steps"
-    "40" 
+    "1" 
     "--context-parallel-size"
     "1"
     "--devices"
@@ -66,12 +72,13 @@ RUN_LABEL=$(IFS='_'; echo "${run_label_arr[*]}")
 printf "${MESSAGE_TEMPLATE}" "RUN_LABEL=${RUN_LABEL}"
 
 RESULTS_THIS_APP_THIS_RUN_DIR="${RESULTS_THIS_APP_DIR}/${RUN_LABEL}"
+export RESULTS_THIS_APP_THIS_RUN_DIR=${RESULTS_THIS_APP_THIS_RUN_DIR}
+
 mkdir -p ${RESULTS_THIS_APP_THIS_RUN_DIR}
 chmod a+rw ${RESULTS_THIS_APP_THIS_RUN_DIR}
 
 LOG_FILE="${RESULTS_THIS_APP_THIS_RUN_DIR}/${RUN_LABEL}.log"
-
-
+export BNM_MODULE_HOOK_MANAGER_RESULTS_DIR=${RESULTS_THIS_APP_THIS_RUN_DIR}
 # ----------------------------------------
 # (5) create python training script comman
 # ---------------------------------------
@@ -85,5 +92,10 @@ eval "${PY_COMMAND}"
 # ----------------------------------------
 # (-1) post-amble
 # ----------------------------------------
+printf "${MESSAGE_TEMPLATE}" "script summary:"
 printf "${MESSAGE_TEMPLATE}" "LOG_FILE=${LOG_FILE}"
+printf "${MESSAGE_TEMPLATE}" "LIT_VERSION=${LIT_VERSION}"
+printf "${MESSAGE_TEMPLATE}" "TORCH_VERSION=${TORCH_VERSION}"
+printf "${MESSAGE_TEMPLATE}" "PYTHON_VERSION=${PYTHON_VERSION}"
+
 printf "${MESSAGE_TEMPLATE}" "end with success"
\ No newline at end of file
diff --git a/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py b/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
index f04725669..b6716514c 100644
--- a/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
+++ b/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
@@ -55,6 +55,8 @@
 from bionemo.llm.utils.logger_utils import WandbConfig, setup_nemo_lightning_logger
 
 
+from bionemo.evo2.utils.logging.bnm_module_hook_manager import BNM_MODULE_HOOK_HANDLES
+
 torch._dynamo.config.suppress_errors = True
 
 
@@ -894,7 +896,11 @@ def train(args: argparse.Namespace) -> nl.Trainer:
     opt.connect(model)
     # Start training
     print("*******************train: before trainer.fit")
+    print(f"*************type(model.modules)={type(model.modules())}*********")
     trainer.fit(model, data_module)
+    for h in BNM_MODULE_HOOK_HANDLES:
+        h.remove()
+    print(f"*************type(model.module)={type(model.module)}*********")
     print("*******************train: after trainer.fit")
     return trainer
 

From 32d5345961b9484ace0af7dc0c4865850fa4a683 Mon Sep 17 00:00:00 2001
From: Brian Roland <broland@nvidia.com>
Date: Tue, 2 Sep 2025 15:47:38 +0000
Subject: [PATCH 05/12] br: [skip ci]

Signed-off-by: Brian Roland <broland@nvidia.com>
---
 internal/scripts_br/build_dev_image_br.sh     |  37 ----
 internal/scripts_br/install_tap.sh            |  14 --
 internal/scripts_br/run_dev_br.sh             | 146 ---------------
 .../scripts_br/run_evo2_train_to_profile.sh   | 101 -----------
 internal/scripts_br/run_mig_br.sh             |  46 -----
 .../scripts_br/run_nsys_with_evo2_train.sh    | 108 -----------
 internal/scripts_br/run_precommit.sh          |  43 -----
 internal/scripts_br/run_pytest.sh             |  51 ------
 internal/scripts_br/run_simple_torch_app.py   |  45 -----
 .../scripts_br/run_tap_with_evo2_train.sh     | 122 -------------
 .../scripts_br/run_update_git_submodules.sh   |   2 -
 .../scripts_br/show_git_submodule_config.sh   |   2 -
 internal/scripts_br/venv_create.sh            |  67 -------
 .../utils/logging/bnm_module_hook_manager.py  | 171 ++++++++++++++++++
 14 files changed, 171 insertions(+), 784 deletions(-)
 delete mode 100755 internal/scripts_br/build_dev_image_br.sh
 delete mode 100755 internal/scripts_br/install_tap.sh
 delete mode 100755 internal/scripts_br/run_dev_br.sh
 delete mode 100755 internal/scripts_br/run_evo2_train_to_profile.sh
 delete mode 100755 internal/scripts_br/run_mig_br.sh
 delete mode 100755 internal/scripts_br/run_nsys_with_evo2_train.sh
 delete mode 100755 internal/scripts_br/run_precommit.sh
 delete mode 100755 internal/scripts_br/run_pytest.sh
 delete mode 100644 internal/scripts_br/run_simple_torch_app.py
 delete mode 100755 internal/scripts_br/run_tap_with_evo2_train.sh
 delete mode 100755 internal/scripts_br/run_update_git_submodules.sh
 delete mode 100755 internal/scripts_br/show_git_submodule_config.sh
 delete mode 100755 internal/scripts_br/venv_create.sh
 create mode 100644 sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py

diff --git a/internal/scripts_br/build_dev_image_br.sh b/internal/scripts_br/build_dev_image_br.sh
deleted file mode 100755
index 03eaa9aae..000000000
--- a/internal/scripts_br/build_dev_image_br.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/env bash
-
-# ------------------------------------------------------------------------
-# (0) preamble
-# ------------------------------------------------------------------------
-MESSAGE_TEMPLATE='********build_dev_image_br.sh: %s\n'
-DATE_OF_SCRIPT=$(date +'%Y%m%dT%H%M%S')
-SCRIPT_DIR="$(dirname "$(realpath "$BASH_SOURCE")")"
-printf "${MESSAGE_TEMPLATE}" "SCRIPT_DIR=${SCRIPT_DIR}"
-printf "${MESSAGE_TEMPLATE}" "hostname=$(hostname)"
-printf "${MESSAGE_TEMPLATE}" "whoami=$(whoami)"
-printf "${MESSAGE_TEMPLATE}" "uid=$(id -u)"
-printf "${MESSAGE_TEMPLATE}" "gid=$(id -g)"
-
-set -euo pipefail
-
-BRANCH=$(git rev-parse --abbrev-ref HEAD)
-COMMIT=$(git rev-parse --short HEAD)
-DATE=$(date --iso-8601=seconds -u)
-
-set -x
-DOCKER_BUILDKIT=1 docker buildx build \
-  -t "nvcr.io/nvidian/cvai_bnmo_trng/bionemo:dev-${BRANCH}-${DATE_OF_SCRIPT}-${COMMIT}" \
-  --ulimit 'nofile=65535:65535' \
-  --target="development" \
-  --load \
-  --cache-from nvcr.io/nvidia/clara/bionemo-framework:nightly \
-  --cache-to type=inline \
-  --label com.nvidia.bionemo.git_sha=${COMMIT} \
-  --label com.nvidia.bionemo.created_at=${DATE} \
-  -f ./Dockerfile \
-  .
-
-# ----------------------
-# (-1) post-amble
-# --------------------------
-printf "${MESSAGE_TEMPLATE}" "end script"
\ No newline at end of file
diff --git a/internal/scripts_br/install_tap.sh b/internal/scripts_br/install_tap.sh
deleted file mode 100755
index b8771128f..000000000
--- a/internal/scripts_br/install_tap.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-
-# prelim required by release version of TAP
-mkdir -p /workspace/bionemo2/.local/lib/python3.12/site-packages/
-touch /workspace/bionemo2/.local/lib/python3.12/site-packages/usercustomize.py
-
-# install from gitlab server
-pip install git+https://gitlab-master.nvidia.com/dl/gwe/torch_automated_profiler@release
-
-# RUN --mount=type=ssh cd /opt && git clone ssh://git@gitlab-master.nvidia.com:12051/dl/gwe/torch_automated_profiler.git\
-#     && cd torch_automated_profiler\
-#     && git fetch origin br_max_depth_1\
-#     && git checkout -b br_max_depth_1 origin/br_max_depth_1\
-#     && pip install -e . -v
\ No newline at end of file
diff --git a/internal/scripts_br/run_dev_br.sh b/internal/scripts_br/run_dev_br.sh
deleted file mode 100755
index 3e66b1995..000000000
--- a/internal/scripts_br/run_dev_br.sh
+++ /dev/null
@@ -1,146 +0,0 @@
-#!/usr/bin/env bash
-
-# ------------------------------------------------------------------------
-# (0) preamble
-# ------------------------------------------------------------------------
-MESSAGE_TEMPLATE='********run_dev_br.sh: %s\n'
-DATE_OF_SCRIPT=$(date +'%Y%m%dT%H%M%S')
-SCRIPT_DIR="$(dirname "$(realpath "$BASH_SOURCE")")"
-printf "${MESSAGE_TEMPLATE}" "SCRIPT_DIR=${SCRIPT_DIR}"
-printf "${MESSAGE_TEMPLATE}" "hostname=$(hostname)"
-printf "${MESSAGE_TEMPLATE}" "whoami=$(whoami)"
-printf "${MESSAGE_TEMPLATE}" "uid=$(id -u)"
-printf "${MESSAGE_TEMPLATE}" "gid=$(id -g)"
-
-
-#set -euo pipefail
-
-source .env
-
-
-# -----------------------------------------------------
-# (1) user paramerters
-# -----------------------------------------------------
-USER_IN_CTR=root                    # if profiling, run as root
-HOME_IN_CTR=/opt/${USER_IN_CTR}
-
-#GPU_ARG='--gpus "\"device=0,1,2,3,4,5,6,7\""'
-GPU_ARG='--gpus all'
-LOCAL_RESULTS_PATH="/home/scratch.broland_sw_1/data_for_projects/evo2/results/bionemo2_results"
-LOCAL_DATA_PATH="./data"
-LOCAL_MODELS_PATH="./models"
-
-COMMIT_AT_START=$(git rev-parse --short HEAD)
-BRANCH_AT_START=$(git rev-parse --abbrev-ref HEAD)
-IMAGE_REPO='nvcr.io/nvidian/cvai_bnmo_trng/bionemo'
-IMAGE_TAG='dev-br_bnm2532_dlsim_val_in_fw_a-20250831T164028-a29272f1'
-IMAGE_NAME="${IMAGE_REPO}:${IMAGE_TAG}"
-
-DOCKER_REPO_PATH="/workspace/bionemo2"
-DOCKER_RESULTS_PATH="/workspace/bionemo2/results"
-DOCKER_MODELS_PATH="/workspace/bionemo2/models"
-DOCKER_DATA_PATH="/workspace/bionemo2/data"
-
-# -----------------------------------------------------
-# (2) santity checks
-# ----------------------------------------------------
-LOCAL_REPO_PATH="$(realpath $(pwd))"
-if [[ "$(basename ${LOCAL_REPO_PATH})" != *"bionemo-framework"* ]]; then
-    echo "ERROR: must run this script from the bionemo repository root!"
-    exit 1
-fi
-
-# ---------------------------------------------------------------------
-# (3) make expected directories in external filesystem as user, not as docker
-# ----------------------------------------------------
-expected_local_dirs=("${LOCAL_RESULTS_PATH}" "${LOCAL_DATA_PATH}" "${LOCAL_MODELS_PATH}" "./htmlcov")
-for expected_local_dir in "${expected_local_dirs[@]}"; do
-    printf "${MESSAGE_TEMPLATE}" "expected_local_dir=${expected_local_dir}"
-    mkdir -p "${expected_local_dir}"
-    chmod -R a+rw "${expected_local_dir}"
-done
-
-# ---------------------------------------------------------------------
-# (4) delete external directories with state
-# ----------------------------------------------------
-sudo rm -rf ${LOCAL_RESULTS_PATH}/evo2
-
-# -----------------------------------------------------
-# (5) assemble docker run command
-# ----------------------------------------------------
-
-printf "${MESSAGE_TEMPLATE}" "create DOCKER_RUN_COMMAND"
-
-read -r -d '' DOCKER_RUN_OPTIONS_FOR_PROFILING <<EOF
-    --user ${USER_IN_CTR} \\
-    --cap-add=SYS_ADMIN \\
-    --cap-add=SYS_PTRACE \\
-    --cap-add=PERFMON \\
-    --security-opt seccomp=unconfined \\
-    --privileged
-EOF
-
-read -r -d '' SECRETS<<EOF
-    -e WANDB_API_KEY=$WANDB_API_KEY
-EOF
-
-read -r -d '' DOCKER_RUN_OPTIONS <<EOF
-    -u $(id -u):$(id -g) \\
-    --rm \\
-    -it \\
-    ${DOCKER_RUN_OPTIONS_FOR_PROFILING} \\
-    --network host \\
-    ${GPU_ARG} \\
-    -p ${JUPYTER_PORT}:8888 \\
-    --shm-size=64g \\
-    -e TMPDIR=/tmp/ \\
-    -e BRANCH_AT_START=${BRANCH_AT_START} \\
-    -e COMMIT_AT_START=${COMMIT_AT_START} \\
-    -e NUMBA_CACHE_DIR=/tmp/ \\
-    -e HOME=${DOCKER_REPO_PATH} \\
-    -v ${HOME}/.bash_aliases:${HOME_IN_CTR}/.bash_aliases \\
-    -w ${DOCKER_REPO_PATH} \\
-    -v ${LOCAL_RESULTS_PATH}:${DOCKER_RESULTS_PATH} \\
-    -v ${LOCAL_DATA_PATH}:${DOCKER_DATA_PATH} \\
-    -v ${LOCAL_MODELS_PATH}:${DOCKER_MODELS_PATH} \\
-    -v /etc/passwd:/etc/passwd:ro \\
-    -v /etc/group:/etc/group:ro \\
-    -v /etc/shadow:/etc/shadow:ro \\
-    -v ${HOME}/.ssh:${DOCKER_REPO_PATH}/.ssh:ro \\
-    -v ${LOCAL_REPO_PATH}/htmlcov:/${DOCKER_REPO_PATH}/htmlcov \\
-    -v ${LOCAL_REPO_PATH}:${DOCKER_REPO_PATH} \\
-    -e NGC_CLI_ORG \\
-    -e NGC_CLI_TEAM \\
-    -e NGC_CLI_FORMAT_TYPE \\
-    -e NGC_CLI_API_KEY \\
-    -e AWS_ENDPOINT_URL \\
-    -e AWS_REGION \\
-    -e AWS_ACCESS_KEY_ID \\
-    -e AWS_SECRET_ACCESS_KEY
-EOF
-read -r -d '' DOCKER_RUN_WITHOUT_SECRETS <<EOF
-docker run \\
-    ${DOCKER_RUN_OPTIONS} \\
-    ${IMAGE_NAME} \\
-    bash --rcfile ${HOME_IN_CTR}/.bash_aliases
-EOF
-
-read -r -d '' DOCKER_RUN_WITH_SECRETS <<EOF
-docker run \\
-    ${DOCKER_RUN_OPTIONS} \\
-    ${SECRETS} \\
-    ${IMAGE_NAME} \\
-    bash --rcfile ${HOME_IN_CTR}/.bash_aliases
-EOF
-
-# -----------------------------------------------------
-# (5) run docker run command
-# ----------------------------------------------------
-printf "${MESSAGE_TEMPLATE}" "DOCKER_RUN_WITHOUT_SECRETS=${DOCKER_RUN_WITHOUT_SECRETS}"
-eval "$DOCKER_RUN_WITH_SECRETS"
-
-# -----------------------------------------------------
-# (-1) summarize
-# ----------------------------------------------------
-
-printf "${MESSAGE_TEMPLATE}" "run_dev_br.sh: end script----"
diff --git a/internal/scripts_br/run_evo2_train_to_profile.sh b/internal/scripts_br/run_evo2_train_to_profile.sh
deleted file mode 100755
index 1edbd1365..000000000
--- a/internal/scripts_br/run_evo2_train_to_profile.sh
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/bin/bash
-
-# ----------------------------------------
-# (0) preamble
-# ----------------------------------------
-MESSAGE_TEMPLATE='********run_evo2_train.sh: %s\n'
-DATE_OF_SCRIPT=$(date +'%Y%m%dT%H%M')
-WHOAMI="$(whoami)"
-SCRIPT_DIR="$(dirname "$(realpath "$BASH_SOURCE")")"
-LIT_VERSION=$(pip show lightning | grep Version)
-TORCH_VERSION=$(pip show torch | grep Version)
-PYTHON_VERSION=$(python --version | grep Python)
-printf "${MESSAGE_TEMPLATE}" "begin"
-printf "${MESSAGE_TEMPLATE}" "DATE_OF_SCRIPT=${DATE_OF_SCRIPT}"
-printf "${MESSAGE_TEMPLATE}" "WHOAMI=${WHOAMI}"
-printf "${MESSAGE_TEMPLATE}" "LIT_VERSION=${LIT_VERSION}"
-printf "${MESSAGE_TEMPLATE}" "TORCH_VERSION=${TORCH_VERSION}"
-printf "${MESSAGE_TEMPLATE}" "PYTHON_VERSION=${PYTHON_VERSION}"
-
-# ----------------------------------------
-# (1) set some user parameters
-# ----------------------------------------
-RESULTS_DIR="./results"  # i.e. /workspace/bionemo2/results
-RESULTS_THIS_APP_DIR="${RESULTS_DIR}/run_evo2_train_to_profile"
-
-RUN_LABEL_PREFIX="bionemo_evo2_train"
-PYTHON_SCRIPT_PATH=sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
-
-TRAIN_ARGS_ARRAY=(
-    "--mock-data" 
-    "--seq-length"
-    "256" 
-    "--micro-batch-size"
-    "1" 
-    "--model-size"
-    "test"
-    "--max-steps"
-    "1" 
-    "--context-parallel-size"
-    "1"
-    "--devices"
-    "1"
-)
-
-# ----------------------------------------
-# (2) dump parameters
-# ----------------------------------------
-printf "${MESSAGE_TEMPLATE}" "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}"
-
-# ---------------------------------------------------
-# (3) purge training app state
-#   - delete / move dirs with training application state
-# ----------------------------------------------------
-if [[ -d "${RESULTS_DIR}/tmp/evo2" ]]; then
-    rm -rf "${RESULTS_DIR}/tmp/evo2"
-    printf "${MESSAGE_TEMPLATE}" "rm -rf ${RESULTS_DIR}/tmp/evo2, end"
-fi
-if [[ -d "${RESULTS_DIR}/evo2" ]]; then
-    mv "${RESULTS_DIR}/evo2" "${RESULTS_DIR}/tmp/"
-    printf "${MESSAGE_TEMPLATE}" "mv ${RESULTS_DIR}/evo2 ${RESULTS_DIR}/tmp/, end"
-fi
-# ----------------------------------------
-# (4) create output dirs and file names
-# ----------------------------------------
-run_label_arr=(
-    ${RUN_LABEL_PREFIX}
-    ${BRANCH_AT_START}
-    ${DATE_OF_SCRIPT}
-    ${COMMIT_AT_START}
-)
-RUN_LABEL=$(IFS='_'; echo "${run_label_arr[*]}")
-printf "${MESSAGE_TEMPLATE}" "RUN_LABEL=${RUN_LABEL}"
-
-RESULTS_THIS_APP_THIS_RUN_DIR="${RESULTS_THIS_APP_DIR}/${RUN_LABEL}"
-export RESULTS_THIS_APP_THIS_RUN_DIR=${RESULTS_THIS_APP_THIS_RUN_DIR}
-
-mkdir -p ${RESULTS_THIS_APP_THIS_RUN_DIR}
-chmod a+rw ${RESULTS_THIS_APP_THIS_RUN_DIR}
-
-LOG_FILE="${RESULTS_THIS_APP_THIS_RUN_DIR}/${RUN_LABEL}.log"
-export BNM_MODULE_HOOK_MANAGER_RESULTS_DIR=${RESULTS_THIS_APP_THIS_RUN_DIR}
-# ----------------------------------------
-# (5) create python training script comman
-# ---------------------------------------
-read -r -d '' PY_COMMAND <<EOF
-python -u ${PYTHON_SCRIPT_PATH} ${TRAIN_ARGS_ARRAY[@]} 2>&1 | tee -a ${LOG_FILE}
-EOF
-
-printf "${MESSAGE_TEMPLATE}" "PY_COMMAND=${PY_COMMAND}"
-eval "${PY_COMMAND}"
-
-# ----------------------------------------
-# (-1) post-amble
-# ----------------------------------------
-printf "${MESSAGE_TEMPLATE}" "script summary:"
-printf "${MESSAGE_TEMPLATE}" "LOG_FILE=${LOG_FILE}"
-printf "${MESSAGE_TEMPLATE}" "LIT_VERSION=${LIT_VERSION}"
-printf "${MESSAGE_TEMPLATE}" "TORCH_VERSION=${TORCH_VERSION}"
-printf "${MESSAGE_TEMPLATE}" "PYTHON_VERSION=${PYTHON_VERSION}"
-
-printf "${MESSAGE_TEMPLATE}" "end with success"
\ No newline at end of file
diff --git a/internal/scripts_br/run_mig_br.sh b/internal/scripts_br/run_mig_br.sh
deleted file mode 100755
index 446615479..000000000
--- a/internal/scripts_br/run_mig_br.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/bin/bash
-
-
-# profiles are specified to each GPU, e.g. profile 15 can be used to divide into 4 devices of size 20gb
-# NVIDIA H100 80GB HBM3
-#| => sudo nvidia-smi mig -i 3 -cgi 15 -C
-#Successfully created GPU instance ID  5 on GPU  3 using profile MIG 1g.20gb (ID 15)
-#Successfully created compute instance ID  0 on GPU  3 GPU instance ID  5 using profile MIG 1g.20gb (ID  7)
-
-
-
-
-# GPU 0: NVIDIA H100 80GB HBM3 (UUID: GPU-afddd1b4-4464-96c8-a712-aaeb0acf1170)  # cudo 0 on torch
-# GPU 1: NVIDIA H100 80GB HBM3 (UUID: GPU-6faf0136-7870-5767-10be-a0827a158829)
-# GPU 2: NVIDIA H100 80GB HBM3 (UUID: GPU-20d20fc3-bcc7-e715-32d6-ffd646ea062f)
-# GPU 3: NVIDIA H100 80GB HBM3 (UUID: GPU-182e6bd5-b7ac-e0a6-48cf-96e198063dd3)
-#   MIG 1g.20gb     Device  0: (UUID: MIG-56679450-0984-50db-83a3-7e549eb60883)  # cudo 4 on torch
-#   MIG 1g.20gb     Device  1: (UUID: MIG-a155b8d5-2484-52fc-a2ed-e47dc89996cd)
-#   MIG 1g.20gb     Device  2: (UUID: MIG-9dc27b3c-b567-5802-a2a7-27ad657ab079)
-#   MIG 1g.20gb     Device  3: (UUID: MIG-f6102e7f-bbf5-5db4-abea-156619dd4ce2)
-
-
-
-# Split into to 40gb device sudo nvidia-smi mig -i 5 -cgi 5,5
-
-# (0) choose a device
-DEVICE_INDEX_FOR_MIG=1
-PROFILE=15
-PROFILE=9 # 
-
-# (1) show all gpu indices, uuids,  and product names
-nvidia-smi -L
-
-# (2) list all MIG instances
-sudo nvidia-smi mig -lgi
-
-# (3) activate multi-instance gpu for 
-sudo nvidia-smi --id ${DEVICE_INDEX_FOR_MIG} -mig 1
-
-# split device with index 0 into 3 compute instances
-for i in {0..3}; do
-    sudo nvidia-smi mig --id ${DEVICE_INDEX_FOR_MIG} -cgi ${PROFILE} -C
-done
-
-# show all gpu indices, uuids, and produce names
-nvidia-smi -L
diff --git a/internal/scripts_br/run_nsys_with_evo2_train.sh b/internal/scripts_br/run_nsys_with_evo2_train.sh
deleted file mode 100755
index e59cf0fb8..000000000
--- a/internal/scripts_br/run_nsys_with_evo2_train.sh
+++ /dev/null
@@ -1,108 +0,0 @@
-#!/bin/bash
-#
-#
-# nsys option like --pytorch function-trace: 
-#   nvtx markers for common torch operations at the pytorch level like torch.Tensor.to
-#
-# nsys option like --pytorch autograd-shapes-nvtx: 
-#   nvtx markers for common torch operations at the kernel level like "to", "to_copy"
-#
-
-
-# ----------------------------------------
-# (0) preamble
-# ----------------------------------------
-MESSAGE_TEMPLATE='********run_evo2_train.sh: %s\n'
-DATE_OF_SCRIPT=$(date +'%Y%m%dT%H%M')
-SCRIPT_DIR="$(dirname "$(realpath "$BASH_SOURCE")")"
-printf "${MESSAGE_TEMPLATE}" "begin"
-printf "${MESSAGE_TEMPLATE}" "DATE_OF_SCRIPT=${DATE_OF_SCRIPT}"
-
-# ----------------------------------------
-# (1) set some user parameters
-# ----------------------------------------
-RESULTS_DIR="./results/run_nsys_with_evo2_train"
-
-read -r -d '' NSYS_PROFILE_OPTIONS <<EOF
-    -s none \\
-    --trace=cuda,nvtx \\
-    --pytorch autograd-shapes-nvtx,functions-trace \\
-    --force-overwrite true
-EOF
-
-TRAIN_ARGS_ARRAY=(
-    "--nsys-profiling"
-    "--nsys-start-step"
-    "20"
-    "--nsys-end-step"
-    "28"
-    "--mock-data"
-    "--seq-length"
-    "256"
-    "--micro-batch-size"
-    "1"
-    "--model-size"
-    "test"
-    "--max-steps"
-    "30"
-    "--context-parallel-size"
-    "1"
-    "--devices"
-    "1"
-)
-
-RUN_LABEL_PREFIX="nsys_bionemo_evo2_train"
-PYTHON_SCRIPT_PATH=sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
-
-# ----------------------------------------
-# (2) dump parameters
-# ----------------------------------------
-printf "${MESSAGE_TEMPLATE}" "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}"
-
-# ----------------------------------------
-# (3) create log file name and report filename
-# ----------------------------------------
-run_label_arr=(
-    ${RUN_LABEL_PREFIX}
-    "mock-data"
-    ${BRANCH_AT_START}
-    ${DATE_OF_SCRIPT}
-    ${COMMIT_AT_START}
-)
-RUN_LABEL="$(IFS='_'; echo "${run_label_arr[*]}")"
-
-RESULTS_SUBDIR="${RESULTS_DIR}/"nsys"/${RUN_LABEL}"
-mkdir -p "${RESULTS_SUBDIR}"
-chmod a+r "${RESULTS_SUBDIR}"
-
-LOG_FILE="${RESULTS_SUBDIR}/${RUN_LABEL}.log"
-REPORT_FILE="${RESULTS_SUBDIR}/${RUN_LABEL}.nsys-rep"
-
-# ----------------------------------------
-# (4) create command
-# ----------------------------------------
-APPLICATION_TO_PROFILE="python ${PYTHON_SCRIPT_PATH} ${TRAIN_ARGS_ARRAY[@]}"
-
-read -r -d '' NSYS_PROFILE_CMD <<EOF
-nsys profile \\
-    -o ${REPORT_FILE} \\
-    ${NSYS_PROFILE_OPTIONS} \\
-    ${APPLICATION_TO_PROFILE} | tee -a ${LOG_FILE}
-EOF
-# ----------------------------------------
-# (5) run command
-# ----------------------------------------
-printf "\n"
-printf "${MESSAGE_TEMPLATE}" "nsys version: $(nsys --version)"
-
-printf "\n"
-printf "${MESSAGE_TEMPLATE}" "APPLICATION_TO_PROFILE=${APPLICATION_TO_PROFILE}"
-printf "\n"
-printf "${MESSAGE_TEMPLATE}" "NSYS_PROFILE_CMD"
-echo "${NSYS_PROFILE_CMD}"
-eval "${NSYS_PROFILE_CMD}"
-
-# ----------------------------------------
-# (-1) post-amble
-# ----------------------------------------
-printf "${MESSAGE_TEMPLATE}" "end with success"
\ No newline at end of file
diff --git a/internal/scripts_br/run_precommit.sh b/internal/scripts_br/run_precommit.sh
deleted file mode 100755
index 639d4c193..000000000
--- a/internal/scripts_br/run_precommit.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/bash
-# comment
-
-
-# files_to_check=(
-#     src/boltz/distributed/model/layers/distribute_module_tools.py
-#     src/boltz/distributed/model/layers/swiglu.py
-#     src/boltz/distributed/model/layers/mult_for_same_placement_and_shape.py
-#     tests/model/layers/test_dtensor_swiglu.py
-#     src/boltz/testing/utils.py
-# )
-# files_to_check=(
-#     src/boltz/distributed/model/layers/distribute_module_tools.py
-#     src/boltz/distributed/model/layers/layernorm.py
-#     tests/distributed/test_dtensor_layernorm.py
-# )
-
-# files_to_check=(
-#     src/boltz/distributed/model/layers/attention.py
-#     src/boltz/distributed/model/layers/attention_impl.py
-#     src/boltz/distributed/model/layers/distribute_module_tools.py
-#     src/boltz/distributed/model/layers/dtensor_metadata_tools.py
-#     tests/distributed/model/layers/test_attention_with_dtensor_for_pairformer_use_case.py
-# )
-
-# files_to_check=(
-#     src/boltz/distributed/model/layers/dtensor_metadata_tools.py
-#     tests/distributed/test_dtensor_metadata_tools.py
-# )
-
-files_to_check=(
-    sub-packages/bionemo-testing/src/bionemo/testing/torch.py
-    sub-packages/bionemo-evo2/tests/bionemo/evo2/test_evo2.py
-    sub-packages/bionemo-evo2/tests/bionemo/evo2/conftest.py
-    sub-packages/bionemo-testing/tests/bionemo/testing/test_torch.py
-)
-
-
-for file in "${files_to_check[@]}"; do
-    echo "Checking $file"
-    pre-commit run --files $file
-done
-
diff --git a/internal/scripts_br/run_pytest.sh b/internal/scripts_br/run_pytest.sh
deleted file mode 100755
index bd10c53a9..000000000
--- a/internal/scripts_br/run_pytest.sh
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/bin/bash
-
-# ----------------------------------------
-# (0) preamble
-# ----------------------------------------
-MESSAGE_TEMPLATE='********run_pytest.sh: %s\n'
-DATE_OF_SCRIPT=$(date +'%Y%m%dT%H%M')
-SCRIPT_DIR="$(dirname "$(realpath "$BASH_SOURCE")")"
-printf "${MESSAGE_TEMPLATE}" "begin"
-printf "${MESSAGE_TEMPLATE}" "DATE_OF_SCRIPT=${DATE_OF_SCRIPT}"
-
-# ----------------------------------------
-# (1) set some user parameters
-# ----------------------------------------
-#CUDA_VISIBLE_DEVICE_LIST=MIG-0e9a0f4b-dfee-5517-a54e-a73d5c450f24 # 20gb
-#CUDA_VISIBLE_DEVICE_LIST=MIG-08fb5198-a9d8-5984-b31a-a8e7044320d0 # 40gb
-export CUDA_VISIBLE_DEVICE_LIST=GPU-6f9dcb23-36a0-81a9-8942-78e5f07e3817 # gpu 0 with 80gb
-PYTEST_LOG_FILE_PREFIX="pytests_pr1058_unskip_evo2_tests"
-PYTEST_MARKERS=("not slow")
-#PYTEST_MARKERS=("not slow" "slow")
-#TEST_PATH=$(pwd)
-TEST_PATH=sub-packages/bionemo-evo2/tests/bionemo/evo2/test_evo2.py
-#TEST_PATH=sub-packages/bionemo-evo2/tests/bionemo/evo2/test_evo2.py::test_golden_values_top_k_logits_and_cosine_similarity_7b
-
-# ----------------------------------------
-# (2) dump parameters
-# ----------------------------------------
-printf "${MESSAGE_TEMPLATE}" "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}"
-
-# ----------------------------------------
-# (3) create log file name
-# ----------------------------------------
-TEST_PATH_LABEL="${TEST_PATH//\//-}"  # replace forward slashes with hypthens
-TEST_PATH_LABEL="${TEST_PATH_LABEL//::/__}"  # replace forward slashes with hypthens
-TEST_PATH_LABEL="${TEST_PATH_LABEL//.py/' '}"  # remove .py 
-
-for PYTEST_MARKER in "${PYTEST_MARKERS[@]}"; do
-
-    PYTEST_MARKER_LABEL="${PYTEST_MARKER// /}"
-    PYTEST_LOG_FILE="test_logs_for_evo2/${PYTEST_LOG_FILE_PREFIX}_${TEST_PATH_LABEL}_${PYTEST_MARKER_LABEL}_${BRANCH_AT_START}_${DATE_OF_SCRIPT}_${COMMIT_AT_START}.log"
-    PYTEST_COMMAND="pytest -s -v -m '${PYTEST_MARKER}' ${TEST_PATH} | tee -a ${PYTEST_LOG_FILE}"
-    printf "${MESSAGE_TEMPLATE}" "PYTEST_COMMAND=${PYTEST_COMMAND}"
-    eval "${PYTEST_COMMAND}"
-
-done
-
-
-# ----------------------------------------
-# (-1) post-amble
-# ----------------------------------------
-printf "${MESSAGE_TEMPLATE}" "end with success"
\ No newline at end of file
diff --git a/internal/scripts_br/run_simple_torch_app.py b/internal/scripts_br/run_simple_torch_app.py
deleted file mode 100644
index 6af0a7317..000000000
--- a/internal/scripts_br/run_simple_torch_app.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import torch
-import torch.profiler
-
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-# Define a simple module: Linear -> ReLU -> Linear
-class SimpleModel(nn.Module):
-    def __init__(self, input_size=10, hidden_size=20, output_size=5):
-        super(SimpleModel, self).__init__()
-        self.fc1 = nn.Linear(input_size, hidden_size)
-        self.fc2 = nn.Linear(hidden_size, output_size)
-
-    def forward(self, x):
-        x = F.relu(self.fc1(x))
-        x = self.fc2(x)
-        return x
-
-# Instantiate the model
-model = SimpleModel()
-
-# Generate random input data (batch_size=4, input_size=10)
-x = torch.randn(4, 10)
-
-
-with torch.profiler.profile(
-    activities=[torch.profiler.ProfilerActivity.CPU,
-                torch.profiler.ProfilerActivity.CUDA],
-    on_trace_ready=torch.profiler.tensorboard_trace_handler("./log"),
-    record_shapes=True,
-    with_stack=True
-) as prof:
-    for _ in range(5):
-        output = model(x)
-        
-        print("Input:", x)
-        print("Output:", output)
-
-print(
-    prof.key_averages(group_by_input_shape=True).table(
-        sort_by="cuda_time_total", row_limit=10
-    )
-)
diff --git a/internal/scripts_br/run_tap_with_evo2_train.sh b/internal/scripts_br/run_tap_with_evo2_train.sh
deleted file mode 100755
index f22a8fb36..000000000
--- a/internal/scripts_br/run_tap_with_evo2_train.sh
+++ /dev/null
@@ -1,122 +0,0 @@
-#!/bin/bash
-#
-#
-# nsys option like --pytorch function-trace: 
-#   nvtx markers for common torch operations at the pytorch level like torch.Tensor.to
-#
-# nsys option like --pytorch autograd-shapes-nvtx: 
-#   nvtx markers for common torch operations at the kernel level like "to", "to_copy"
-#
-
-
-# ----------------------------------------
-# (0) preamble
-# ----------------------------------------
-MESSAGE_TEMPLATE='********run_evo2_train.sh: %s\n'
-DATE_OF_SCRIPT=$(date +'%Y%m%dT%H%M')
-SCRIPT_DIR="$(dirname "$(realpath "$BASH_SOURCE")")"
-printf "${MESSAGE_TEMPLATE}" "begin"
-printf "${MESSAGE_TEMPLATE}" "DATE_OF_SCRIPT=${DATE_OF_SCRIPT}"
-
-# ----------------------------------------
-# (1) set some user parameters
-# ----------------------------------------
-export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-export TAP_NSIGHT_LOCATION='/usr/local/cuda/bin/nsys'
-export TAP_LOG_LEVEL='debug'
-export TAP_MODE='nsight'     # '', nsight, or anna..... '' means ignore all profiling
-export TAP_NVTX='pytorch'       # pytorch, apex, python
-export TAP_BACKWARD_NVTX='false'   # true or false
-export TAP_PROFILE_MEMORY='false'
-export TAP_WAIT_STEPS='1'       # 2 is my default
-export TAP_WARMUP_STEPS='1'    # 12 is my default, 
-export TAP_ACTIVE_STEPS='4'     # 1 is my default
-export TAP_WAIT_EPOCHS='1'
-#!/usr/bin/env sh
-
-
-export TAP_EXIT_ON_STOP=true
-
-#export APP_NVTX_CATEGORIES='main,lit_module,dataset'
-export TAP_NSIGHT_FLAGS='--trace nvtx,cuda'
-#export TAP_MAX_DEPTH=14    # minimal is 4, since torch compile adds a ldevel,  default is 14
-
-
-RESULTS_DIR="./results/run_tap_with_evo2_train"
-
-TRAIN_ARGS_ARRAY=(
-    "--mock-data"
-    "--seq-length"
-    "256"
-    "--micro-batch-size"
-    "1"
-    "--model-size"
-    "test"
-    "--max-steps"
-    "30"
-    "--context-parallel-size"
-    "1"
-    "--devices"
-    "1"
-    "--val-check-interval" "0"
-)
-
-RUN_LABEL_PREFIX="tap_bionemo_evo2_train"
-PYTHON_SCRIPT_PATH=sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
-
-# ----------------------------------------
-# (2) dump parameters
-# ----------------------------------------
-printf "${MESSAGE_TEMPLATE}" "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}"
-
-# ----------------------------------------
-# (3) create output dir names and file names
-#       - create log file name and report filename
-# ----------------------------------------
-run_label_arr=(
-    ${RUN_LABEL_PREFIX}
-    "mock-data"
-    ${BRANCH_AT_START}
-    ${DATE_OF_SCRIPT}
-    ${COMMIT_AT_START}
-)
-RUN_LABEL="$(IFS='_'; echo "${run_label_arr[*]}")"
-
-RESULTS_SUBDIR="${RESULTS_DIR}/${RUN_LABEL}"
-export TAP_SAVE_DIR="${RESULTS_SUBDIR}"
-mkdir -p "${RESULTS_SUBDIR}"
-chmod a+r "${RESULTS_SUBDIR}"
-
-LOG_FILE="${RESULTS_SUBDIR}/${RUN_LABEL}.log"
-REPORT_FILE="${RESULTS_SUBDIR}/${RUN_LABEL}.nsys-rep"
-
-# ----------------------------------------
-# (4) create command
-# ----------------------------------------
-APPLICATION_TO_PROFILE="python ${PYTHON_SCRIPT_PATH} ${TRAIN_ARGS_ARRAY[@]}"
-#APPLICATION_TO_PROFILE="python -c 'import torch; x= torch.ones(500)'"
-
-read -r -d '' TAP_PROFILE_CMD <<EOF
-${APPLICATION_TO_PROFILE} 2>&1 | tee -a ${LOG_FILE}
-EOF
-# ----------------------------------------
-# (5) run command
-# ----------------------------------------
-printf "\n"
-printf "${MESSAGE_TEMPLATE}" "nsys version: $(nsys --version)"
-
-printf "\n"
-printf "${MESSAGE_TEMPLATE}" "APPLICATION_TO_PROFILE=${APPLICATION_TO_PROFILE}"
-printf "\n"
-printf "${MESSAGE_TEMPLATE}" "TAP_PROFILE_CMD"
-echo "${TAP_PROFILE_CMD}"
-eval "${TAP_PROFILE_CMD}"
-
-if [[ -f '/tmp/.tap_dummy_nsight_report.nsys-rep' ]]; then
-    cp /tmp/.tap_dummy_nsight_report.nsys-rep ${RESULTS_SUBDIR}/tap_dummy_nsight_report.nsys-rep
-fi 
-# ----------------------------------------
-# (-1) post-amble
-# ----------------------------------------
-printf "${MESSAGE_TEMPLATE}" "TAP_SAVE_DIR=${TAP_SAVE_DIR}"
-printf "${MESSAGE_TEMPLATE}" "end script"
\ No newline at end of file
diff --git a/internal/scripts_br/run_update_git_submodules.sh b/internal/scripts_br/run_update_git_submodules.sh
deleted file mode 100755
index 1b0c74c98..000000000
--- a/internal/scripts_br/run_update_git_submodules.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-git submodule update --init --recursive
\ No newline at end of file
diff --git a/internal/scripts_br/show_git_submodule_config.sh b/internal/scripts_br/show_git_submodule_config.sh
deleted file mode 100755
index 1289082cd..000000000
--- a/internal/scripts_br/show_git_submodule_config.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-git config --local --get-regexp submodule
\ No newline at end of file
diff --git a/internal/scripts_br/venv_create.sh b/internal/scripts_br/venv_create.sh
deleted file mode 100755
index 80a20d63b..000000000
--- a/internal/scripts_br/venv_create.sh
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/bin/bash
-#
-# title: virtual_env_create.sh
-# usage:
-#   cd <repo root>; ./scripts/virtual_env_create.sh
-#
-#   create a virtual environment for the benchmarking repo
-#
-MESSAGE_TEMPLATE='********virtual_env_create.sh: %s\n'
-DATE_OF_SCRIPT=$(date +'%Y%m%dT%H%M%S')
-SCRIPT_DIR="$(dirname "$(realpath "$BASH_SOURCE")")"
-GIT_BRANCH=$(git branch --show-current)
-
-# -----------------------------------------------
-# (1) set script-level parameters
-# ------------------------------------------------
-ENV_DIR=./venv_bionemo_fw
-
-# -----------------------------------------------
-# (2) create venv
-# ------------------------------------------------
-printf "${MESSAGE_TEMPLATE}" "attempt to created a virtual env in directory ${ENV_DIR}"
-
-# --------------------------------------------------
-# on computelab run
-#   (1) cannot run as sudo
-# -------------------------------------------------
-#apt update
-#apt install -y python3 python3-pip python3.10-venv
-
-
-sudo add-apt-repository ppa:deadsnakes/ppa -y
-sudo apt update
-sudo apt install python3.13 python3.13-venv python3.13-dev
-
-
-python3 -m venv ${ENV_DIR}
-
-
-# -----------------------------------------------
-# (3) enter venv
-# ------------------------------------------------
-source ${ENV_DIR}/bin/activate
-printf "${MESSAGE_TEMPLATE}" "you are in virtual env in directory ${ENV_DIR}"
-
-# -----------------------------------------------
-# (3) install pip to virtual environment
-# ------------------------------------------------
-if [[ "$(hostname)" == *viking-prod* ]]; then
-    printf "${MESSAGE_TEMPLATE}" "installing pip inside virtual-environment on viking host"
-    sudo apt update
-    sudo apt install -y python3-pip
-else
-    printf "${MESSAGE_TEMPLATE}" "installing pip inside virtual-environment"
-    apt update python3-pip python3.10-venv
-    apt install -y python3-pip
-fi
-
-
-# -----------------------------------------------
-# (4) pip install
-# ------------------------------------------------
-pip install pre-commit==4.1.0
-
-printf "${MESSAGE_TEMPLATE}" "to enter this virtual env, source ${ENV_DIR}/bin/activate"
-printf "${MESSAGE_TEMPLATE}" "to exit this virtual env, deactivate"
-printf "${MESSAGE_TEMPLATE}" "exiting"
\ No newline at end of file
diff --git a/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py b/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py
new file mode 100644
index 000000000..7e9791867
--- /dev/null
+++ b/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py
@@ -0,0 +1,171 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Callable
+import os
+from threading import stack_size
+from torch import nn
+from torch import Tensor
+
+BNM_MODULE_HOOK_HANDLES = []
+
+
+class BnmModuleHookManager():
+    
+    def configure_hooks(
+        self, 
+        root_module: nn.Module,
+        results_dir: str | None = None,
+        forward_pre_hook_types: list[str] | None = None,
+        forward_hook_types: list[str] | None = None,
+        level_max: int | None = None,
+    ):
+        """Configure hooks. 
+        
+        Args:
+            root_module: The module ancestor to all submodules which should have hooks.
+            forward_pre_hook_types: The types of forward pre hooks to configure, e.g. ["input_shapes"].
+            forward_hook_types: The types of forward hooks to configure, e.g. ["output_shapes"].
+        """
+        print(f"BnmModuleHookManager,configure_hooks,type(self.module)={type(root_module)}")
+        self.root_module = root_module
+        self.level_max = os.getenv("BNM_MODULE_HOOK_MANAGER_LEVEL_MAX", level_max)  # str or None or int
+        if isinstance(self.level_max, str):
+            self.level_max = int(self.level_max)
+  
+        self.results_dir = os.getenv("BNM_MODULE_HOOK_MANAGER_RESULTS_DIR", results_dir) # str or None
+        self.bnm_module_hook_output_filename = None if self.results_dir is None else os.path.join(str(self.results_dir), f"bnm_module_hook_output_lvl{level_max}.txt")
+    
+        
+        self.forward_pre_hook_types = forward_pre_hook_types
+        self.forward_hook_types = forward_hook_types
+        
+        header_with_column_names = ";".join([
+            "class_to_collect_metrics",
+            "method_name",
+            "level",
+            "hooked_pytorch_module_name",
+            "hooked_function_name",
+            "metric_name",
+            "metric_value",
+        ])
+        self.write_line_to_file(header_with_column_names)
+        
+        BnmModuleHookManager.do_for_each_submodule_bfs(
+            func=self.configure_hooks_for_submodule,
+            module=root_module,
+            level=0,
+            level_max=level_max,
+        )
+   
+    def configure_hooks_for_submodule(self, module: nn.Module, level: int | None = None):
+        """
+        Args:
+            module: A submodule
+            level: The level of the submodule in the subtree of the root module
+        """
+
+            
+        if isinstance(self.forward_pre_hook_types,list) and "input_shapes" in self.forward_pre_hook_types:
+            
+            def forward_pre_hook_for_input_shapes(
+                module: nn.Module, 
+                input: tuple[Tensor]
+            ):
+                message = BnmModuleHookManager.bnm_forward_pre_hook_for_input_shapes_helper(module, input, level)
+                self.write_line_to_file(message)
+
+            BNM_MODULE_HOOK_HANDLES.append(
+                module.register_forward_pre_hook(forward_pre_hook_for_input_shapes)
+            )
+        
+        if isinstance(self.forward_hook_types,list) and "output_shapes" in self.forward_hook_types:
+            
+            def forward_hook_for_output_shapes(
+                module: nn.Module, 
+                input: tuple[Tensor], 
+                output: tuple[Tensor] | Tensor,
+            ):
+                message = BnmModuleHookManager.bnm_forward_hook_for_output_shapes_helper(module, input, output, level)
+                self.write_line_to_file(message)
+            
+            BNM_MODULE_HOOK_HANDLES.append(
+                module.register_forward_hook(forward_hook_for_output_shapes)
+            )
+
+    def write_line_to_file(self, line: str):
+        if self.bnm_module_hook_output_filename is not None:
+            with open(self.bnm_module_hook_output_filename, "a") as f:
+                f.write(line + "\n")
+    
+    @staticmethod
+    def do_for_each_submodule_bfs(
+        func: Callable, 
+        module: nn.Module, 
+        level: int = 0, 
+        level_max: int | None = None
+    ):
+
+        func(module, level)
+        if level_max is None or level+1 <= level_max:
+            for _, child in module.named_children():
+                BnmModuleHookManager.do_for_each_submodule_bfs(
+                    func=func, module=child, level=level + 1, level_max=level_max
+                )
+                
+    @staticmethod
+    def bnm_forward_pre_hook_for_input_shapes_helper(
+        module: nn.Module, input: tuple[Tensor] | Tensor, level: int | None = None
+    ) -> str:
+        some_list_of_strings = ["NA"]
+        if isinstance(input, Tensor):
+            some_list_of_strings = [str(tuple(input.shape))]
+        elif isinstance(input, tuple):
+            some_list_of_strings = [
+                "NA" if not isinstance(input_component, Tensor) else str(tuple(input_component.shape)) 
+                for input_component in input
+            ]
+    
+        input_names_and_shapes = "|".join(some_list_of_strings)    
+        message = ";".join([
+            "BnmModuleHookManager",
+            "bnm_forward_pre_hook_for_input_shapes_helper",
+            f"{level}",
+            f"{module.__class__.__name__}",
+            "forward",
+            "input_shapes",
+            f"{input_names_and_shapes}",
+        ])
+        return message
+        
+    @staticmethod
+    def bnm_forward_hook_for_output_shapes_helper(
+        module: nn.Module, input: tuple[Tensor], output: tuple[Tensor] | Tensor, level: int | None = None
+    ) -> str:
+        some_list_of_strings = ["NA"]
+        if isinstance(output, Tensor):
+            some_list_of_strings = [str(tuple(output.shape))]
+        elif isinstance(output, tuple):
+            some_list_of_strings = ["NA" if not isinstance(output_component, Tensor) else str(tuple(output_component.shape)) for output_component in output]
+        
+        output_names_and_shapes = "|".join(some_list_of_strings)
+        message = ";".join([
+            "BnmModuleHookManager",
+            "bnm_forward_hook_for_output_shapes_helper",
+            f"{level}",
+            f"{module.__class__.__name__}",
+            "forward",
+            "output_shapes",
+            f"{output_names_and_shapes}",
+        ])
+        return message

From 31b39fe245d27044ee2232548fdc01fc831349c1 Mon Sep 17 00:00:00 2001
From: Brian Roland <broland@nvidia.com>
Date: Tue, 2 Sep 2025 15:51:01 +0000
Subject: [PATCH 06/12] br: update to bnm_module_hook_manager in nemo [skip ci]

Signed-off-by: Brian Roland <broland@nvidia.com>
---
 3rdparty/NeMo | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/3rdparty/NeMo b/3rdparty/NeMo
index ea54a221a..7aecc748b 160000
--- a/3rdparty/NeMo
+++ b/3rdparty/NeMo
@@ -1 +1 @@
-Subproject commit ea54a221a495b4dd8afae3993a18167bac42ba3f
+Subproject commit 7aecc748bfd1e941e53c4efd5ec09a2f1c0f872b

From f6121c38f957f66341a79430ea18346434e45a17 Mon Sep 17 00:00:00 2001
From: Brian Roland <broland@nvidia.com>
Date: Tue, 2 Sep 2025 16:15:24 +0000
Subject: [PATCH 07/12] br: [skip ci]

Signed-off-by: Brian Roland <broland@nvidia.com>
---
 .../src/bionemo/evo2/run/train.py             | 28 +++++++++++++++++--
 .../utils/logging/bnm_module_hook_manager.py  | 10 +++++--
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py b/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
index b6716514c..792b081bd 100644
--- a/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
+++ b/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
@@ -54,12 +54,32 @@
 from bionemo.llm.utils.datamodule_utils import infer_global_batch_size
 from bionemo.llm.utils.logger_utils import WandbConfig, setup_nemo_lightning_logger
 
-
-from bionemo.evo2.utils.logging.bnm_module_hook_manager import BNM_MODULE_HOOK_HANDLES
+from bionemo.evo2.utils.logging.bnm_module_hook_manager import BnmModuleHookManager, BNM_MODULE_HOOK_HANDLES
 
 torch._dynamo.config.suppress_errors = True
 
+from bionemo.evo2.utils.logging.bnm_module_hook_manager import BnmModuleHookManager
+
+
+class HyenaModelWithCustomMetrics(llm.HyenaModel):
+    
+    def configure_model(self, vp_stage: Optional[int] = None) -> None:
+        """Add additional configuration for HyenaModel(GPTModel), after GPTModel.configure_model().
+        
+        When this method is called, self.module is the HyenaModel(LanguageModule(MegatronModel))
+        
+        """
+        super(llm.HyenaModel, self).configure_model(vp_stage=vp_stage)
 
+        self.bnm_module_hook_manager = BnmModuleHookManager()
+
+        self.bnm_module_hook_manager.configure_hooks(
+            root_module=self.module,
+            forward_pre_hook_types=["input_shapes"],
+            forward_hook_types=["output_shapes"],
+        )
+    
+    
 def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace:
     """Parse arguments for Evo2 model training."""
     parser = argparse.ArgumentParser(
@@ -633,7 +653,9 @@ def train(args: argparse.Namespace) -> nl.Trainer:
         if args.lora_finetune:
             lora_transform = Evo2LoRA(peft_ckpt_path=args.lora_checkpoint_path)
         print("********************train: init llm.HyenaModel*******")
-        model = llm.HyenaModel(model_config, tokenizer=data_module.tokenizer, model_transform=lora_transform)
+        #model = llm.HyenaModel(model_config, tokenizer=data_module.tokenizer, model_transform=lora_transform)
+        model = HyenaModelWithCustomMetrics(model_config, tokenizer=data_module.tokenizer, model_transform=lora_transform)
+    
     else:  # mamba
         if args.no_weight_decay_embeddings:
             config_modifiers_init["hyena_no_weight_decay_cond_fn"] = mamba_no_weight_decay_cond_with_embeddings
diff --git a/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py b/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py
index 7e9791867..6db97d43d 100644
--- a/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py
+++ b/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py
@@ -39,14 +39,18 @@ def configure_hooks(
         """
         print(f"BnmModuleHookManager,configure_hooks,type(self.module)={type(root_module)}")
         self.root_module = root_module
+        import pdb; pdb.set_trace()
         self.level_max = os.getenv("BNM_MODULE_HOOK_MANAGER_LEVEL_MAX", level_max)  # str or None or int
         if isinstance(self.level_max, str):
             self.level_max = int(self.level_max)
   
         self.results_dir = os.getenv("BNM_MODULE_HOOK_MANAGER_RESULTS_DIR", results_dir) # str or None
-        self.bnm_module_hook_output_filename = None if self.results_dir is None else os.path.join(str(self.results_dir), f"bnm_module_hook_output_lvl{level_max}.txt")
+        self.bnm_module_hook_output_filename = None 
+        if self.results_dir is not None:
+            self.bnm_module_hook_output_filename = os.path.join(
+                str(self.results_dir), f"bnm_module_hook_output_lvl{self.level_max}.txt"
+            )
     
-        
         self.forward_pre_hook_types = forward_pre_hook_types
         self.forward_hook_types = forward_hook_types
         
@@ -65,7 +69,7 @@ def configure_hooks(
             func=self.configure_hooks_for_submodule,
             module=root_module,
             level=0,
-            level_max=level_max,
+            level_max=self.level_max,
         )
    
     def configure_hooks_for_submodule(self, module: nn.Module, level: int | None = None):

From df85fafa30e637d7e2d83ad9b265ae2ecc1d6f08 Mon Sep 17 00:00:00 2001
From: Brian Roland <broland@nvidia.com>
Date: Wed, 17 Sep 2025 18:14:37 +0000
Subject: [PATCH 08/12] br: module_hook_manager

Signed-off-by: Brian Roland <broland@nvidia.com>
---
 .../evo2/utils/logging/bnm_module_hook_manager.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py b/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py
index 6db97d43d..e83ad3816 100644
--- a/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py
+++ b/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py
@@ -39,7 +39,6 @@ def configure_hooks(
         """
         print(f"BnmModuleHookManager,configure_hooks,type(self.module)={type(root_module)}")
         self.root_module = root_module
-        import pdb; pdb.set_trace()
         self.level_max = os.getenv("BNM_MODULE_HOOK_MANAGER_LEVEL_MAX", level_max)  # str or None or int
         if isinstance(self.level_max, str):
             self.level_max = int(self.level_max)
@@ -111,6 +110,7 @@ def write_line_to_file(self, line: str):
         if self.bnm_module_hook_output_filename is not None:
             with open(self.bnm_module_hook_output_filename, "a") as f:
                 f.write(line + "\n")
+                f.flush() 
     
     @staticmethod
     def do_for_each_submodule_bfs(
@@ -140,12 +140,16 @@ def bnm_forward_pre_hook_for_input_shapes_helper(
                 for input_component in input
             ]
     
-        input_names_and_shapes = "|".join(some_list_of_strings)    
+        input_names_and_shapes = "|".join(some_list_of_strings)
+        
+        module_name = f"{module.__class__.__name__}"
+        if hasattr(module, "operator_type"):
+            module_name += f"-{module.operator_type}"
         message = ";".join([
             "BnmModuleHookManager",
             "bnm_forward_pre_hook_for_input_shapes_helper",
             f"{level}",
-            f"{module.__class__.__name__}",
+            module_name,
             "forward",
             "input_shapes",
             f"{input_names_and_shapes}",
@@ -163,11 +167,14 @@ def bnm_forward_hook_for_output_shapes_helper(
             some_list_of_strings = ["NA" if not isinstance(output_component, Tensor) else str(tuple(output_component.shape)) for output_component in output]
         
         output_names_and_shapes = "|".join(some_list_of_strings)
+        module_name = f"{module.__class__.__name__}"
+        if hasattr(module, "operator_type"):
+            module_name += f"-{module.operator_type}"
         message = ";".join([
             "BnmModuleHookManager",
             "bnm_forward_hook_for_output_shapes_helper",
             f"{level}",
-            f"{module.__class__.__name__}",
+            module_name,
             "forward",
             "output_shapes",
             f"{output_names_and_shapes}",

From caf2ab601ebe54b8f2ad615a1b8bd3525b456dc5 Mon Sep 17 00:00:00 2001
From: Brian Roland <broland@nvidia.com>
Date: Wed, 17 Sep 2025 18:31:33 +0000
Subject: [PATCH 09/12] br: after merge from main

Signed-off-by: Brian Roland <broland@nvidia.com>
---
 sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py b/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
index 8a9f6cff9..009a71dbb 100644
--- a/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
+++ b/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
@@ -818,17 +818,9 @@ def train(args: argparse.Namespace) -> nl.Trainer:
         lora_transform = None
         if args.lora_finetune:
             lora_transform = Evo2LoRA(peft_ckpt_path=args.lora_checkpoint_path)
-<<<<<<< HEAD
-        print("********************train: init llm.HyenaModel*******")
-        #model = llm.HyenaModel(model_config, tokenizer=data_module.tokenizer, model_transform=lora_transform)
-        model = HyenaModelWithCustomMetrics(model_config, tokenizer=data_module.tokenizer, model_transform=lora_transform)
-    
-    else:  # mamba
-=======
 
         model = llm.HyenaModel(model_config, tokenizer=data_module.tokenizer, model_transform=lora_transform)
     elif model_type == "mamba":  # mamba
->>>>>>> main
         if args.no_weight_decay_embeddings:
             config_modifiers_init["hyena_no_weight_decay_cond_fn"] = mamba_no_weight_decay_cond_with_embeddings
         config_modifiers_init["lowercase_loss_reweighting"] = args.mamba_lowercase_loss_weight

From 1b9cf7cc509bfb1e794d68a8e25a60e6a203de64 Mon Sep 17 00:00:00 2001
From: Brian Roland <broland@nvidia.com>
Date: Wed, 17 Sep 2025 21:58:57 +0000
Subject: [PATCH 10/12] br: put in rearrange decorator

Signed-off-by: Brian Roland <broland@nvidia.com>
---
 .../src/bionemo/evo2/run/train.py             |  28 +----
 .../utils/logging/bnm_module_hook_manager.py  | 108 ++++++++++++++----
 2 files changed, 93 insertions(+), 43 deletions(-)

diff --git a/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py b/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
index 009a71dbb..722960f85 100644
--- a/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
+++ b/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
@@ -57,31 +57,11 @@
 from bionemo.llm.utils.logger_utils import WandbConfig, setup_nemo_lightning_logger
 
 from bionemo.evo2.utils.logging.bnm_module_hook_manager import BnmModuleHookManager, BNM_MODULE_HOOK_HANDLES
+from bionemo.evo2.utils.logging.hyena_model_with_custom_metrics import HyenaModelWithCustomMetrics
 
 torch._dynamo.config.suppress_errors = True
 
-from bionemo.evo2.utils.logging.bnm_module_hook_manager import BnmModuleHookManager
 
-
-class HyenaModelWithCustomMetrics(llm.HyenaModel):
-    
-    def configure_model(self, vp_stage: Optional[int] = None) -> None:
-        """Add additional configuration for HyenaModel(GPTModel), after GPTModel.configure_model().
-        
-        When this method is called, self.module is the HyenaModel(LanguageModule(MegatronModel))
-        
-        """
-        super(llm.HyenaModel, self).configure_model(vp_stage=vp_stage)
-
-        self.bnm_module_hook_manager = BnmModuleHookManager()
-
-        self.bnm_module_hook_manager.configure_hooks(
-            root_module=self.module,
-            forward_pre_hook_types=["input_shapes"],
-            forward_hook_types=["output_shapes"],
-        )
-    
-    
 def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace:
     """Parse arguments for Evo2 model training."""
     parser = argparse.ArgumentParser(
@@ -819,7 +799,11 @@ def train(args: argparse.Namespace) -> nl.Trainer:
         if args.lora_finetune:
             lora_transform = Evo2LoRA(peft_ckpt_path=args.lora_checkpoint_path)
 
-        model = llm.HyenaModel(model_config, tokenizer=data_module.tokenizer, model_transform=lora_transform)
+        import os
+        if os.getenv("BNM_MODULE_HOOK_MANAGER_LEVEL_MAX","") != "":
+            model = HyenaModelWithCustomMetrics(model_config, tokenizer=data_module.tokenizer, model_transform=lora_transform)
+        else:
+            model = llm.HyenaModel(model_config, tokenizer=data_module.tokenizer, model_transform=lora_transform)
     elif model_type == "mamba":  # mamba
         if args.no_weight_decay_embeddings:
             config_modifiers_init["hyena_no_weight_decay_cond_fn"] = mamba_no_weight_decay_cond_with_embeddings
diff --git a/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py b/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py
index e83ad3816..3a3c21c7e 100644
--- a/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py
+++ b/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py
@@ -11,15 +11,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Callable
+from typing import Callable, Optional
 import os
 from threading import stack_size
 from torch import nn
 from torch import Tensor
+import functools
+from einops import rearrange
 
 BNM_MODULE_HOOK_HANDLES = []
 
 
+_original_rearrange = rearrange
+
+
 class BnmModuleHookManager():
     
     def configure_hooks(
@@ -49,7 +54,8 @@ def configure_hooks(
             self.bnm_module_hook_output_filename = os.path.join(
                 str(self.results_dir), f"bnm_module_hook_output_lvl{self.level_max}.txt"
             )
-    
+        global BNM_MODULE_HOOK_OUTPUT_FILENAME
+        BNM_MODULE_HOOK_OUTPUT_FILENAME = self.bnm_module_hook_output_filename
         self.forward_pre_hook_types = forward_pre_hook_types
         self.forward_hook_types = forward_hook_types
         
@@ -62,14 +68,17 @@ def configure_hooks(
             "metric_name",
             "metric_value",
         ])
-        self.write_line_to_file(header_with_column_names)
-        
+        BnmModuleHookManager.write_line_to_file(
+            filename=self.bnm_module_hook_output_filename,
+            line=header_with_column_names,
+        )   
         BnmModuleHookManager.do_for_each_submodule_bfs(
             func=self.configure_hooks_for_submodule,
             module=root_module,
             level=0,
             level_max=self.level_max,
         )
+
    
     def configure_hooks_for_submodule(self, module: nn.Module, level: int | None = None):
         """
@@ -86,7 +95,10 @@ def forward_pre_hook_for_input_shapes(
                 input: tuple[Tensor]
             ):
                 message = BnmModuleHookManager.bnm_forward_pre_hook_for_input_shapes_helper(module, input, level)
-                self.write_line_to_file(message)
+                BnmModuleHookManager.write_line_to_file(
+                    filename=self.bnm_module_hook_output_filename,
+                    line=message,
+                )
 
             BNM_MODULE_HOOK_HANDLES.append(
                 module.register_forward_pre_hook(forward_pre_hook_for_input_shapes)
@@ -100,15 +112,18 @@ def forward_hook_for_output_shapes(
                 output: tuple[Tensor] | Tensor,
             ):
                 message = BnmModuleHookManager.bnm_forward_hook_for_output_shapes_helper(module, input, output, level)
-                self.write_line_to_file(message)
+                BnmModuleHookManager.write_line_to_file(
+                    filename=self.bnm_module_hook_output_filename,
+                    line=message,
+                )
             
             BNM_MODULE_HOOK_HANDLES.append(
                 module.register_forward_hook(forward_hook_for_output_shapes)
             )
-
-    def write_line_to_file(self, line: str):
-        if self.bnm_module_hook_output_filename is not None:
-            with open(self.bnm_module_hook_output_filename, "a") as f:
+    @staticmethod
+    def write_line_to_file(filename: str, line: str):
+        if filename is not None:
+            with open(filename, "a") as f:
                 f.write(line + "\n")
                 f.flush() 
     
@@ -126,11 +141,9 @@ def do_for_each_submodule_bfs(
                 BnmModuleHookManager.do_for_each_submodule_bfs(
                     func=func, module=child, level=level + 1, level_max=level_max
                 )
-                
+    
     @staticmethod
-    def bnm_forward_pre_hook_for_input_shapes_helper(
-        module: nn.Module, input: tuple[Tensor] | Tensor, level: int | None = None
-    ) -> str:
+    def arg_names_and_shapes_as_str(input: Tensor | tuple[Tensor]):
         some_list_of_strings = ["NA"]
         if isinstance(input, Tensor):
             some_list_of_strings = [str(tuple(input.shape))]
@@ -141,6 +154,14 @@ def bnm_forward_pre_hook_for_input_shapes_helper(
             ]
     
         input_names_and_shapes = "|".join(some_list_of_strings)
+        return input_names_and_shapes
+        
+                
+    @staticmethod
+    def bnm_forward_pre_hook_for_input_shapes_helper(
+        module: nn.Module, input: tuple[Tensor] | Tensor, level: int | None = None
+    ) -> str:
+        input_names_and_shapes = BnmModuleHookManager.arg_names_and_shapes_as_str(input)
         
         module_name = f"{module.__class__.__name__}"
         if hasattr(module, "operator_type"):
@@ -160,13 +181,8 @@ def bnm_forward_pre_hook_for_input_shapes_helper(
     def bnm_forward_hook_for_output_shapes_helper(
         module: nn.Module, input: tuple[Tensor], output: tuple[Tensor] | Tensor, level: int | None = None
     ) -> str:
-        some_list_of_strings = ["NA"]
-        if isinstance(output, Tensor):
-            some_list_of_strings = [str(tuple(output.shape))]
-        elif isinstance(output, tuple):
-            some_list_of_strings = ["NA" if not isinstance(output_component, Tensor) else str(tuple(output_component.shape)) for output_component in output]
-        
-        output_names_and_shapes = "|".join(some_list_of_strings)
+
+        output_names_and_shapes = BnmModuleHookManager.arg_names_and_shapes_as_str(output)
         module_name = f"{module.__class__.__name__}"
         if hasattr(module, "operator_type"):
             module_name += f"-{module.operator_type}"
@@ -180,3 +196,53 @@ def bnm_forward_hook_for_output_shapes_helper(
             f"{output_names_and_shapes}",
         ])
         return message
+
+
+    @staticmethod
+    def shape_logger(func):
+        """Decorate ``func`` so the shapes of its first argument and of its result are logged.
+
+        One ';'-separated record is written through write_line_to_file to the
+        file named by BNM_MODULE_HOOK_OUTPUT_FILENAME before the call
+        ("input_shapes") and one after it ("output_shapes").
+
+        NOTE(review): the function-name field is hard-coded to "rearrange" and the
+        helper-name field to "bnm_forward_hook_for_output_shapes_helper" for both
+        records; confirm that is intended before wrapping any other function.
+
+        Args:
+            func: callable to wrap; it is called with unchanged arguments.
+
+        Returns:
+            The wrapping callable, which returns whatever ``func`` returns.
+        """
+
+        def log_shapes(metric_name, value):
+            # Build and write one record describing ``value`` under ``metric_name``.
+            message = ";".join([
+                "BnmModuleHookManager",
+                "bnm_forward_hook_for_output_shapes_helper",
+                "?",
+                "rearrange",
+                "forward",
+                metric_name,
+                BnmModuleHookManager.arg_names_and_shapes_as_str(value),
+            ])
+            BnmModuleHookManager.write_line_to_file(
+                filename=BNM_MODULE_HOOK_OUTPUT_FILENAME,
+                line=message,
+            )
+
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            # The first positional argument is usually the tensor/array. A call made
+            # with keyword arguments only used to raise IndexError here; log None instead.
+            log_shapes("input_shapes", args[0] if args else None)
+            result = func(*args, **kwargs)
+            log_shapes("output_shapes", result)
+            return result
+
+        return wrapper
+
+
+rearrange = BnmModuleHookManager.shape_logger(_original_rearrange)

From 6691d8465287f272a21cb0865e7d4adb991c4168 Mon Sep 17 00:00:00 2001
From: Brian Roland <broland@nvidia.com>
Date: Tue, 23 Sep 2025 17:25:01 +0000
Subject: [PATCH 11/12] br: before merge from main

Signed-off-by: Brian Roland <broland@nvidia.com>
---
 sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py  | 9 ++++++---
 .../evo2/utils/logging/bnm_module_hook_manager.py        | 2 +-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py b/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
index 722960f85..481e1a2eb 100644
--- a/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
+++ b/sub-packages/bionemo-evo2/src/bionemo/evo2/run/train.py
@@ -57,7 +57,8 @@
 from bionemo.llm.utils.logger_utils import WandbConfig, setup_nemo_lightning_logger
 
 from bionemo.evo2.utils.logging.bnm_module_hook_manager import BnmModuleHookManager, BNM_MODULE_HOOK_HANDLES
-from bionemo.evo2.utils.logging.hyena_model_with_custom_metrics import HyenaModelWithCustomMetrics
+#from bionemo.evo2.utils.logging.hyena_model_with_custom_metrics import HyenaModelWithCustomMetrics
+from bionemo.evo2.utils.logging.hyena_model_with_call_stack_monitor import HyenaModelWithCallStackMonitor
 
 torch._dynamo.config.suppress_errors = True
 
@@ -800,8 +801,10 @@ def train(args: argparse.Namespace) -> nl.Trainer:
             lora_transform = Evo2LoRA(peft_ckpt_path=args.lora_checkpoint_path)
 
         import os
-        if os.getenv("BNM_MODULE_HOOK_MANAGER_LEVEL_MAX","") != "":
-            model = HyenaModelWithCustomMetrics(model_config, tokenizer=data_module.tokenizer, model_transform=lora_transform)
+        if os.getenv("BNM_CALL_STACK_MONITOR_LEVEL_MAX","") != "":
+            model = HyenaModelWithCallStackMonitor(model_config, tokenizer=data_module.tokenizer, model_transform=lora_transform)
+        # elif os.getenv("BNM_MODULE_HOOK_MANAGER_LEVEL_MAX","") != "":
+        #     model = HyenaModelWithCustomMetrics(model_config, tokenizer=data_module.tokenizer, model_transform=lora_transform)
         else:
             model = llm.HyenaModel(model_config, tokenizer=data_module.tokenizer, model_transform=lora_transform)
     elif model_type == "mamba":  # mamba
diff --git a/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py b/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py
index 3a3c21c7e..0ab26001a 100644
--- a/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py
+++ b/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_module_hook_manager.py
@@ -245,4 +245,4 @@ def wrapper(*args, **kwargs):
         return wrapper
 
 
-rearrange = BnmModuleHookManager.shape_logger(_original_rearrange)
+# rearrange = BnmModuleHookManager.shape_logger(_original_rearrange)

From c46d0e7ce7072bb79c7da6b5c644b968d6490941 Mon Sep 17 00:00:00 2001
From: Brian Roland <broland@nvidia.com>
Date: Tue, 23 Sep 2025 17:27:23 +0000
Subject: [PATCH 12/12] br: call-stack-monitor tools

Signed-off-by: Brian Roland <broland@nvidia.com>
---
 .../utils/logging/bnm_call_stack_monitor.py   | 283 ++++++++++++++++++
 .../hyena_model_with_call_stack_monitor.py    |  43 +++
 .../hyena_model_with_custom_metrics.py        |  21 ++
 .../logging/run_bnm_call_stack_monitor.py     |  26 ++
 4 files changed, 373 insertions(+)
 create mode 100644 sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_call_stack_monitor.py
 create mode 100644 sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/hyena_model_with_call_stack_monitor.py
 create mode 100644 sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/hyena_model_with_custom_metrics.py
 create mode 100644 sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/run_bnm_call_stack_monitor.py

diff --git a/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_call_stack_monitor.py b/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_call_stack_monitor.py
new file mode 100644
index 000000000..ac97a3644
--- /dev/null
+++ b/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/bnm_call_stack_monitor.py
@@ -0,0 +1,283 @@
+import os
+import sys
+import inspect
+from torch import Tensor
+                    
+EVENT_TYPE = "event_type"
+LEVEL_OF_CALL_FRAME = "level_of_call_frame"
+
+
+class BnmCallStackMonitor():
+    """Record call/return events with tensor shapes through a ``sys.setprofile`` hook.
+
+    Events are kept in the module-level list CALL_STACK_EVENTS, which is shared by
+    all instances and replaced on every ``start_monitoring()`` call.
+    """
+
+    def __init__(self, results_dir: str | None = None,):
+        # An unset or empty BNM_CALL_STACK_MONITOR_LEVEL_MAX falls back to 9; the empty
+        # string used to reach int("") and raise ValueError.
+        level_max = os.getenv("BNM_CALL_STACK_MONITOR_LEVEL_MAX", "")
+        self.level_max = int(level_max) if level_max != "" else 9
+
+        self.num_events_max = None  # None: no cap on the number of recorded events
+
+        # The environment variable, when set, takes precedence over the argument.
+        self.results_dir = os.getenv("BNM_CALL_STACK_MONITOR_RESULTS_DIR", results_dir)  # str or None
+        self.results_filename = None
+        if self.results_dir is not None:
+            self.results_filename = os.path.join(
+                str(self.results_dir), "bnm_call_stack_monitor_output.txt"
+            )
+        global BNM_CALL_STACK_MONITOR_OUTPUT_FILENAME
+        BNM_CALL_STACK_MONITOR_OUTPUT_FILENAME = self.results_filename
+
+    def start_monitoring(self):
+        """Reset the shared event list and install the profiler with ``sys.setprofile``."""
+        global CALL_STACK_EVENTS
+        CALL_STACK_EVENTS = []
+
+        prof = create_profiler_with_function_io_metrics(CALL_STACK_EVENTS, level_max=self.level_max, num_events_max=self.num_events_max)
+        sys.setprofile(prof)
+
+    def stop_monitoring(self):
+        """Uninstall the profiler; events recorded so far are kept."""
+        sys.setprofile(None)
+
+    @property
+    def call_stack_events(self):
+        """The shared event list; NameError if ``start_monitoring()`` never ran."""
+        return CALL_STACK_EVENTS
+
+    def delete_call_stack_events(self):
+        """Remove the shared event list from the module namespace, if present."""
+        # A bare ``del CALL_STACK_EVENTS`` without a ``global`` declaration made the
+        # name local to this method and always raised UnboundLocalError.
+        globals().pop("CALL_STACK_EVENTS", None)
+
+    def write_events_to_file(self):
+        """Append a header row and one ';'-separated row per event to the results file.
+
+        Raises:
+            Exception: if no results directory was configured.
+        """
+        if self.results_filename is None:
+            raise Exception(
+                "no results file: pass results_dir or set BNM_CALL_STACK_MONITOR_RESULTS_DIR"
+            )
+
+        # Column names, then the event-dict key that fills each column after the first.
+        column_names = [
+            "class_to_collect_metrics", "level", "module_or_class_name_short", "func_name",
+            "frame_id", EVENT_TYPE, "event_id", "metric_name", "metric_value",
+            "is_class_name_in_black_list", "is_function_name_in_blacklist",
+            "class_name_long", "location",
+        ]
+        event_keys = [
+            LEVEL_OF_CALL_FRAME, "class_name_short", "func_name", "frame_id", EVENT_TYPE,
+            "event_id", "metric_name", "metric_value", "is_class_name_in_black_list",
+            "is_function_name_in_blacklist", "class_name_long", "location",
+        ]
+        lines = [";".join(column_names)]
+        lines.extend(
+            ";".join(["BnmCallStackMonitor", *(str(event[key]) for key in event_keys)])
+            for event in self.call_stack_events
+        )
+        # One open() for the whole batch instead of one per line.
+        with open(self.results_filename, "a") as f:
+            f.writelines(line + "\n" for line in lines)
+
+    @staticmethod
+    def write_line_to_file(filename: str | None, line: str):
+        """Append ``line`` plus a newline to ``filename``; do nothing when it is None."""
+        if filename is not None:
+            with open(filename, "a") as f:
+                f.write(line + "\n")
+                f.flush()
+    
+ 
+def create_brief_module_name(frame):
+    """Return a short dotted label derived from the source path of ``frame``'s code.
+
+    Paths with more than two components after the package-root marker collapse
+    to "<first>...<second-to-last>"; shorter ones are joined with ".".
+    """
+    path = f"{frame.f_code.co_filename}"
+    for marker in ("dist-packages/", "3rdparty/"):
+        if marker in path:
+            path = path.split(marker)[-1]
+            break
+    # str.rstrip(".py") strips any trailing '.', 'p', 'y' characters ("numpy.py" -> "num").
+    parts = path.removesuffix(".py").split("/")
+    if len(parts) <= 2:
+        return ".".join(parts)
+    return "...".join([parts[0], parts[-2]])
+    
+
+def create_profiler_with_function_io_metrics(call_stack_events: list, num_events_max: int | None = 50, level_max: int = 9):
+    """Return a ``sys.setprofile`` callback that records tensor shapes at call and return.
+
+    Only Python-level "call" and "return" events of frames that have at least one
+    Tensor among their named arguments are recorded; each record is a dict appended
+    to ``call_stack_events``.
+
+    Use the returned function like:
+
+        prof = create_profiler_with_function_io_metrics(CALL_STACK_EVENTS)
+        sys.setprofile(prof)
+
+    Args:
+        call_stack_events: list that receives the event dicts (mutated in place).
+        num_events_max: recording stops once this many events exist; a non-int
+            value such as None disables the cap.
+        level_max: "call" events nested deeper than this are not recorded.
+
+    NOTE(review): the nesting level is derived from the previously recorded event,
+    not from the interpreter stack, so frames skipped by the filters can skew it.
+    """
+    # Constant filters, built once per profiler rather than on every event.
+    # Entries are matched as substrings of the function name.
+    FUNCTION_NAME_BLACKLIST = [
+        "nvtx_range_push",
+        "nvtx_range_pop",
+        "__hash__",
+        "maybe_contiguous",
+        "cast_if_needed",
+        "cast",
+        "shape",
+        "<lambda>",
+        "reset_swizzled_inputs", "swizzle_inputs", "set_activation_dtype",
+        "is_appropriate_type",
+        "convert_tensor",
+        "get_backend",
+        "_apply_recipe",
+        "_check_single_tensor",
+        "make_viewless_tensor",
+        "make_upper_case",
+        "reduce_from_tensor_model_parallel_region",
+        "fused_apply_rotary_pos_emb",
+        "copy_to_tensor_model_parallel_region",
+    ]
+    # Entries are compared for equality with the short class name.
+    CLASS_NAME_BLACKLIST = [
+        "SymNumberMemoDescriptor",
+        "MetaTensorDescriber",
+        "WeakIdRef",
+        "WeakIdKeyDictionary",
+        "FakeTensor",
+        "OperationFuser",
+        "IdentityOp",
+    ]
+
+    def profiler(frame, event_type, arg):
+        if isinstance(num_events_max, int) and len(call_stack_events) >= num_events_max:
+            return
+
+        # c_call / c_return / c_exception are never recorded, so leave before the
+        # comparatively costly frame inspection below.
+        if event_type not in ["call", "return"]:
+            return
+
+        func_name = frame.f_code.co_name
+        func_loc = f"{frame.f_code.co_filename}:{frame.f_lineno}"
+        args, _, _, values = inspect.getargvalues(frame)
+        # An argument removed with ``del`` inside the function is absent from the frame
+        # locals when "return" fires; skip it instead of raising KeyError mid-profile.
+        frame_args_as_dict = {k: values[k] for k in args if k in values}
+
+        is_an_input_a_tensor = any(isinstance(v, Tensor) for v in frame_args_as_dict.values())
+        if not is_an_input_a_tensor:
+            return
+
+        is_function_name_in_blacklist = any(x in func_name for x in FUNCTION_NAME_BLACKLIST)
+        _, class_name_long, _ = frame_is_class_method(frame)
+        brief_module_name = create_brief_module_name(frame)
+        class_name_short = brief_module_name if class_name_long is None else class_name_long.split(".")[-1]
+        is_class_name_in_black_list = any(class_name_short == x for x in CLASS_NAME_BLACKLIST)
+
+        level_of_call_frame = None
+        metric_name = None
+        metric_value = None
+        if event_type == "call":
+            if len(call_stack_events) == 0:
+                level_of_call_frame = 0
+            elif call_stack_events[-1][EVENT_TYPE] == "call":
+                level_of_call_frame = call_stack_events[-1][LEVEL_OF_CALL_FRAME] + 1
+                if level_of_call_frame > level_max:
+                    # do not create event
+                    return
+            elif call_stack_events[-1][EVENT_TYPE] == "return":
+                level_of_call_frame = call_stack_events[-1][LEVEL_OF_CALL_FRAME]
+
+            metric_name = "input_shapes"
+            metric_value = "|".join([
+                f"{k}={tuple(v.shape)}" for k, v in frame_args_as_dict.items() if isinstance(v, Tensor)
+            ])
+        elif event_type == "return":
+            if len(call_stack_events) == 0:
+                # the return from the function that called sys.setprofile(prof) arrives first
+                return
+            elif call_stack_events[-1][EVENT_TYPE] == "call":
+                level_of_call_frame = call_stack_events[-1][LEVEL_OF_CALL_FRAME]
+            elif call_stack_events[-1][EVENT_TYPE] == "return":
+                level_of_call_frame = call_stack_events[-1][LEVEL_OF_CALL_FRAME] - 1
+
+            metric_name = "output_shapes"
+            metric_value = "NA"
+            if isinstance(arg, Tensor):
+                metric_value = f"{tuple(arg.shape)}"
+            elif isinstance(arg, tuple):
+                metric_value = "|".join([f"{tuple(v.shape)}" for v in arg if isinstance(v, Tensor)])
+
+        frame_id = str(id(frame))
+        event_dict = {
+            LEVEL_OF_CALL_FRAME: level_of_call_frame,
+            "class_name_short": class_name_short,
+            "func_name": func_name,
+            EVENT_TYPE: event_type,
+            "frame_id": frame_id,
+            "event_id": "|".join([class_name_short, func_name, frame_id, event_type]),
+            "metric_name": metric_name,
+            "metric_value": metric_value,
+            "is_class_name_in_black_list": is_class_name_in_black_list,
+            "is_function_name_in_blacklist": is_function_name_in_blacklist,
+            "class_name_long": class_name_long,
+            "location": func_loc,
+        }
+        call_stack_events.append(event_dict)
+
+    return profiler
+
+
+def frame_is_class_method(frame=None):
+    """Classify ``frame`` as a method call or not, based on its ``self``/``cls`` local.
+
+    Returns a tuple ``(is_method, class_name_long, function_name)``:
+    is_method = True if the frame has a ``self`` local, or a ``cls`` local that is a class
+    class_name_long = name taken from between the quotes of the class repr (a str,
+        not the class object), else None
+    function_name = name of the code object running in the frame (None if frame is None)
+    """
+    if frame is None:
+        return False, None, None
+
+    locals_ = frame.f_locals
+    func_name = frame.f_code.co_name
+
+    # Instance method (has 'self') takes priority over class method (has 'cls').
+    if 'self' in locals_:
+        cls = type(locals_['self'])
+    elif 'cls' in locals_:
+        cls = locals_['cls']
+    else:
+        cls = None
+
+    # A local merely named ``cls`` may hold anything (e.g. None); indexing the split
+    # repr of such a value used to raise IndexError, so only real classes are parsed.
+    name_parts = str(cls).split("'") if isinstance(cls, type) else []
+    if len(name_parts) > 1:
+        return True, name_parts[1], func_name
+
+    # Static method, free function, or no usable class
+    return False, None, func_name
+    
\ No newline at end of file
diff --git a/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/hyena_model_with_call_stack_monitor.py b/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/hyena_model_with_call_stack_monitor.py
new file mode 100644
index 000000000..b8c0853de
--- /dev/null
+++ b/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/hyena_model_with_call_stack_monitor.py
@@ -0,0 +1,43 @@
+from typing import Optional
+from torch import nn, Tensor
+from nemo.collections import llm
+from bionemo.evo2.utils.logging.bnm_call_stack_monitor import BnmCallStackMonitor
+
+
+class HyenaModelWithCallStackMonitor(llm.HyenaModel):
+    """HyenaModel variant that profiles the first forward pass of ``self.module``."""
+    def configure_model(self, vp_stage: Optional[int] = None) -> None:
+        """Configure the model, then hook a BnmCallStackMonitor around the first forward.
+
+        When the hooks are registered, self.module is the HyenaModel(LanguageModule(MegatronModel)).
+        NOTE(review): super(llm.HyenaModel, self) resolves past llm.HyenaModel in the
+        MRO, so a configure_model defined on llm.HyenaModel itself would be skipped — confirm.
+        """
+        super(llm.HyenaModel, self).configure_model(vp_stage=vp_stage)
+
+        global BNM_CALL_STACK_MONITOR_HOOKS
+        BNM_CALL_STACK_MONITOR_HOOKS = []
+
+        def forward_pre_hook(module: nn.Module, input: Tensor | tuple[Tensor]):
+            if not hasattr(module, "bnm_call_stack_monitor"):
+                module.bnm_call_stack_monitor = BnmCallStackMonitor()
+                module.bnm_call_stack_monitor.start_monitoring()
+
+        def forward_hook(module: nn.Module, input: Tensor | tuple[Tensor], output: Tensor | tuple[Tensor]):
+            # Monitoring only ever starts on the first forward; without this flag every
+            # later forward appended the same header and events to the file again.
+            if hasattr(module, "bnm_call_stack_monitor") and not getattr(module, "bnm_call_stack_events_written", False):
+                module.bnm_call_stack_monitor.stop_monitoring()
+                module.bnm_call_stack_monitor.write_events_to_file()
+                module.bnm_call_stack_events_written = True
+
+        BNM_CALL_STACK_MONITOR_HOOKS.append(self.module.register_forward_pre_hook(forward_pre_hook))
+        BNM_CALL_STACK_MONITOR_HOOKS.append(self.module.register_forward_hook(forward_hook))
+
+
+
+
+
+        
+        
+
diff --git a/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/hyena_model_with_custom_metrics.py b/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/hyena_model_with_custom_metrics.py
new file mode 100644
index 000000000..fa4fddddd
--- /dev/null
+++ b/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/hyena_model_with_custom_metrics.py
@@ -0,0 +1,21 @@
+from typing import Optional
+from nemo.collections import llm
+from bionemo.evo2.utils.logging.bnm_module_hook_manager import BnmModuleHookManager
+
+class HyenaModelWithCustomMetrics(llm.HyenaModel):
+    """HyenaModel variant that configures BnmModuleHookManager hooks on ``self.module``."""
+
+    def configure_model(self, vp_stage: Optional[int] = None) -> None:
+        """Configure the model, then register the input/output-shape hook types.
+
+        At hook-registration time self.module is the HyenaModel(LanguageModule(MegatronModel)).
+        """
+        super(llm.HyenaModel, self).configure_model(vp_stage=vp_stage)
+
+        hook_manager = BnmModuleHookManager()
+        self.bnm_module_hook_manager = hook_manager
+        hook_manager.configure_hooks(
+            root_module=self.module,
+            forward_pre_hook_types=["input_shapes"],
+            forward_hook_types=["output_shapes"],
+        )
diff --git a/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/run_bnm_call_stack_monitor.py b/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/run_bnm_call_stack_monitor.py
new file mode 100644
index 000000000..043fc791b
--- /dev/null
+++ b/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/logging/run_bnm_call_stack_monitor.py
@@ -0,0 +1,26 @@
+import sys
+from bionemo.evo2.utils.logging.bnm_call_stack_monitor import BnmCallStackMonitor
+
+
+# Example usage
+def foo(x, y):  # demo entry point: makes a nested call to bar()
+    return bar(x) + y
+
+
+def bar(z):  # demo leaf call: returns its argument multiplied by 2
+    return z * 2
+
+def main():
+    """Profile one foo() call; rows go to ./bnm_call_stack_monitor_output.txt unless the env var overrides."""
+    import torch  # local import: only this demo needs it
+
+    # The profiler records only frames with Tensor arguments, so the ints used before recorded nothing.
+    monitor = BnmCallStackMonitor(results_dir=".")
+    monitor.start_monitoring()
+    foo(torch.ones(2, 3), torch.ones(2, 3))
+    monitor.stop_monitoring()
+    monitor.write_events_to_file()
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file