The ESM-2 implementation natively supports the following TransformerEngine-provided features:
| **Sequence Packing / THD input format** | ✅ Supported |
| **FP8 with THD input format** | ✅ Supported where FP8 is supported |
| **Import from HuggingFace checkpoints** | ✅ Supported |
| **Export to HuggingFace checkpoints** | ✅ Supported |

See [BioNemo Recipes](../../recipes/README.md) for more details on how to use these features to accelerate model training and inference.
### HF Transformers to TE conversion

Generate converted ESM-2 checkpoints from existing HuggingFace transformers checkpoints:

```bash
# Build the image, mount an output directory for the converted checkpoint,
# and reuse the local HuggingFace cache inside the container.
mkdir -p hf_to_te_checkpoint_export
docker build -t esm2 .
docker run --rm -it --gpus all \
  -v $PWD/hf_to_te_checkpoint_export/:/workspace/bionemo/hf_to_te_checkpoint_export \
  -v $HOME/.cache/huggingface/:/root/.cache/huggingface \
  esm2 python export.py hf-to-te
```
### TE to HF Transformers conversion

Convert a previously exported TE checkpoint back into a HuggingFace transformers checkpoint:

```bash
MODEL_TAG=esm2_t6_8M_UR50D  # specify which model to convert
mkdir -p te_to_hf_checkpoint_export
docker build -t esm2 .
docker run --rm -it --gpus all \
  -v $PWD/te_to_hf_checkpoint_export/:/workspace/bionemo/te_to_hf_checkpoint_export \
  -v $PWD/hf_to_te_checkpoint_export/$MODEL_TAG:/workspace/bionemo/hf_to_te_checkpoint_export/$MODEL_TAG \
  -v $HOME/.cache/huggingface/:/root/.cache/huggingface \
  esm2 python export.py te-to-hf --checkpoint-path /workspace/bionemo/hf_to_te_checkpoint_export/$MODEL_TAG
```
## Developer Conversion Workflow

This section walks through the bidirectional conversion between Hugging Face and Transformer Engine (TE) ESM-2 model formats: from Hugging Face to TE format for optimized inference, and back to Hugging Face format for sharing and deployment. The workflow involves the following steps:
### Step 1: Load Original Hugging Face Model

First, load the original ESM-2 model from Hugging Face:

```python
from transformers import AutoModelForMaskedLM

model_hf_original = AutoModelForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D")
```

This loads the pre-trained ESM-2 model that will serve as our reference for comparison.
### Step 2: Export to Transformer Engine Format

Convert the Hugging Face model to Transformer Engine format using the high-level export API:

```python
from pathlib import Path
from esm.export import export_hf_checkpoint

te_checkpoint_path = Path("te_checkpoint")
export_hf_checkpoint("esm2_t6_8M_UR50D", te_checkpoint_path)
```

This creates a Transformer Engine checkpoint that can be used for optimized inference.
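As a quick sanity check, the exported directory can be loaded like any other local HuggingFace checkpoint. The snippet below is a minimal sketch rather than part of the export API: it assumes the checkpoint lands at `te_checkpoint/esm2_t6_8M_UR50D` (the same layout Step 3 relies on) and that the exported directory bundles the TE modeling code and tokenizer files, so `trust_remote_code=True` is passed at load time.

```python
import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

# Assumed output layout; Step 3 below points at the same subdirectory.
te_model_dir = te_checkpoint_path / "esm2_t6_8M_UR50D"

# trust_remote_code=True lets transformers import the TE model classes assumed
# to be bundled with the exported checkpoint; TE layers expect a GPU.
model_te = AutoModelForMaskedLM.from_pretrained(te_model_dir, trust_remote_code=True).cuda().eval()
tokenizer_te = AutoTokenizer.from_pretrained(te_model_dir)

sequence = "MKTVRQERLKSIVRILERSKEPVSGAQLAEELSVSRQVIVQDIAYLRSLGYNIVATPRGYVLAGG"
inputs = tokenizer_te(sequence, return_tensors="pt").to("cuda")

with torch.no_grad():
    logits = model_te(**inputs).logits

print(logits.shape)  # (1, sequence_length, vocab_size)
```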
### Step 3: Export Back to Hugging Face Format

Convert the Transformer Engine checkpoint back to Hugging Face format:

```python
from esm.export import export_te_checkpoint

hf_export_path = Path("hf_export")
exported_model_path = te_checkpoint_path / "esm2_t6_8M_UR50D"
export_te_checkpoint(str(exported_model_path), str(hf_export_path))
```

This step creates a new Hugging Face model that should be functionally equivalent to the original.
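Because the re-exported checkpoint is a plain Hugging Face model directory, it can be shared like any other transformers checkpoint. The sketch below uses the standard `huggingface_hub` client; the repository id is a placeholder, and an authenticated login (e.g. `huggingface-cli login`) is assumed.

```python
from huggingface_hub import HfApi

api = HfApi()

# Placeholder repository id; replace with your own namespace.
repo_id = "your-org/esm2_t6_8M_UR50D-te-roundtrip"
api.create_repo(repo_id, exist_ok=True)

# Upload the exported directory (config, weights, tokenizer files) as-is.
api.upload_folder(folder_path=str(hf_export_path), repo_id=repo_id)
```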
### Step 4: Load and Test the Exported Model

Load the exported model, along with the tokenizer that will be used for validation:

```python
from transformers import AutoTokenizer

model_hf_exported = AutoModelForMaskedLM.from_pretrained(str(hf_export_path))
tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t6_8M_UR50D")
```
### Step 5: Validate Model Equivalence

Test the exported model against the original using masked language modeling:

```python
import torch
from transformers import DataCollatorForLanguageModeling

# Prepare test sequence
sequence = "MKTVRQERLKSIVRILERSKEPVSGAQLAEELSVSRQVIVQDIAYLRSLGYNIVATPRGYVLAGG"
batch = tokenizer([sequence], return_tensors="pt")
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=True, mlm_probability=0.15)
inputs = collator([{"input_ids": batch["input_ids"][0]}])

# Compare outputs
with torch.no_grad():
    outputs_original = model_hf_original(**inputs)
    outputs_exported = model_hf_exported(**inputs)

# Check differences
logits_diff = torch.abs(outputs_original.logits - outputs_exported.logits).max()
print(f"Max logits difference: {logits_diff:.2e}")

if outputs_original.loss is not None and outputs_exported.loss is not None:
    loss_diff = abs(outputs_original.loss - outputs_exported.loss)
    print(f"Loss difference: {loss_diff:.2e}")
```
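For an automated pass/fail check rather than printed differences, the same comparison can be wrapped in `torch.testing.assert_close`. The tolerances below are illustrative assumptions, not values prescribed by the export code; an appropriate threshold depends on dtype and hardware.

```python
# Raises AssertionError if the round-tripped model drifts beyond the tolerances.
torch.testing.assert_close(
    outputs_exported.logits,
    outputs_original.logits,
    atol=1e-4,  # assumed absolute tolerance
    rtol=1e-4,  # assumed relative tolerance
)
print("Exported model matches the original within tolerance.")
```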
## Developer Guide