From 0d5db0cde73f30ec24348de8f126ddaa7453c8d5 Mon Sep 17 00:00:00 2001
From: Branden Vandermoon <bvandermoon@google.com>
Date: Thu, 21 Aug 2025 06:34:12 +0000
Subject: [PATCH] Update RESTRUCTURE.md to include latest changes

---
 RESTRUCTURE.md | 432 +++++++++++++++++++++++++++----------------------
 1 file changed, 241 insertions(+), 191 deletions(-)

diff --git a/RESTRUCTURE.md b/RESTRUCTURE.md
index 1d9dace2a3..608c02a1f9 100644
--- a/RESTRUCTURE.md
+++ b/RESTRUCTURE.md
@@ -6,6 +6,7 @@ We welcome feedback on this proposed structure. Please provide any thoughts,
 comments, or questions by creating a new 
 [issue](https://github.com/AI-Hypercomputer/maxtext/issues) in MaxText.
 
+
 ```
 .
 ├── .dockerignore
@@ -16,59 +17,8 @@ comments, or questions by creating a new
 ├── AUTHORS
 ├── CONTRIBUTING.md
 ├── LICENSE
-|   # Note: Keeping MaxText/ temporarily for backwards compatibility.
-|   # This will be deprecated. We will remove it in the near future.
-├── MaxText/
-│   ├── decode.py
-│   ├── sft.py
-│   ├── train.py
-│   └── train_compile.py
 ├── README.md
-├── assets/
-│   └── tokenizers/
-│       ├── tokenizer.default
-│       ├── tokenizer.gemma
-│       ├── tokenizer.gemma3
-│       ├── tokenizer.llama2
-│       ├── tokenizer.mistral-v1
-│       ├── tokenizer.mistral-v3
-│       └── tokenizer_llama3.tiktoken
-├── benchmarks/
-│   ├── bigquery/
-│   │   ├── benchmark_db_utils.py
-│   │   └── upload_metrics_to_bq.py
-│   ├── disruption_management/
-│   │   ├── disruption_handler.py
-│   │   ├── disruption_manager.py
-│   │   ├── disruption_utils.py
-│   │   └── monitor.py
-│   ├── runners/
-│   │   ├── benchmark_runner.py
-│   │   ├── benchmark_utils.py
-│   │   ├── command_utils.py
-│   │   └── maxtext_xpk_runner.py
-│   └── workload_configs/
-│   │   ├── convergence/
-│   │   │   ├── c4_exp.py
-│   │   │   └── convergence_utils.py
-│   │   ├── hardware_optimized
-│   │   │   ├── tpu/
-│   │   │   │   ├── maxtext_trillium_model_configs.py
-│   │   │   │   ├── maxtext_v5e_model_configs.py
-│   │   │   │   └── maxtext_v5p_model_configs.py
-│   │   └── mmlu/
-│   │   │   ├── mmlu_categories.py
-│   │   │   └── mmlu_eval.py
-│   │   └── recipes/
-│   │       ├── args_helper.py
-│   │       ├── mcjax_long_running_recipe.py
-│   │       ├── py_elastic_training_recipe.py
-│   │       └── ...
-│   └── xla_flags_library.py
-|   # Note: configs/ content is out of scope for this restructure.
-|   # This will be improved in the future.
-├── configs/
-├── docker/
+├── dependencies/
 │   ├── dockerfiles/
 │   │   ├── jetstream_pathways.Dockerfile
 │   │   ├── maxengine_server.Dockerfile
@@ -81,157 +31,257 @@ comments, or questions by creating a new
 │   │   └── maxtext_runner.Dockerfile
 │   ├── requirements/
 │   │   ├── constraints_gpu.txt
-│   │   ├── requirements.txt
+│   │   ├── requirements.txt
 │   │   └── ...
 │   └── scripts/
 │       ├── docker_build_dependency_image.sh
 │       └── docker_upload_runner.sh
-|   # Note: docs/ content is out of scope for this restructure.
-│   # This will be improved in the future.
 ├── docs/
-├── maxtext/
-│   ├── checkpoint_conversion/
-│   │   ├── to_hf/
-│   │   │   └── llama_mistral_mixtral_orbax_to_hf.py
-│   │   └── to_maxtext/
-│   │   │   ├── convert_deepseek_ckpt.py
-│   │   │   ├── llama_or_mistral_ckpt.py
-│   │   │   ├── convert_deepseek_ckpt_unscanned.py
-│   │   │   ├── convert_gemma2_ckpt.py
+│   ├── examples/
+│   │   └── pedagogical_examples.md
+│   ├── getting_started/
+│   │   ├── First_run.md
+│   │   └── ...
+│   ├── PREFLIGHT.md
+│   ├── README.md
+│   └── Sharding.md
+├── src/
+│   ├── maxtext/
+│   │   ├── assets/
+│   │   │   └── tokenizers/
+│   │   │       ├── tokenizer.default
+│   │   │       ├── tokenizer.gemma
+│   │   │       ├── tokenizer.gemma3
+│   │   │       ├── tokenizer.llama2
+│   │   │       ├── tokenizer.mistral-v1
+│   │   │       ├── tokenizer.mistral-v3
+│   │   │       └── tokenizer_llama3.tiktoken
+│   │   ├── benchmarks/
+│   │   │   ├── bigquery/
+│   │   │   │   ├── benchmark_db_utils.py
+│   │   │   │   └── upload_metrics_to_bq.py
+│   │   │   ├── disruption_management/
+│   │   │   │   ├── disruption_handler.py
+│   │   │   │   ├── disruption_manager.py
+│   │   │   │   ├── disruption_utils.py
+│   │   │   │   └── monitor.py
+│   │   │   ├── runners/
+│   │   │   │   ├── benchmark_runner.py
+│   │   │   │   ├── benchmark_utils.py
+│   │   │   │   ├── command_utils.py
+│   │   │   │   └── maxtext_xpk_runner.py
+│   │   │   └── workload_configs/
+│   │   │       ├── convergence/
+│   │   │       │   ├── c4_exp.py
+│   │   │       │   └── convergence_utils.py
+│   │   │       ├── hardware_optimized/
+│   │   │       │   └── tpu/
+│   │   │       │       ├── maxtext_trillium_model_configs.py
+│   │   │       │       ├── maxtext_v5e_model_configs.py
+│   │   │       │       └── maxtext_v5p_model_configs.py
+│   │   │       ├── mmlu/
+│   │   │       │   ├── mmlu_categories.py
+│   │   │       │   └── mmlu_eval.py
+│   │   │       ├── recipes/
+│   │   │       │   ├── args_helper.py
+│   │   │       │   ├── mcjax_long_running_recipe.py
+│   │   │       │   ├── py_elastic_training_recipe.py
+│   │   │       │   └── ...
+│   │   │       ├── llama2_v6e-256_benchmarks.py
+│   │   │       └── xla_flags_library.py
+│   │   ├── checkpoint_conversion/
+│   │   │   ├── to_hf/
+│   │   │   │   └── llama_mistral_mixtral_orbax_to_hf.py
+│   │   │   ├── to_maxtext/
+│   │   │   │   ├── convert_deepseek_ckpt.py
+│   │   │   │   ├── convert_deepseek_ckpt_unscanned.py
+│   │   │   │   ├── convert_gemma2_ckpt.py
+│   │   │   │   ├── convert_gemma_ckpt.py
+│   │   │   │   ├── llama4_ckpt_unscanned.py
+│   │   │   │   ├── llama_ckpt_conversion_inference_only.py
+│   │   │   │   └── llama_or_mistral_ckpt.py
 │   │   │   ├── convert_gemma3_ckpt.py
-│   │   │   ├── convert_gemma_ckpt.py
 │   │   │   ├── convert_gpt2_ckpt_from_paxml.py
-│   │   │   └── llama4_ckpt_unscanned.py
-│   │   ├── load_and_quantize_checkpoint.py
-│   ├── examples/
-│   │   ├── non_spmd.py
-│   │   ├── shardings.py
-│   │   └── shmap_collective_matmul.py
-│   ├── inference/
-│   │   ├── inference_mlperf/
-│   │   │   ├── eval/
-│   │   │   │   ├── evaluate-accuracy-fast.py
-│   │   │   │   └── evaluate-accuracy.py
+│   │   │   └── load_and_quantize_checkpoint.py
+│   │   ├── configs/
 │   │   │   ├── gpu/
-│   │   │   │   └── benchmarks_llama2-70b-h100_8.sh
-│   │   │   ├── matmul/
-│   │   │   │   ├── matmul_dtypes.py
-│   │   │   │   ├── matmul_sharding.py
-│   │   │   │   └── timing_util.py
-│   │   │   ├── offline/
-│   │   │   │   ├── llama_offline_run.sh
-│   │   │   │   ├── mixtral_offline_run.sh
-│   │   │   │   ├── offline_inference.py
-│   │   │   │   └── offline_mode.py
-│   │   │   ├── trillium/
-│   │   │   │   ├── benchmarks_llama2-70b-trillium_2x4.sh
-│   │   │   │   ├── microbenchmarks_llama2-70b-trillium_2x4.sh
-│   │   │   │   └── select_xla_flags.py
-│   │   │   └── user_config/
-│   │   │   │   ├── user.conf
-│   │   │   │   ├── user100.conf
-│   │   │   │   └── user5000.conf
-│   │   │   ├── README.md
-│   │   │   └── requirements.txt
-│   │   ├── gpu/
-│   │   │   ├── README.md
-│   │   │   └── microbenchmark_llama2-70b_h100-8.sh
-│   │   ├── jetstream_pathways/
-│   │   │   ├── README.md
-│   │   │   └── jetstream_pathways_entrypoint.sh
-│   │   ├── maxengine_server/
+│   │   │   │   ├── a3/
+│   │   │   │   │   ├── llama_2_7b.sh
+│   │   │   │   │   └── llama3.1_405b/
+│   │   │   │   └── models/
+│   │   │   │       ├── llama2_70b.yml
+│   │   │   │       ├── llama2_7b.yml
+│   │   │   │       └── ...
+│   │   │   ├── inference/
+│   │   │   │   ├── multihost/
+│   │   │   │   │   ├── disaggregation/
+│   │   │   │   │   │   └── llama3_405b_v6e-16-16.yml
+│   │   │   │   │   └── interleaved/
+│   │   │   │   │       ├── llama2_70b_v5e-16.yml
+│   │   │   │   │       ├── llama3_405b_v5e-64.yml
+│   │   │   │   │       └── llama3_70b_v5e-16.yml
+│   │   │   │   ├── inference.yml
+│   │   │   │   └── inference_jetstream.yml
+│   │   │   ├── post_train/
+│   │   │   │   ├── dpo.yml
+│   │   │   │   ├── grpo.yml
+│   │   │   │   └── sft.yml
+│   │   │   ├── tpu/
+│   │   │   │   ├── models/
+│   │   │   │   │   ├── deepseek2-16b.yml
+│   │   │   │   │   └── ...
+│   │   │   │   ├── v4/
+│   │   │   │   │   └── ...
+│   │   │   │   ├── v5e/
+│   │   │   │   │   └── ...
+│   │   │   │   ├── v5p/
+│   │   │   │   │   └── ...
+│   │   │   │   └── v6e/
+│   │   │   │       └── ...
 │   │   │   ├── README.md
-│   │   │   └── maxengine_server_entrypoint.sh
-│   │   ├── scripts/
-│   │   │   ├── notebooks/
-│   │   │   │   └── sharding_utils.ipynb
-│   │   │   ├── decode_multi.py
-│   │   │   ├── sharding_utils.py
-│   │   │   └── test_sharding_utils.py
-│   │   ├── kvcache.py
-│   │   ├── page_manager.py
-│   │   ├── paged_attention.py
-│   │   ├── paged_attention_kernel_v2.py
-│   │   └── sharding_utils.py
-│   ├── input_pipeline/
-│   │   ├── packing/
-│   │   │   ├── prefill_packing.py
-│   │   │   └── sequence_packing.py
-│   │   ├── distillation_data_processing.py
-│   │   ├── grain_data_processing.py
-│   │   ├── grain_tokenizer.py
-│   │   ├── hf_data_processing.py
-│   │   ├── input_pipeline_interface.py
-│   │   ├── input_pipeline_utils.py
-│   │   ├── synthetic_data_processing.py
-│   │   ├── tfds_data_processing.py
-│   │   ├── tfds_data_processing_c4_mlperf.py
-│   │   └── tokenizer.py
-│   ├── kernels/
-│   │   ├── attention/
-│   │   │   └── ragged_attention.py
-│   │   └── megablox/
-│   │       ├── common.py
-│   │       ├── gmm.py
-│   │       └── ops.py
-│   ├── layers/
-│   │   ├── attentions.py
-│   │   ├── embeddings.py
-│   │   ├── linears.py
-│   │   └── normalizations.py
-│   │   ├── ...
-│   ├── models/
-│   │   ├── deepseek.py
-│   │   ├── gemma.py
-│   │   ├── llama2.py
-│   │   ├── transformer.py
-│   │   └── ...
-│   ├── optimizers/
-│   │   └── optimizers.py
-│   ├── profile/
-│   │   ├── profiler.py
-│   │   └── vertex_tensorboard.py
-│   ├── tests/
-│   │   ├── assets/
-│   │   │   ├── golden_logits/
-│   │   │   │   ├── golden_data_deepseek_r1_distill_llama3.1_8b.jsonl
-│   │   │   │   └── ...
-│   │   │   └── logits_generation/
-│   │   │       ├── generate_grpo_golden_logits.py
-│   │   │   │   └── ...
-│   │   ├── end_to_end/
+│   │   │   ├── base.yml
+│   │   │   ├── gpu_smoke_test.yml
+│   │   │   └── tpu_smoke_test.yml
+│   │   ├── examples/
+│   │   │   ├── non_spmd.py
+│   │   │   ├── shardings.py
+│   │   │   └── shmap_collective_matmul.py
+│   │   ├── inference/
 │   │   │   ├── gpu/
-│   │   │   │   └── ...
-│   │   │   └── tpu/
-│   │   │       └── llama3.1/
-│   │   │           └── 8b/
-│   │   │               ├── 3_test_llana3.1_8b.sh
-│   │   │               └── ...
-│   │   ├── integration/
+│   │   │   │   ├── README.md
+│   │   │   │   └── microbenchmark_llama2-70b_h100-8.sh
+│   │   │   ├── inference_mlperf/
+│   │   │   │   ├── eval/
+│   │   │   │   │   ├── evaluate-accuracy-fast.py
+│   │   │   │   │   └── evaluate-accuracy.py
+│   │   │   │   ├── gpu/
+│   │   │   │   │   └── benchmarks_llama2-70b-h100_8.sh
+│   │   │   │   ├── matmul/
+│   │   │   │   │   ├── matmul_dtypes.py
+│   │   │   │   │   ├── matmul_sharding.py
+│   │   │   │   │   └── timing_util.py
+│   │   │   │   ├── microbenchmarks/
+│   │   │   │   │   ├── inference_microbenchmark.py
+│   │   │   │   │   └── inference_microbenchmark_sweep.py
+│   │   │   │   ├── offline/
+│   │   │   │   │   ├── llama_offline_run.sh
+│   │   │   │   │   ├── mixtral_offline_run.sh
+│   │   │   │   │   ├── offline_inference.py
+│   │   │   │   │   └── offline_mode.py
+│   │   │   │   ├── trillium/
+│   │   │   │   │   ├── benchmarks_llama2-70b-trillium_2x4.sh
+│   │   │   │   │   ├── microbenchmarks_llama2-70b-trillium_2x4.sh
+│   │   │   │   │   └── select_xla_flags.py
+│   │   │   │   ├── user_config/
+│   │   │   │   │   ├── user.conf
+│   │   │   │   │   ├── user100.conf
+│   │   │   │   │   └── user5000.conf
+│   │   │   │   ├── README.md
+│   │   │   │   └── requirements.txt
+│   │   │   ├── jetstream_pathways/
+│   │   │   │   ├── README.md
+│   │   │   │   └── jetstream_pathways_entrypoint.sh
+│   │   │   ├── maxengine/
+│   │   │   │   ├── maxengine_server/
+│   │   │   │   │   ├── README.md
+│   │   │   │   │   └── maxengine_server_entrypoint.sh
+│   │   │   │   ├── maxengine.py
+│   │   │   │   ├── maxengine_config.py
+│   │   │   │   └── maxengine_server.py
+│   │   │   ├── scripts/
+│   │   │   │   ├── notebooks/
+│   │   │   │   │   └── sharding_utils.ipynb
+│   │   │   │   ├── decode_multi.py
+│   │   │   │   ├── sharding_utils.py
+│   │   │   │   └── test_sharding_utils.py
+│   │   │   ├── decode_multi.py
+│   │   │   ├── inference_utils.py
+│   │   │   ├── kvcache.py
+│   │   │   ├── page_manager.py
+│   │   │   ├── paged_attention.py
+│   │   │   └── paged_attention_kernel_v2.py
+│   │   ├── input_pipeline/
+│   │   │   ├── packing/
+│   │   │   │   ├── prefill_packing.py
+│   │   │   │   └── sequence_packing.py
+│   │   │   ├── distillation_data_processing.py
+│   │   │   ├── grain_data_processing.py
+│   │   │   ├── grain_tokenizer.py
+│   │   │   ├── hf_data_processing.py
+│   │   │   ├── input_pipeline_interface.py
+│   │   │   ├── input_pipeline_utils.py
+│   │   │   ├── synthetic_data_processing.py
+│   │   │   ├── tfds_data_processing.py
+│   │   │   ├── tfds_data_processing_c4_mlperf.py
+│   │   │   └── tokenizer.py
+│   │   ├── kernels/
+│   │   │   ├── attention/
+│   │   │   │   └── ragged_attention.py
+│   │   │   └── megablox/
+│   │   │       ├── common.py
+│   │   │       ├── gmm.py
+│   │   │       └── ops.py
+│   │   ├── layers/
+│   │   │   ├── attentions.py
+│   │   │   ├── embeddings.py
+│   │   │   ├── linears.py
+│   │   │   ├── normalizations.py
+│   │   │   └── ...
+│   │   ├── models/
+│   │   │   ├── deepseek.py
+│   │   │   ├── gemma.py
+│   │   │   ├── llama2.py
+│   │   │   ├── transformer.py
+│   │   │   └── ...
+│   │   ├── optimizers/
+│   │   │   └── optimizers.py
+│   │   ├── profile/
+│   │   │   ├── profiler.py
+│   │   │   └── vertex_tensorboard.py
+│   │   ├── trainers/
+│   │   │   ├── post_train/
+│   │   │   │   ├── grpo/
+│   │   │   │   │   ├── grpo_input_pipeline.py
+│   │   │   │   │   ├── grpo_trainer.py
+│   │   │   │   │   └── grpo_trainer_test.yml
+│   │   │   │   └── sft/
+│   │   │   │       └── sft_train.py
+│   │   │   └── pretrain/
+│   │   │       ├── elastic_train.py
+│   │   │       ├── train.py
+│   │   │       ├── train_compile.py
+│   │   │       ├── train_tokenizer.py
+│   │   │       └── train_utils.py
+│   │   └── utils/
+│   │       ├── globals.py
+│   │       ├── max_logging.py
+│   │       ├── max_utils.py
+│   │       ├── maxtext_utils.py
+│   │       ├── metric_logger.py
+│   │       └── multimodal_utils.py
+│   └── MaxText/
+│       └── shims.py
+├── tests/
+│   ├── assets/
+│   │   ├── golden_logits/
+│   │   │   ├── golden_data_deepseek_r1_distill_llama3.1_8b.jsonl
+│   │   │   └── ...
+│   │   └── logits_generation/
+│   │       ├── generate_grpo_golden_logits.py
+│   │       └── ...
+│   ├── end_to_end/
+│   │   ├── gpu/
 │   │   │   └── ...
-│   │   └── unit/
+│   │   └── tpu/
+│   │       ├── llama3.1/
+│   │       │   └── 8b/
+│   │       │       ├── 3_test_llama3.1_8b.sh
+│   │       │       └── ...
 │   │       └── ...
-│   ├── trainers/
-│   │   ├── post_train/
-│   │   │   ├── grpo/
-│   │   │   │   ├── grpo_input_pipeline.py
-│   │   │   │   ├── grpo_trainer.py
-│   │   │   │   └── grpo_trainer_test.yml
-│   │   │   └── sft/
-│   │   │       └── sft_train.py
-│   │   └── pretrain/
-│   │       ├── elastic_train.py
-│   │       ├── train.py
-│   │       ├── train_compile.py
-│   │       ├── train_tokenizer.py
-│   │       └── train_utils.py
-│   └── utils/
-│       ├── globals.py
-│       ├── max_logging.py
-│       ├── max_utils.py
-│       ├── maxtext_utils.py
-│       ├── metric_logger.py
-│       └── multimodal_utils.py
+│   ├── integration/
+│   │   └── hf_checkpoint_conversion_checker.py
+│   └── unit/
+│       └── ...
 ├── pylintrc
 ├── pyproject.toml
 ├── pytest.ini