@@ -72,6 +72,26 @@ def run_maverick_serving(model: str):
         raise
 
 
+def get_rope_layers_config(model_path: str) -> list[int]:
+    """
+    Get the interleaved RoPE configuration from the HuggingFace config.
+
+    Args:
+        model_path: Path to the local directory containing the reduced
+            Maverick model checkpoint
+
+    Returns:
+        List of 0s and 1s indicating whether each layer uses RoPE and local
+        attention: 0 indicates that RoPE is not used, 1 indicates that it is.
+    """
+    config_path = Path(model_path) / "config.json"
+    model_config = json.loads(config_path.read_text())
+    text_config = model_config["text_config"]
+    no_rope_layers = text_config["no_rope_layers"]
+    print(f"Found no_rope_layers: {no_rope_layers}")
+    return no_rope_layers
+
+
 def create_reduced_maverick_model(
     original_model_name:
     str = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
@@ -80,7 +100,7 @@ def create_reduced_maverick_model(
     num_experts: int = 4,
     vision_layers: int = 2,
     force_recreate: bool = False,
-) -> tuple[str, list[int]]:
+) -> str:
     """
     Create a reduced-layer version of the Maverick model.
 
@@ -93,22 +113,13 @@ def create_reduced_maverick_model(
         force_recreate: Whether to recreate if output_dir already exists
 
     Returns:
-        Tuple of:
-        - Path to the created reduced model directory
-        - List of 0 or 1 indicating whether each layer uses RoPE and local attn
-          0 indicates that RoPE is not used while 1 indicates that RoPE is used.
+        Path to the created reduced model directory
     """
 
     print(
         f"Creating reduced Maverick model with {text_layers} text layers and "
         f"{vision_layers} vision layers...")
 
-    print("Loading original model configuration...")
-    original_config = AutoConfig.from_pretrained(original_model_name,
-                                                 trust_remote_code=True)
-    text_config = original_config.to_dict()["text_config"]
-    no_rope_layers = text_config["no_rope_layers"]
-
     # Create output directory
     output_path = Path(output_dir)
     if output_path.exists():
@@ -117,11 +128,14 @@ def create_reduced_maverick_model(
     else:
         print(f"Output directory {output_dir} already exists. "
               "Use --force-recreate to overwrite.")
-        return str(output_path), no_rope_layers
+        return str(output_path)
 
     output_path.mkdir(parents=True, exist_ok=True)
 
     try:
+        print("Loading original model configuration...")
+        original_config = AutoConfig.from_pretrained(original_model_name,
+                                                     trust_remote_code=True)
         print("Creating reduced configuration...")
         reduced_config = create_reduced_config(original_config, text_layers,
                                                num_experts, vision_layers)
@@ -149,7 +163,7 @@ def create_reduced_maverick_model(
             print(f"Could not copy generation config: {e}")
 
         print(f"Successfully created reduced Maverick model at {output_path}")
-        return str(output_path), no_rope_layers
+        return str(output_path)
 
     except Exception as e:
         print(f"Error creating reduced model: {e}")
@@ -586,7 +600,7 @@ def test_dummy_maverick(
     monkeypatch.setenv("VLLM_USE_V1", "1")
     monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
 
-    model_path, rope_layers = create_reduced_maverick_model(
+    model_path = create_reduced_maverick_model(
         original_model_name=original_model_name,
         output_dir=output_dir,
         text_layers=text_layers,
@@ -597,6 +611,8 @@ def test_dummy_maverick(
 
     print(f"\nReduced model created successfully at: {model_path}")
 
+    rope_layers = get_rope_layers_config(model_path)
+
     llm = LLM(
         model=model_path,
         trust_remote_code=True,
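
For context, a minimal sketch of the call sequence this change produces in the test. This is a sketch only: the output directory and layer counts below are illustrative values, not taken from the PR.

    # Create the reduced checkpoint first; the new helper then derives the
    # interleaved-RoPE layout from the config.json that was written to disk,
    # instead of fetching the original model's config via AutoConfig.
    model_path = create_reduced_maverick_model(
        output_dir="/tmp/reduced_maverick",  # illustrative path
        text_layers=4,
        num_experts=4,
        vision_layers=2,
        force_recreate=True,
    )
    rope_layers = get_rope_layers_config(model_path)
    # rope_layers is a list of 0/1 flags, one per text layer, as described
    # in the helper's docstring.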