Skip to content

Commit 82ae39b

Browse files
committed
fixed formatting, bitsandbytes logic
1 parent 32b8bcc commit 82ae39b

File tree

7 files changed

+560
-458
lines changed

7 files changed

+560
-458
lines changed
Lines changed: 188 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,40 @@
11
{
22
"shapes": {
3-
"BM.GPU.H200.8": {
4-
"gpu_count": 8,
5-
"gpu_memory_in_gbs": 1128,
6-
"gpu_type": "H200",
7-
"quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
8-
"ranking": {
9-
"cost": 100,
10-
"performance": 110
11-
}
12-
},
13-
"BM.GPU.H100.8": {
14-
"gpu_count": 8,
15-
"gpu_memory_in_gbs": 640,
16-
"gpu_type": "H100",
17-
"quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
18-
"ranking": {
19-
"cost": 100,
20-
"performance": 100
21-
}
22-
},
23-
"BM.GPU.MI300X.8": {
24-
"gpu_count": 8,
25-
"gpu_memory_in_gbs": 1536,
26-
"gpu_type": "MI300X",
27-
"quantization": ["fp8", "gguf"],
3+
"BM.GPU.A10.4": {
4+
"gpu_count": 4,
5+
"gpu_memory_in_gbs": 96,
6+
"gpu_type": "A10",
7+
"quantization": [
8+
"awq",
9+
"gptq",
10+
"marlin",
11+
"int8",
12+
"bitblas",
13+
"aqlm",
14+
"bitsandbytes",
15+
"deepspeedfp",
16+
"gguf"
17+
],
2818
"ranking": {
29-
"cost": 90,
30-
"performance": 90
19+
"cost": 50,
20+
"performance": 50
3121
}
3222
},
3323
"BM.GPU.A100-V2.8": {
3424
"gpu_count": 8,
3525
"gpu_memory_in_gbs": 640,
3626
"gpu_type": "A100",
37-
"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
27+
"quantization": [
28+
"awq",
29+
"gptq",
30+
"marlin",
31+
"int8",
32+
"bitblas",
33+
"aqlm",
34+
"bitsandbytes",
35+
"deepspeedfp",
36+
"gguf"
37+
],
3838
"ranking": {
3939
"cost": 80,
4040
"performance": 70
@@ -44,17 +44,80 @@
4444
"gpu_count": 8,
4545
"gpu_memory_in_gbs": 320,
4646
"gpu_type": "A100",
47-
"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
47+
"quantization": [
48+
"awq",
49+
"gptq",
50+
"marlin",
51+
"int8",
52+
"bitblas",
53+
"aqlm",
54+
"bitsandbytes",
55+
"deepspeedfp",
56+
"gguf"
57+
],
4858
"ranking": {
4959
"cost": 70,
5060
"performance": 60
5161
}
5262
},
63+
"BM.GPU.H100.8": {
64+
"gpu_count": 8,
65+
"gpu_memory_in_gbs": 640,
66+
"gpu_type": "H100",
67+
"quantization": [
68+
"awq",
69+
"gptq",
70+
"marlin",
71+
"fp8",
72+
"int8",
73+
"bitblas",
74+
"aqlm",
75+
"bitsandbytes",
76+
"deepspeedfp",
77+
"gguf"
78+
],
79+
"ranking": {
80+
"cost": 100,
81+
"performance": 100
82+
}
83+
},
84+
"BM.GPU.H200.8": {
85+
"gpu_count": 8,
86+
"gpu_memory_in_gbs": 1128,
87+
"gpu_type": "H200",
88+
"quantization": [
89+
"awq",
90+
"gptq",
91+
"marlin",
92+
"fp8",
93+
"int8",
94+
"bitblas",
95+
"aqlm",
96+
"bitsandbytes",
97+
"deepspeedfp",
98+
"gguf"
99+
],
100+
"ranking": {
101+
"cost": 100,
102+
"performance": 110
103+
}
104+
},
53105
"BM.GPU.L40S-NC.4": {
54106
"gpu_count": 4,
55107
"gpu_memory_in_gbs": 192,
56108
"gpu_type": "L40S",
57-
"quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
109+
"quantization": [
110+
"awq",
111+
"gptq",
112+
"marlin",
113+
"fp8",
114+
"int8",
115+
"bitblas",
116+
"aqlm",
117+
"bitsandbytes",
118+
"deepspeedfp",
119+
"gguf"
120+
],
58121
"ranking": {
59122
"cost": 60,
60123
"performance": 80
@@ -64,18 +127,64 @@
64127
"gpu_count": 4,
65128
"gpu_memory_in_gbs": 192,
66129
"gpu_type": "L40S",
67-
"quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
130+
"quantization": [
131+
"awq",
132+
"gptq",
133+
"marlin",
134+
"fp8",
135+
"int8",
136+
"bitblas",
137+
"aqlm",
138+
"bitsandbytes",
139+
"deepspeedfp",
140+
"gguf"
141+
],
68142
"ranking": {
69143
"cost": 60,
70144
"performance": 80
71145
}
72146
},
147+
"BM.GPU.MI300X.8": {
148+
"gpu_count": 8,
149+
"gpu_memory_in_gbs": 1536,
150+
"gpu_type": "MI300X",
151+
"quantization": [
152+
"fp8",
153+
"gguf"
154+
],
155+
"ranking": {
156+
"cost": 90,
157+
"performance": 90
158+
}
159+
},
160+
"BM.GPU2.2": {
161+
"gpu_count": 2,
162+
"gpu_memory_in_gbs": 32,
163+
"gpu_type": "P100",
164+
"quantization": [
165+
"fp16"
166+
],
167+
"ranking": {
168+
"cost": 30,
169+
"performance": 20
170+
}
171+
},
73172
"VM.GPU.A10.1": {
74173
"gpu_count": 1,
75174
"gpu_memory_in_gbs": 24,
76175
"gpu_type": "A10",
77-
"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
78-
"ranking" : {
176+
"quantization": [
177+
"awq",
178+
"gptq",
179+
"marlin",
180+
"int8",
181+
"bitblas",
182+
"aqlm",
183+
"bitsandbytes",
184+
"deepspeedfp",
185+
"gguf"
186+
],
187+
"ranking": {
79188
"cost": 20,
80189
"performance": 30
81190
}
@@ -84,37 +193,29 @@
84193
"gpu_count": 2,
85194
"gpu_memory_in_gbs": 48,
86195
"gpu_type": "A10",
87-
"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
88-
"ranking" : {
196+
"quantization": [
197+
"awq",
198+
"gptq",
199+
"marlin",
200+
"int8",
201+
"bitblas",
202+
"aqlm",
203+
"bitsandbytes",
204+
"deepspeedfp",
205+
"gguf"
206+
],
207+
"ranking": {
89208
"cost": 40,
90209
"performance": 40
91210
}
92211
},
93-
"BM.GPU.A10.4": {
94-
"gpu_count": 4,
95-
"gpu_memory_in_gbs": 96,
96-
"gpu_type": "A10",
97-
"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
98-
"ranking" : {
99-
"cost": 50,
100-
"performance": 50
101-
}
102-
},
103-
"BM.GPU2.2": {
104-
"gpu_count": 2,
105-
"gpu_memory_in_gbs": 32,
106-
"gpu_type": "P100",
107-
"quantization": ["fp16"],
108-
"ranking": {
109-
"cost": 30,
110-
"performance": 20
111-
}
112-
},
113212
"VM.GPU2.1": {
114213
"gpu_count": 1,
115214
"gpu_memory_in_gbs": 16,
116215
"gpu_type": "P100",
117-
"quantization": ["fp16"],
216+
"quantization": [
217+
"fp16"
218+
],
118219
"ranking": {
119220
"cost": 10,
120221
"performance": 10
@@ -124,29 +225,52 @@
124225
"gpu_count": 1,
125226
"gpu_memory_in_gbs": 16,
126227
"gpu_type": "V100",
127-
"quantization" : ["gptq", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
128-
"ranking" : {
228+
"quantization": [
229+
"gptq",
230+
"bitblas",
231+
"aqlm",
232+
"bitsandbytes",
233+
"deepspeedfp",
234+
"gguf"
235+
],
236+
"ranking": {
129237
"cost": 35,
130-
"performance": 10
238+
"performance": 10
131239
}
132240
},
133241
"VM.GPU3.2": {
134242
"gpu_count": 2,
135243
"gpu_memory_in_gbs": 32,
136244
"gpu_type": "V100",
137-
"ranking" : {
245+
"quantization": [
246+
"gptq",
247+
"bitblas",
248+
"aqlm",
249+
"bitsandbytes",
250+
"deepspeedfp",
251+
"gguf"
252+
],
253+
"ranking": {
138254
"cost": 45,
139-
"performance": 20
255+
"performance": 20
140256
}
141257
},
142258
"VM.GPU3.4": {
143259
"gpu_count": 4,
144260
"gpu_memory_in_gbs": 64,
145261
"gpu_type": "V100",
146-
"ranking" : {
262+
"quantization": [
263+
"gptq",
264+
"bitblas",
265+
"aqlm",
266+
"bitsandbytes",
267+
"deepspeedfp",
268+
"gguf"
269+
],
270+
"ranking": {
147271
"cost": 55,
148-
"performance": 45
272+
"performance": 45
149273
}
150274
}
151275
}
152-
}
276+
}

0 commit comments

Comments
 (0)