1
1
{
2
2
"shapes" : {
3
- "BM.GPU.H200.8" : {
4
- "gpu_count" : 8 ,
5
- "gpu_memory_in_gbs" : 1128 ,
6
- "gpu_type" : " H200" ,
7
- "quantization" : [" awq" , " gptq" , " marlin" , " fp8" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
8
- "ranking" : {
9
- "cost" : 100 ,
10
- "performance" : 110
11
- }
12
- },
13
- "BM.GPU.H100.8" : {
14
- "gpu_count" : 8 ,
15
- "gpu_memory_in_gbs" : 640 ,
16
- "gpu_type" : " H100" ,
17
- "quantization" : [" awq" , " gptq" , " marlin" , " fp8" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
18
- "ranking" : {
19
- "cost" : 100 ,
20
- "performance" : 100
21
- }
22
- },
23
- "BM.GPU.MI300X.8" : {
24
- "gpu_count" : 8 ,
25
- "gpu_memory_in_gbs" : 1536 ,
26
- "gpu_type" : " MI300X" ,
27
- "quantization" : [" fp8" , " gguf" ],
3
+ "BM.GPU.A10.4" : {
4
+ "gpu_count" : 4 ,
5
+ "gpu_memory_in_gbs" : 96 ,
6
+ "gpu_type" : " A10" ,
7
+ "quantization" : [
8
+ " awq" ,
9
+ " gptq" ,
10
+ " marlin" ,
11
+ " int8" ,
12
+ " bitblas" ,
13
+ " aqlm" ,
14
+ " bitsandbytes" ,
15
+ " deepspeedfp" ,
16
+ " gguf"
17
+ ],
28
18
"ranking" : {
29
- "cost" : 90 ,
30
- "performance" : 90
19
+ "cost" : 50 ,
20
+ "performance" : 50
31
21
}
32
22
},
33
23
"BM.GPU.A100-V2.8" : {
34
24
"gpu_count" : 8 ,
35
25
"gpu_memory_in_gbs" : 640 ,
36
26
"gpu_type" : " A100" ,
37
- "quantization" : [" awq" , " gptq" , " marlin" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
27
+ "quantization" : [
28
+ " awq" ,
29
+ " gptq" ,
30
+ " marlin" ,
31
+ " int8" ,
32
+ " bitblas" ,
33
+ " aqlm" ,
34
+ " bitsandbytes" ,
35
+ " deepspeedfp" ,
36
+ " gguf"
37
+ ],
38
38
"ranking" : {
39
39
"cost" : 80 ,
40
40
"performance" : 70
44
44
"gpu_count" : 8 ,
45
45
"gpu_memory_in_gbs" : 320 ,
46
46
"gpu_type" : " A100" ,
47
- "quantization" : [" awq" , " gptq" , " marlin" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
47
+ "quantization" : [
48
+ " awq" ,
49
+ " gptq" ,
50
+ " marlin" ,
51
+ " int8" ,
52
+ " bitblas" ,
53
+ " aqlm" ,
54
+ " bitsandbytes" ,
55
+ " deepspeedfp" ,
56
+ " gguf"
57
+ ],
48
58
"ranking" : {
49
59
"cost" : 70 ,
50
60
"performance" : 60
51
61
}
52
62
},
63
+ "BM.GPU.H100.8" : {
64
+ "gpu_count" : 8 ,
65
+ "gpu_memory_in_gbs" : 640 ,
66
+ "gpu_type" : " H100" ,
67
+ "quantization" : [
68
+ " awq" ,
69
+ " gptq" ,
70
+ " marlin" ,
71
+ " fp8" ,
72
+ " int8" ,
73
+ " bitblas" ,
74
+ " aqlm" ,
75
+ " bitsandbytes" ,
76
+ " deepspeedfp" ,
77
+ " gguf"
78
+ ],
79
+ "ranking" : {
80
+ "cost" : 100 ,
81
+ "performance" : 100
82
+ }
83
+ },
84
+ "BM.GPU.H200.8" : {
85
+ "gpu_count" : 8 ,
86
+ "gpu_memory_in_gbs" : 1128 ,
87
+ "gpu_type" : " H200" ,
88
+ "quantization" : [
89
+ " awq" ,
90
+ " gptq" ,
91
+ " marlin" ,
92
+ " fp8" ,
93
+ " int8" ,
94
+ " bitblas" ,
95
+ " aqlm" ,
96
+ " bitsandbytes" ,
97
+ " deepspeedfp" ,
98
+ " gguf"
99
+ ],
100
+ "ranking" : {
101
+ "cost" : 100 ,
102
+ "performance" : 110
103
+ }
104
+ },
53
105
"BM.GPU.L40S-NC.4" : {
54
106
"gpu_count" : 4 ,
55
107
"gpu_memory_in_gbs" : 192 ,
56
108
"gpu_type" : " L40S" ,
57
- "quantization" : [" awq" , " gptq" , " marlin" , " fp8" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
109
+ "quantization" : [
110
+ " awq" ,
111
+ " gptq" ,
112
+ " marlin" ,
113
+ " fp8" ,
114
+ " int8" ,
115
+ " bitblas" ,
116
+ " aqlm" ,
117
+ " bitsandbytes" ,
118
+ " deepspeedfp" ,
119
+ " gguf"
120
+ ],
58
121
"ranking" : {
59
122
"cost" : 60 ,
60
123
"performance" : 80
64
127
"gpu_count" : 4 ,
65
128
"gpu_memory_in_gbs" : 192 ,
66
129
"gpu_type" : " L40S" ,
67
- "quantization" : [" awq" , " gptq" , " marlin" , " fp8" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
130
+ "quantization" : [
131
+ " awq" ,
132
+ " gptq" ,
133
+ " marlin" ,
134
+ " fp8" ,
135
+ " int8" ,
136
+ " bitblas" ,
137
+ " aqlm" ,
138
+ " bitsandbytes" ,
139
+ " deepspeedfp" ,
140
+ " gguf"
141
+ ],
68
142
"ranking" : {
69
143
"cost" : 60 ,
70
144
"performance" : 80
71
145
}
72
146
},
147
+ "BM.GPU.MI300X.8" : {
148
+ "gpu_count" : 8 ,
149
+ "gpu_memory_in_gbs" : 1536 ,
150
+ "gpu_type" : " MI300X" ,
151
+ "quantization" : [
152
+ " fp8" ,
153
+ " gguf"
154
+ ],
155
+ "ranking" : {
156
+ "cost" : 90 ,
157
+ "performance" : 90
158
+ }
159
+ },
160
+ "BM.GPU2.2" : {
161
+ "gpu_count" : 2 ,
162
+ "gpu_memory_in_gbs" : 32 ,
163
+ "gpu_type" : " P100" ,
164
+ "quantization" : [
165
+ " fp16"
166
+ ],
167
+ "ranking" : {
168
+ "cost" : 30 ,
169
+ "performance" : 20
170
+ }
171
+ },
73
172
"VM.GPU.A10.1" : {
74
173
"gpu_count" : 1 ,
75
174
"gpu_memory_in_gbs" : 24 ,
76
175
"gpu_type" : " A10" ,
77
- "quantization" : [" awq" , " gptq" , " marlin" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
78
- "ranking" : {
176
+ "quantization" : [
177
+ " awq" ,
178
+ " gptq" ,
179
+ " marlin" ,
180
+ " int8" ,
181
+ " bitblas" ,
182
+ " aqlm" ,
183
+ " bitsandbytes" ,
184
+ " deepspeedfp" ,
185
+ " gguf"
186
+ ],
187
+ "ranking" : {
79
188
"cost" : 20 ,
80
189
"performance" : 30
81
190
}
84
193
"gpu_count" : 2 ,
85
194
"gpu_memory_in_gbs" : 48 ,
86
195
"gpu_type" : " A10" ,
87
- "quantization" : [" awq" , " gptq" , " marlin" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
88
- "ranking" : {
196
+ "quantization" : [
197
+ " awq" ,
198
+ " gptq" ,
199
+ " marlin" ,
200
+ " int8" ,
201
+ " bitblas" ,
202
+ " aqlm" ,
203
+ " bitsandbytes" ,
204
+ " deepspeedfp" ,
205
+ " gguf"
206
+ ],
207
+ "ranking" : {
89
208
"cost" : 40 ,
90
209
"performance" : 40
91
210
}
92
211
},
93
- "BM.GPU.A10.4" : {
94
- "gpu_count" : 4 ,
95
- "gpu_memory_in_gbs" : 96 ,
96
- "gpu_type" : " A10" ,
97
- "quantization" : [" awq" , " gptq" , " marlin" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
98
- "ranking" : {
99
- "cost" : 50 ,
100
- "performance" : 50
101
- }
102
- },
103
- "BM.GPU2.2" : {
104
- "gpu_count" : 2 ,
105
- "gpu_memory_in_gbs" : 32 ,
106
- "gpu_type" : " P100" ,
107
- "quantization" : [" fp16" ],
108
- "ranking" : {
109
- "cost" : 30 ,
110
- "performance" : 20
111
- }
112
- },
113
212
"VM.GPU2.1" : {
114
213
"gpu_count" : 1 ,
115
214
"gpu_memory_in_gbs" : 16 ,
116
215
"gpu_type" : " P100" ,
117
- "quantization" : [" fp16" ],
216
+ "quantization" : [
217
+ " fp16"
218
+ ],
118
219
"ranking" : {
119
220
"cost" : 10 ,
120
221
"performance" : 10
124
225
"gpu_count" : 1 ,
125
226
"gpu_memory_in_gbs" : 16 ,
126
227
"gpu_type" : " V100" ,
127
- "quantization" : [" gptq" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
128
- "ranking" : {
228
+ "quantization" : [
229
+ " gptq" ,
230
+ " bitblas" ,
231
+ " aqlm" ,
232
+ " bitsandbytes" ,
233
+ " deepspeedfp" ,
234
+ " gguf"
235
+ ],
236
+ "ranking" : {
129
237
"cost" : 35 ,
130
- "performance" : 10
238
+ "performance" : 10
131
239
}
132
240
},
133
241
"VM.GPU3.2" : {
134
242
"gpu_count" : 2 ,
135
243
"gpu_memory_in_gbs" : 32 ,
136
244
"gpu_type" : " V100" ,
137
- "ranking" : {
245
+ "quantization" : [
246
+ " gptq" ,
247
+ " bitblas" ,
248
+ " aqlm" ,
249
+ " bitsandbytes" ,
250
+ " deepspeedfp" ,
251
+ " gguf"
252
+ ],
253
+ "ranking" : {
138
254
"cost" : 45 ,
139
- "performance" : 20
255
+ "performance" : 20
140
256
}
141
257
},
142
258
"VM.GPU3.4" : {
143
259
"gpu_count" : 4 ,
144
260
"gpu_memory_in_gbs" : 64 ,
145
261
"gpu_type" : " V100" ,
146
- "ranking" : {
262
+ "quantization" : [
263
+ " gptq" ,
264
+ " bitblas" ,
265
+ " aqlm" ,
266
+ " bitsandbytes" ,
267
+ " deepspeedfp" ,
268
+ " gguf"
269
+ ],
270
+ "ranking" : {
147
271
"cost" : 55 ,
148
- "performance" : 45
272
+ "performance" : 45
149
273
}
150
274
}
151
275
}
152
- }
276
+ }
0 commit comments