1
1
{
2
2
"shapes" : {
3
- "BM.GPU.A10.4" : {
4
- "gpu_count" : 4 ,
5
- "gpu_memory_in_gbs" : 96 ,
6
- "gpu_type" : " A10"
3
+ "BM.GPU.H200.8" : {
4
+ "gpu_count" : 8 ,
5
+ "gpu_memory_in_gbs" : 1128 ,
6
+ "gpu_type" : " H200" ,
7
+ "quantization" : [" awq" , " gptq" , " marlin" , " fp8" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
8
+ "ranking" : {
9
+ "cost" : 100 ,
10
+ "performance" : 110
11
+ }
7
12
},
8
- "BM.GPU.A100-V2.8" : {
13
+ "BM.GPU.H100.8" : {
9
14
"gpu_count" : 8 ,
10
15
"gpu_memory_in_gbs" : 640 ,
11
- "gpu_type" : " A100"
16
+ "gpu_type" : " H100" ,
17
+ "quantization" : [" awq" , " gptq" , " marlin" , " fp8" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
18
+ "ranking" : {
19
+ "cost" : 100 ,
20
+ "performance" : 100
21
+ }
12
22
},
13
- "BM.GPU.B4.8" : {
23
+ "BM.GPU.MI300X.8" : {
14
24
"gpu_count" : 8 ,
15
- "gpu_memory_in_gbs" : 320 ,
16
- "gpu_type" : " A100"
25
+ "gpu_memory_in_gbs" : 1536 ,
26
+ "gpu_type" : " MI300X" ,
27
+ "quantization" : [" fp8" , " gguf" ],
28
+ "ranking" : {
29
+ "cost" : 90 ,
30
+ "performance" : 90
31
+ }
17
32
},
18
- "BM.GPU.H100.8" : {
33
+ "BM.GPU.A100-V2.8" : {
19
34
"gpu_count" : 8 ,
20
35
"gpu_memory_in_gbs" : 640 ,
21
- "gpu_type" : " H100"
36
+ "gpu_type" : " A100" ,
37
+ "quantization" : [" awq" , " gptq" , " marlin" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
38
+ "ranking" : {
39
+ "cost" : 80 ,
40
+ "performance" : 70
41
+ }
22
42
},
23
- "BM.GPU.H200.8" : {
43
+ "BM.GPU.B4.8" : {
24
44
"gpu_count" : 8 ,
25
- "gpu_memory_in_gbs" : 1128 ,
26
- "gpu_type" : " H200"
45
+ "gpu_memory_in_gbs" : 320 ,
46
+ "gpu_type" : " A100" ,
47
+ "quantization" : [" awq" , " gptq" , " marlin" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
48
+ "ranking" : {
49
+ "cost" : 70 ,
50
+ "performance" : 60
51
+ }
27
52
},
28
53
"BM.GPU.L40S-NC.4" : {
29
54
"gpu_count" : 4 ,
30
55
"gpu_memory_in_gbs" : 192 ,
31
- "gpu_type" : " L40S"
56
+ "gpu_type" : " L40S" ,
57
+ "quantization" : [" awq" , " gptq" , " marlin" , " fp8" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
58
+ "ranking" : {
59
+ "cost" : 60 ,
60
+ "performance" : 80
61
+ }
32
62
},
33
63
"BM.GPU.L40S.4" : {
34
64
"gpu_count" : 4 ,
35
65
"gpu_memory_in_gbs" : 192 ,
36
- "gpu_type" : " L40S"
37
- },
38
- "BM.GPU.MI300X.8" : {
39
- "gpu_count" : 8 ,
40
- "gpu_memory_in_gbs" : 1536 ,
41
- "gpu_type" : " MI300X"
42
- },
43
- "BM.GPU2.2" : {
44
- "gpu_count" : 2 ,
45
- "gpu_memory_in_gbs" : 32 ,
46
- "gpu_type" : " P100"
47
- },
48
- "BM.GPU3.8" : {
49
- "gpu_count" : 8 ,
50
- "gpu_memory_in_gbs" : 128 ,
51
- "gpu_type" : " V100"
52
- },
53
- "BM.GPU4.8" : {
54
- "gpu_count" : 8 ,
55
- "gpu_memory_in_gbs" : 320 ,
56
- "gpu_type" : " A100"
66
+ "gpu_type" : " L40S" ,
67
+ "quantization" : [" awq" , " gptq" , " marlin" , " fp8" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
68
+ "ranking" : {
69
+ "cost" : 60 ,
70
+ "performance" : 80
71
+ }
57
72
},
58
73
"VM.GPU.A10.1" : {
59
74
"gpu_count" : 1 ,
60
75
"gpu_memory_in_gbs" : 24 ,
61
- "gpu_type" : " A10"
76
+ "gpu_type" : " A10" ,
77
+ "quantization" : [" awq" , " gptq" , " marlin" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
78
+ "ranking" : {
79
+ "cost" : 20 ,
80
+ "performance" : 30
81
+ }
62
82
},
63
83
"VM.GPU.A10.2" : {
64
84
"gpu_count" : 2 ,
65
85
"gpu_memory_in_gbs" : 48 ,
66
- "gpu_type" : " A10"
86
+ "gpu_type" : " A10" ,
87
+ "quantization" : [" awq" , " gptq" , " marlin" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
88
+ "ranking" : {
89
+ "cost" : 40 ,
90
+ "performance" : 40
91
+ }
67
92
},
68
- "VM.GPU.A10.4" : {
93
+ "BM.GPU.A10.4" : {
69
94
"gpu_count" : 4 ,
70
95
"gpu_memory_in_gbs" : 96 ,
71
- "gpu_type" : " A10"
96
+ "gpu_type" : " A10" ,
97
+ "quantization" : [" awq" , " gptq" , " marlin" , " int8" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
98
+ "ranking" : {
99
+ "cost" : 50 ,
100
+ "performance" : 50
101
+ }
102
+ },
103
+ "BM.GPU2.2" : {
104
+ "gpu_count" : 2 ,
105
+ "gpu_memory_in_gbs" : 32 ,
106
+ "gpu_type" : " P100" ,
107
+ "quantization" : [" fp16" ],
108
+ "ranking" : {
109
+ "cost" : 30 ,
110
+ "performance" : 20
111
+ }
72
112
},
73
113
"VM.GPU2.1" : {
74
114
"gpu_count" : 1 ,
75
115
"gpu_memory_in_gbs" : 16 ,
76
- "gpu_type" : " P100"
116
+ "gpu_type" : " P100" ,
117
+ "quantization" : [" fp16" ],
118
+ "ranking" : {
119
+ "cost" : 10 ,
120
+ "performance" : 10
121
+ }
77
122
},
78
123
"VM.GPU3.1" : {
79
124
"gpu_count" : 1 ,
80
125
"gpu_memory_in_gbs" : 16 ,
81
- "gpu_type" : " V100"
126
+ "gpu_type" : " V100" ,
127
+ "quantization" : [" gptq" , " bitblas" , " aqlm" , " bitsandbytes" , " deepspeedfp" , " gguf" ],
128
+ "ranking" : {
129
+ "cost" : 35 ,
130
+ "performance" : 10
131
+ }
82
132
},
83
133
"VM.GPU3.2" : {
84
134
"gpu_count" : 2 ,
85
135
"gpu_memory_in_gbs" : 32 ,
86
- "gpu_type" : " V100"
136
+ "gpu_type" : " V100" ,
137
+ "ranking" : {
138
+ "cost" : 45 ,
139
+ "performance" : 20
140
+ }
87
141
},
88
142
"VM.GPU3.4" : {
89
143
"gpu_count" : 4 ,
90
144
"gpu_memory_in_gbs" : 64 ,
91
- "gpu_type" : " V100"
145
+ "gpu_type" : " V100" ,
146
+ "ranking" : {
147
+ "cost" : 55 ,
148
+ "performance" : 45
149
+ }
92
150
}
93
151
}
94
- }
152
+ }
0 commit comments