Skip to content

Commit 4461af7

Browse files
committed
init implementation for gpu recommendations
1 parent bd026e7 commit 4461af7

File tree

11 files changed: 357 additions, 275 deletions

ads/aqua/common/errors.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,11 @@ class AquaValueError(AquaError, ValueError):
5555
def __init__(self, reason, status=403, service_payload=None):
5656
super().__init__(reason, status, service_payload)
5757

58+
class AquaRecommendationError(AquaError):
59+
"""Exception raised for models incompatible with shape recommendation tool."""
60+
61+
def __init__(self, reason, status=400, service_payload=None):
62+
super().__init__(reason, status, service_payload)
5863

5964
class AquaFileNotFoundError(AquaError, FileNotFoundError):
6065
"""Exception raised for missing target file."""

ads/aqua/common/utils.py

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1253,24 +1253,24 @@ def load_gpu_shapes_index(
12531253
file_name = "gpu_shapes_index.json"
12541254

12551255
# Try remote load
1256-
remote_data: Dict[str, Any] = {}
1257-
if CONDA_BUCKET_NS:
1258-
try:
1259-
auth = auth or authutil.default_signer()
1260-
storage_path = (
1261-
f"oci://{CONDA_BUCKET_NAME}@{CONDA_BUCKET_NS}/service_pack/{file_name}"
1262-
)
1263-
logger.debug(
1264-
"Loading GPU shapes index from Object Storage: %s", storage_path
1265-
)
1266-
with fsspec.open(storage_path, mode="r", **auth) as f:
1267-
remote_data = json.load(f)
1268-
logger.debug(
1269-
"Loaded %d shapes from Object Storage",
1270-
len(remote_data.get("shapes", {})),
1271-
)
1272-
except Exception as ex:
1273-
logger.debug("Remote load failed (%s); falling back to local", ex)
1256+
# remote_data: Dict[str, Any] = {}
1257+
# if CONDA_BUCKET_NS:
1258+
# try:
1259+
# auth = auth or authutil.default_signer()
1260+
# storage_path = (
1261+
# f"oci://{CONDA_BUCKET_NAME}@{CONDA_BUCKET_NS}/service_pack/{file_name}"
1262+
# )
1263+
# logger.debug(
1264+
# "Loading GPU shapes index from Object Storage: %s", storage_path
1265+
# )
1266+
# with fsspec.open(storage_path, mode="r", **auth) as f:
1267+
# remote_data = json.load(f)
1268+
# logger.debug(
1269+
# "Loaded %d shapes from Object Storage",
1270+
# len(remote_data.get("shapes", {})),
1271+
# )
1272+
# except Exception as ex:
1273+
# logger.debug("Remote load failed (%s); falling back to local", ex)
12741274

12751275
# Load local copy
12761276
local_data: Dict[str, Any] = {}
@@ -1287,6 +1287,7 @@ def load_gpu_shapes_index(
12871287

12881288
# Merge: remote shapes override local
12891289
local_shapes = local_data.get("shapes", {})
1290+
remote_data = {}
12901291
remote_shapes = remote_data.get("shapes", {})
12911292
merged_shapes = {**local_shapes, **remote_shapes}
12921293

ads/aqua/extension/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
)
1313
from ads.aqua.extension.evaluation_handler import __handlers__ as __eval_handlers__
1414
from ads.aqua.extension.finetune_handler import __handlers__ as __finetune_handlers__
15-
from ads.aqua.extension.recommend_handler import __handlers__ as __gpu_handlers__
1615
from ads.aqua.extension.model_handler import __handlers__ as __model_handlers__
16+
from ads.aqua.extension.recommend_handler import __handlers__ as __gpu_handlers__
1717
from ads.aqua.extension.ui_handler import __handlers__ as __ui_handlers__
1818
from ads.aqua.extension.ui_websocket_handler import __handlers__ as __ws_handlers__
1919

ads/aqua/extension/recommend_handler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,5 +46,5 @@ def post(self, *args, **kwargs): # noqa: ARG002
4646
self.finish(AquaRecommendApp().which_gpu(**input_data))
4747

4848
__handlers__ = [
49-
("gpu-shape-recommendation/?([^/]*)", AquaRecommendHandler),
49+
("recommendation/?([^/]*)", AquaRecommendHandler),
5050
]
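
gpu_shapes_index.json (file path not captured in this extract; name inferred from the `file_name` value in `load_gpu_shapes_index` — confirm against the commit)

Lines changed: 103 additions & 45 deletions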
Original file line numberDiff line numberDiff line change
@@ -1,94 +1,152 @@
11
{
22
"shapes": {
3-
"BM.GPU.A10.4": {
4-
"gpu_count": 4,
5-
"gpu_memory_in_gbs": 96,
6-
"gpu_type": "A10"
3+
"BM.GPU.H200.8": {
4+
"gpu_count": 8,
5+
"gpu_memory_in_gbs": 1128,
6+
"gpu_type": "H200",
7+
"quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
8+
"ranking": {
9+
"cost": 100,
10+
"performance": 110
11+
}
712
},
8-
"BM.GPU.A100-V2.8": {
13+
"BM.GPU.H100.8": {
914
"gpu_count": 8,
1015
"gpu_memory_in_gbs": 640,
11-
"gpu_type": "A100"
16+
"gpu_type": "H100",
17+
"quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
18+
"ranking": {
19+
"cost": 100,
20+
"performance": 100
21+
}
1222
},
13-
"BM.GPU.B4.8": {
23+
"BM.GPU.MI300X.8": {
1424
"gpu_count": 8,
15-
"gpu_memory_in_gbs": 320,
16-
"gpu_type": "A100"
25+
"gpu_memory_in_gbs": 1536,
26+
"gpu_type": "MI300X",
27+
"quantization": ["fp8", "gguf"],
28+
"ranking": {
29+
"cost": 90,
30+
"performance": 90
31+
}
1732
},
18-
"BM.GPU.H100.8": {
33+
"BM.GPU.A100-V2.8": {
1934
"gpu_count": 8,
2035
"gpu_memory_in_gbs": 640,
21-
"gpu_type": "H100"
36+
"gpu_type": "A100",
37+
"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
38+
"ranking": {
39+
"cost": 80,
40+
"performance": 70
41+
}
2242
},
23-
"BM.GPU.H200.8": {
43+
"BM.GPU.B4.8": {
2444
"gpu_count": 8,
25-
"gpu_memory_in_gbs": 1128,
26-
"gpu_type": "H200"
45+
"gpu_memory_in_gbs": 320,
46+
"gpu_type": "A100",
47+
"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
48+
"ranking": {
49+
"cost": 70,
50+
"performance": 60
51+
}
2752
},
2853
"BM.GPU.L40S-NC.4": {
2954
"gpu_count": 4,
3055
"gpu_memory_in_gbs": 192,
31-
"gpu_type": "L40S"
56+
"gpu_type": "L40S",
57+
"quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
58+
"ranking": {
59+
"cost": 60,
60+
"performance": 80
61+
}
3262
},
3363
"BM.GPU.L40S.4": {
3464
"gpu_count": 4,
3565
"gpu_memory_in_gbs": 192,
36-
"gpu_type": "L40S"
37-
},
38-
"BM.GPU.MI300X.8": {
39-
"gpu_count": 8,
40-
"gpu_memory_in_gbs": 1536,
41-
"gpu_type": "MI300X"
42-
},
43-
"BM.GPU2.2": {
44-
"gpu_count": 2,
45-
"gpu_memory_in_gbs": 32,
46-
"gpu_type": "P100"
47-
},
48-
"BM.GPU3.8": {
49-
"gpu_count": 8,
50-
"gpu_memory_in_gbs": 128,
51-
"gpu_type": "V100"
52-
},
53-
"BM.GPU4.8": {
54-
"gpu_count": 8,
55-
"gpu_memory_in_gbs": 320,
56-
"gpu_type": "A100"
66+
"gpu_type": "L40S",
67+
"quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
68+
"ranking": {
69+
"cost": 60,
70+
"performance": 80
71+
}
5772
},
5873
"VM.GPU.A10.1": {
5974
"gpu_count": 1,
6075
"gpu_memory_in_gbs": 24,
61-
"gpu_type": "A10"
76+
"gpu_type": "A10",
77+
"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
78+
"ranking" : {
79+
"cost": 20,
80+
"performance": 30
81+
}
6282
},
6383
"VM.GPU.A10.2": {
6484
"gpu_count": 2,
6585
"gpu_memory_in_gbs": 48,
66-
"gpu_type": "A10"
86+
"gpu_type": "A10",
87+
"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
88+
"ranking" : {
89+
"cost": 40,
90+
"performance": 40
91+
}
6792
},
68-
"VM.GPU.A10.4": {
93+
"BM.GPU.A10.4": {
6994
"gpu_count": 4,
7095
"gpu_memory_in_gbs": 96,
71-
"gpu_type": "A10"
96+
"gpu_type": "A10",
97+
"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
98+
"ranking" : {
99+
"cost": 50,
100+
"performance": 50
101+
}
102+
},
103+
"BM.GPU2.2": {
104+
"gpu_count": 2,
105+
"gpu_memory_in_gbs": 32,
106+
"gpu_type": "P100",
107+
"quantization": ["fp16"],
108+
"ranking": {
109+
"cost": 30,
110+
"performance": 20
111+
}
72112
},
73113
"VM.GPU2.1": {
74114
"gpu_count": 1,
75115
"gpu_memory_in_gbs": 16,
76-
"gpu_type": "P100"
116+
"gpu_type": "P100",
117+
"quantization": ["fp16"],
118+
"ranking": {
119+
"cost": 10,
120+
"performance": 10
121+
}
77122
},
78123
"VM.GPU3.1": {
79124
"gpu_count": 1,
80125
"gpu_memory_in_gbs": 16,
81-
"gpu_type": "V100"
126+
"gpu_type": "V100",
127+
"quantization" : ["gptq", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
128+
"ranking" : {
129+
"cost": 35,
130+
"performance": 10
131+
}
82132
},
83133
"VM.GPU3.2": {
84134
"gpu_count": 2,
85135
"gpu_memory_in_gbs": 32,
86-
"gpu_type": "V100"
136+
"gpu_type": "V100",
137+
"ranking" : {
138+
"cost": 45,
139+
"performance": 20
140+
}
87141
},
88142
"VM.GPU3.4": {
89143
"gpu_count": 4,
90144
"gpu_memory_in_gbs": 64,
91-
"gpu_type": "V100"
145+
"gpu_type": "V100",
146+
"ranking" : {
147+
"cost": 55,
148+
"performance": 45
149+
}
92150
}
93151
}
94-
}
152+
}

0 commit comments

Comments
 (0)