Skip to content

[WIP][AQUA] GPU Shape Recommendation #1221

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions ads/aqua/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from ads.aqua.model import AquaModelApp
from ads.aqua.modeldeployment import AquaDeploymentApp
from ads.aqua.verify_policies import AquaVerifyPoliciesApp
from ads.aqua.shaperecommend.recommend import AquaRecommendApp
from ads.common.utils import LOG_LEVELS


Expand All @@ -31,6 +32,7 @@ class AquaCommand:
deployment = AquaDeploymentApp
evaluation = AquaEvaluationApp
verify_policies = AquaVerifyPoliciesApp
recommend = AquaRecommendApp

def __init__(
self,
Expand Down Expand Up @@ -96,18 +98,20 @@ def _validate_value(flag, value):
"If you intend to chain a function call to the result, please separate the "
"flag and the subsequent function call with separator `-`."
)

@staticmethod
def install():
"""Install ADS Aqua Extension from wheel file. Set environment variable `AQUA_EXTENSTION_PATH` to change the wheel file path.

Return
Return
------
int:
Installation status.
"""
import subprocess

wheel_file_path = os.environ.get("AQUA_EXTENSTION_PATH", "/ads/extension/adsjupyterlab_aqua_extension*.whl")
status = subprocess.run(f"pip install {wheel_file_path}",shell=True)
return status.check_returncode
wheel_file_path = os.environ.get(
"AQUA_EXTENSTION_PATH", "/ads/extension/adsjupyterlab_aqua_extension*.whl"
)
status = subprocess.run(f"pip install {wheel_file_path}", shell=True)
return status.check_returncode
17 changes: 17 additions & 0 deletions ads/aqua/common/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ class Config:
arbitrary_types_allowed = True
protected_namespaces = ()

class ComputeRank(Serializable):
    """
    Represents the relative cost and performance ranking for a compute shape.

    Both ranks are optional: entries in the shapes index that predate the
    ranking metadata may omit them, in which case the fields are ``None``.
    """

    # Fix: fields were annotated as plain `int` but defaulted to None via a
    # positional argument. Pydantic treats a bare `None` positional as the
    # default, yet the non-Optional annotation then rejects the default on
    # validation. Annotate as Optional[int] and pass `default=` explicitly.
    cost: Optional[int] = Field(
        default=None,
        description="The relative rank of the cost of the shape. Range is [10 (cost-effective), 100 (most-expensive)]",
    )

    performance: Optional[int] = Field(
        default=None,
        description="The relative rank of the performance of the shape. Range is [10 (lower performance), 110 (highest performance)]",
    )

class GPUSpecs(Serializable):
"""
Expand All @@ -61,6 +72,12 @@ class GPUSpecs(Serializable):
gpu_type: Optional[str] = Field(
default=None, description="The type of GPU (e.g., 'V100, A100, H100')."
)
quantization: Optional[List[str]] = Field(
default_factory=list, description="The quantization format supported by shape. (ex. bitsandbytes, fp8, etc.)"
)
ranking: Optional[ComputeRank] = Field(
None, description="The relative rank of the cost and performance of the shape."
)


class GPUShapesIndex(Serializable):
Expand Down
5 changes: 5 additions & 0 deletions ads/aqua/common/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ class AquaValueError(AquaError, ValueError):
def __init__(self, reason, status=403, service_payload=None):
super().__init__(reason, status, service_payload)

class AquaRecommendationError(AquaError):
    """Raised when a model is incompatible with the shape recommendation tool."""

    def __init__(self, reason, status=400, service_payload=None):
        """Record the failure reason with an HTTP 400 status by default,
        forwarding any service payload to the base Aqua error."""
        super().__init__(reason, status, service_payload)

class AquaFileNotFoundError(AquaError, FileNotFoundError):
"""Exception raised for missing target file."""
Expand Down
37 changes: 19 additions & 18 deletions ads/aqua/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1253,24 +1253,24 @@ def load_gpu_shapes_index(
file_name = "gpu_shapes_index.json"

# Try remote load
remote_data: Dict[str, Any] = {}
if CONDA_BUCKET_NS:
try:
auth = auth or authutil.default_signer()
storage_path = (
f"oci://{CONDA_BUCKET_NAME}@{CONDA_BUCKET_NS}/service_pack/{file_name}"
)
logger.debug(
"Loading GPU shapes index from Object Storage: %s", storage_path
)
with fsspec.open(storage_path, mode="r", **auth) as f:
remote_data = json.load(f)
logger.debug(
"Loaded %d shapes from Object Storage",
len(remote_data.get("shapes", {})),
)
except Exception as ex:
logger.debug("Remote load failed (%s); falling back to local", ex)
# remote_data: Dict[str, Any] = {}
# if CONDA_BUCKET_NS:
# try:
# auth = auth or authutil.default_signer()
# storage_path = (
# f"oci://{CONDA_BUCKET_NAME}@{CONDA_BUCKET_NS}/service_pack/{file_name}"
# )
# logger.debug(
# "Loading GPU shapes index from Object Storage: %s", storage_path
# )
# with fsspec.open(storage_path, mode="r", **auth) as f:
# remote_data = json.load(f)
# logger.debug(
# "Loaded %d shapes from Object Storage",
# len(remote_data.get("shapes", {})),
# )
# except Exception as ex:
# logger.debug("Remote load failed (%s); falling back to local", ex)

# Load local copy
local_data: Dict[str, Any] = {}
Expand All @@ -1287,6 +1287,7 @@ def load_gpu_shapes_index(

# Merge: remote shapes override local
local_shapes = local_data.get("shapes", {})
remote_data = {}
remote_shapes = remote_data.get("shapes", {})
merged_shapes = {**local_shapes, **remote_shapes}

Expand Down
2 changes: 2 additions & 0 deletions ads/aqua/extension/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from ads.aqua.extension.evaluation_handler import __handlers__ as __eval_handlers__
from ads.aqua.extension.finetune_handler import __handlers__ as __finetune_handlers__
from ads.aqua.extension.model_handler import __handlers__ as __model_handlers__
from ads.aqua.extension.recommend_handler import __handlers__ as __gpu_handlers__
from ads.aqua.extension.ui_handler import __handlers__ as __ui_handlers__
from ads.aqua.extension.ui_websocket_handler import __handlers__ as __ws_handlers__

Expand All @@ -24,6 +25,7 @@
+ __ui_handlers__
+ __eval_handlers__
+ __ws_handlers__
+ __gpu_handlers__
)


Expand Down
46 changes: 46 additions & 0 deletions ads/aqua/extension/recommend_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from tornado.web import HTTPError

from ads.aqua.common.decorator import handle_exceptions
from ads.aqua.extension.base_handler import AquaAPIhandler
from ads.aqua.extension.errors import Errors
from ads.aqua.shaperecommend.recommend import AquaRecommendApp


class AquaRecommendHandler(AquaAPIhandler):
    """
    Handler for Aqua GPU Recommendation REST APIs.

    Methods
    -------
    post(self, *args, **kwargs)
        Returns the compute shapes eligible to host the specified model, given
        its context length, model weights, and quantization level.

    Raises
    ------
    HTTPError: For various failure scenarios such as invalid input format, missing data, etc.
    """

    @handle_exceptions
    def post(self, *args, **kwargs):  # noqa: ARG002
        """
        Return the compute shapes eligible to host the specified model, given
        its context length, model weights, and quantization level.

        Returns
        -------
        ShapeRecommendationReport
            Report containing shape recommendations and troubleshooting advice, if any.
        """
        # Reject payloads that cannot be parsed as JSON.
        try:
            payload = self.get_json_body()
        except Exception as ex:
            raise HTTPError(400, Errors.INVALID_INPUT_DATA_FORMAT) from ex

        # An empty body carries nothing to compute a recommendation from.
        if not payload:
            raise HTTPError(400, Errors.NO_INPUT_DATA)

        self.finish(AquaRecommendApp().which_gpu(**payload))


# Route table consumed by the extension loader (ads.aqua.extension.__init__):
# maps the `recommendation` URL pattern, with an optional trailing path
# segment, to its handler class.
__handlers__ = [
    ("recommendation/?([^/]*)", AquaRecommendHandler),
]
148 changes: 103 additions & 45 deletions ads/aqua/resources/gpu_shapes_index.json
Original file line number Diff line number Diff line change
@@ -1,94 +1,152 @@
{
"shapes": {
"BM.GPU.A10.4": {
"gpu_count": 4,
"gpu_memory_in_gbs": 96,
"gpu_type": "A10"
"BM.GPU.H200.8": {
"gpu_count": 8,
"gpu_memory_in_gbs": 1128,
"gpu_type": "H200",
"quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking": {
"cost": 100,
"performance": 110
}
},
"BM.GPU.A100-V2.8": {
"BM.GPU.H100.8": {
"gpu_count": 8,
"gpu_memory_in_gbs": 640,
"gpu_type": "A100"
"gpu_type": "H100",
"quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking": {
"cost": 100,
"performance": 100
}
},
"BM.GPU.B4.8": {
"BM.GPU.MI300X.8": {
"gpu_count": 8,
"gpu_memory_in_gbs": 320,
"gpu_type": "A100"
"gpu_memory_in_gbs": 1536,
"gpu_type": "MI300X",
"quantization": ["fp8", "gguf"],
"ranking": {
"cost": 90,
"performance": 90
}
},
"BM.GPU.H100.8": {
"BM.GPU.A100-V2.8": {
"gpu_count": 8,
"gpu_memory_in_gbs": 640,
"gpu_type": "H100"
"gpu_type": "A100",
"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking": {
"cost": 80,
"performance": 70
}
},
"BM.GPU.H200.8": {
"BM.GPU.B4.8": {
"gpu_count": 8,
"gpu_memory_in_gbs": 1128,
"gpu_type": "H200"
"gpu_memory_in_gbs": 320,
"gpu_type": "A100",
"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking": {
"cost": 70,
"performance": 60
}
},
"BM.GPU.L40S-NC.4": {
"gpu_count": 4,
"gpu_memory_in_gbs": 192,
"gpu_type": "L40S"
"gpu_type": "L40S",
"quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking": {
"cost": 60,
"performance": 80
}
},
"BM.GPU.L40S.4": {
"gpu_count": 4,
"gpu_memory_in_gbs": 192,
"gpu_type": "L40S"
},
"BM.GPU.MI300X.8": {
"gpu_count": 8,
"gpu_memory_in_gbs": 1536,
"gpu_type": "MI300X"
},
"BM.GPU2.2": {
"gpu_count": 2,
"gpu_memory_in_gbs": 32,
"gpu_type": "P100"
},
"BM.GPU3.8": {
"gpu_count": 8,
"gpu_memory_in_gbs": 128,
"gpu_type": "V100"
},
"BM.GPU4.8": {
"gpu_count": 8,
"gpu_memory_in_gbs": 320,
"gpu_type": "A100"
"gpu_type": "L40S",
"quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking": {
"cost": 60,
"performance": 80
}
},
"VM.GPU.A10.1": {
"gpu_count": 1,
"gpu_memory_in_gbs": 24,
"gpu_type": "A10"
"gpu_type": "A10",
"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking" : {
"cost": 20,
"performance": 30
}
},
"VM.GPU.A10.2": {
"gpu_count": 2,
"gpu_memory_in_gbs": 48,
"gpu_type": "A10"
"gpu_type": "A10",
"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking" : {
"cost": 40,
"performance": 40
}
},
"VM.GPU.A10.4": {
"BM.GPU.A10.4": {
"gpu_count": 4,
"gpu_memory_in_gbs": 96,
"gpu_type": "A10"
"gpu_type": "A10",
"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking" : {
"cost": 50,
"performance": 50
}
},
"BM.GPU2.2": {
"gpu_count": 2,
"gpu_memory_in_gbs": 32,
"gpu_type": "P100",
"quantization": ["fp16"],
"ranking": {
"cost": 30,
"performance": 20
}
},
"VM.GPU2.1": {
"gpu_count": 1,
"gpu_memory_in_gbs": 16,
"gpu_type": "P100"
"gpu_type": "P100",
"quantization": ["fp16"],
"ranking": {
"cost": 10,
"performance": 10
}
},
"VM.GPU3.1": {
"gpu_count": 1,
"gpu_memory_in_gbs": 16,
"gpu_type": "V100"
"gpu_type": "V100",
"quantization" : ["gptq", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking" : {
"cost": 35,
"performance": 10
}
},
"VM.GPU3.2": {
"gpu_count": 2,
"gpu_memory_in_gbs": 32,
"gpu_type": "V100"
"gpu_type": "V100",
"ranking" : {
"cost": 45,
"performance": 20
}
},
"VM.GPU3.4": {
"gpu_count": 4,
"gpu_memory_in_gbs": 64,
"gpu_type": "V100"
"gpu_type": "V100",
"ranking" : {
"cost": 55,
"performance": 45
}
}
}
}
}
Loading