oracle
diff --git a/ads/aqua/common/errors.py
Lines changed: 5 additions & 0 deletions b/ads/aqua/common/errors.py
Lines changed: 5 additions & 0 deletions
diff --git a/ads/aqua/common/utils.py
Lines changed: 19 additions & 18 deletions b/ads/aqua/common/utils.py
Lines changed: 19 additions & 18 deletions
diff --git a/ads/aqua/extension/__init__.py
Lines changed: 1 addition & 1 deletion b/ads/aqua/extension/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/ads/aqua/extension/recommend_handler.py
Lines changed: 1 addition & 1 deletion b/ads/aqua/extension/recommend_handler.py
Lines changed: 1 addition & 1 deletion
diff --git a/ads/aqua/resources/gpu_shapes_index.json
Lines changed: 103 additions & 45 deletions b/ads/aqua/resources/gpu_shapes_index.json
Lines changed: 103 additions & 45 deletions
@@ -55,6 +55,11 @@ class AquaValueError(AquaError, ValueError):
     def __init__(self, reason, status=403, service_payload=None):
         super().__init__(reason, status, service_payload)
 
+class AquaRecommendationError(AquaError):
+    """Exception raised for models incompatible with shape recommendation tool."""
+
+    def __init__(self, reason, status=400, service_payload=None):
+        super().__init__(reason, status, service_payload)
 
 class AquaFileNotFoundError(AquaError, FileNotFoundError):
     """Exception raised for missing target file."""
 
@@ -1253,24 +1253,24 @@ def load_gpu_shapes_index(
     file_name = "gpu_shapes_index.json"
 
     # Try remote load
-    remote_data: Dict[str, Any] = {}
-    if CONDA_BUCKET_NS:
-        try:
-            auth = auth or authutil.default_signer()
-            storage_path = (
-                f"oci://{CONDA_BUCKET_NAME}@{CONDA_BUCKET_NS}/service_pack/{file_name}"
-            )
-            logger.debug(
-                "Loading GPU shapes index from Object Storage: %s", storage_path
-            )
-            with fsspec.open(storage_path, mode="r", **auth) as f:
-                remote_data = json.load(f)
-            logger.debug(
-                "Loaded %d shapes from Object Storage",
-                len(remote_data.get("shapes", {})),
-            )
-        except Exception as ex:
-            logger.debug("Remote load failed (%s); falling back to local", ex)
+    # remote_data: Dict[str, Any] = {}
+    # if CONDA_BUCKET_NS:
+    #     try:
+    #         auth = auth or authutil.default_signer()
+    #         storage_path = (
+    #             f"oci://{CONDA_BUCKET_NAME}@{CONDA_BUCKET_NS}/service_pack/{file_name}"
+    #         )
+    #         logger.debug(
+    #             "Loading GPU shapes index from Object Storage: %s", storage_path
+    #         )
+    #         with fsspec.open(storage_path, mode="r", **auth) as f:
+    #             remote_data = json.load(f)
+    #         logger.debug(
+    #             "Loaded %d shapes from Object Storage",
+    #             len(remote_data.get("shapes", {})),
+    #         )
+    #     except Exception as ex:
+    #         logger.debug("Remote load failed (%s); falling back to local", ex)
 
     # Load local copy
     local_data: Dict[str, Any] = {}
@@ -1287,6 +1287,7 @@ def load_gpu_shapes_index(
 
     # Merge: remote shapes override local
     local_shapes = local_data.get("shapes", {})
+    remote_data = {}
     remote_shapes = remote_data.get("shapes", {})
     merged_shapes = {**local_shapes, **remote_shapes}
 
 
@@ -12,8 +12,8 @@
 )
 from ads.aqua.extension.evaluation_handler import __handlers__ as __eval_handlers__
 from ads.aqua.extension.finetune_handler import __handlers__ as __finetune_handlers__
-from ads.aqua.extension.recommend_handler import __handlers__ as __gpu_handlers__
 from ads.aqua.extension.model_handler import __handlers__ as __model_handlers__
+from ads.aqua.extension.recommend_handler import __handlers__ as __gpu_handlers__
 from ads.aqua.extension.ui_handler import __handlers__ as __ui_handlers__
 from ads.aqua.extension.ui_websocket_handler import __handlers__ as __ws_handlers__
 
 
@@ -46,5 +46,5 @@ def post(self, *args, **kwargs):  # noqa: ARG002
         self.finish(AquaRecommendApp().which_gpu(**input_data))
 
 __handlers__ = [
-    ("gpu-shape-recommendation/?([^/]*)", AquaRecommendHandler),
+    ("recommendation/?([^/]*)", AquaRecommendHandler),
 ]
@@ -1,94 +1,152 @@
 {
   "shapes": {
-    "BM.GPU.A10.4": {
-      "gpu_count": 4,
-      "gpu_memory_in_gbs": 96,
-      "gpu_type": "A10"
+    "BM.GPU.H200.8": {
+      "gpu_count": 8,
+      "gpu_memory_in_gbs": 1128,
+      "gpu_type": "H200",
+      "quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+          "cost": 100,
+          "performance": 110
+      }
     },
-    "BM.GPU.A100-V2.8": {
+    "BM.GPU.H100.8": {
       "gpu_count": 8,
       "gpu_memory_in_gbs": 640,
-      "gpu_type": "A100"
+      "gpu_type": "H100",
+      "quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 100,
+        "performance": 100
+      }
     },
-    "BM.GPU.B4.8": {
+    "BM.GPU.MI300X.8": {
       "gpu_count": 8,
-      "gpu_memory_in_gbs": 320,
-      "gpu_type": "A100"
+      "gpu_memory_in_gbs": 1536,
+      "gpu_type": "MI300X",
+      "quantization": ["fp8", "gguf"],
+      "ranking": {
+        "cost": 90,
+        "performance": 90
+      }
     },
-    "BM.GPU.H100.8": {
+    "BM.GPU.A100-V2.8": {
       "gpu_count": 8,
       "gpu_memory_in_gbs": 640,
-      "gpu_type": "H100"
+      "gpu_type": "A100",
+      "quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 80,
+        "performance": 70
+      }
     },
-    "BM.GPU.H200.8": {
+    "BM.GPU.B4.8": {
       "gpu_count": 8,
-      "gpu_memory_in_gbs": 1128,
-      "gpu_type": "H200"
+      "gpu_memory_in_gbs": 320,
+      "gpu_type": "A100",
+      "quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 70,
+        "performance": 60
+      }
     },
     "BM.GPU.L40S-NC.4": {
       "gpu_count": 4,
       "gpu_memory_in_gbs": 192,
-      "gpu_type": "L40S"
+      "gpu_type": "L40S",
+      "quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 60,
+        "performance": 80
+      }
     },
     "BM.GPU.L40S.4": {
       "gpu_count": 4,
       "gpu_memory_in_gbs": 192,
-      "gpu_type": "L40S"
-    },
-    "BM.GPU.MI300X.8": {
-      "gpu_count": 8,
-      "gpu_memory_in_gbs": 1536,
-      "gpu_type": "MI300X"
-    },
-    "BM.GPU2.2": {
-      "gpu_count": 2,
-      "gpu_memory_in_gbs": 32,
-      "gpu_type": "P100"
-    },
-    "BM.GPU3.8": {
-      "gpu_count": 8,
-      "gpu_memory_in_gbs": 128,
-      "gpu_type": "V100"
-    },
-    "BM.GPU4.8": {
-      "gpu_count": 8,
-      "gpu_memory_in_gbs": 320,
-      "gpu_type": "A100"
+      "gpu_type": "L40S",
+      "quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 60,
+        "performance": 80
+      }
     },
     "VM.GPU.A10.1": {
       "gpu_count": 1,
       "gpu_memory_in_gbs": 24,
-      "gpu_type": "A10"
+      "gpu_type": "A10",
+      "quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking" : {
+        "cost": 20,
+        "performance": 30
+      }
     },
     "VM.GPU.A10.2": {
       "gpu_count": 2,
       "gpu_memory_in_gbs": 48,
-      "gpu_type": "A10"
+      "gpu_type": "A10",
+      "quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking" : {
+        "cost": 40,
+        "performance": 40
+      }
     },
-    "VM.GPU.A10.4": {
+    "BM.GPU.A10.4": {
       "gpu_count": 4,
       "gpu_memory_in_gbs": 96,
-      "gpu_type": "A10"
+      "gpu_type": "A10",
+      "quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking" : {
+        "cost": 50,
+        "performance": 50
+      }
+    },
+    "BM.GPU2.2": {
+      "gpu_count": 2,
+      "gpu_memory_in_gbs": 32,
+      "gpu_type": "P100",
+      "quantization": ["fp16"],
+      "ranking": {
+        "cost": 30,
+        "performance": 20
+      }
     },
     "VM.GPU2.1": {
       "gpu_count": 1,
       "gpu_memory_in_gbs": 16,
-      "gpu_type": "P100"
+      "gpu_type": "P100",
+      "quantization": ["fp16"],
+      "ranking": {
+        "cost": 10,
+        "performance": 10
+      }
     },
     "VM.GPU3.1": {
       "gpu_count": 1,
       "gpu_memory_in_gbs": 16,
-      "gpu_type": "V100"
+      "gpu_type": "V100",
+      "quantization" : ["gptq", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking" : {
+        "cost": 35,
+        "performance": 10 
+      }
     },
     "VM.GPU3.2": {
       "gpu_count": 2,
       "gpu_memory_in_gbs": 32,
-      "gpu_type": "V100"
+      "gpu_type": "V100",
+      "ranking" : {
+        "cost": 45,
+        "performance": 20 
+      }
     },
     "VM.GPU3.4": {
       "gpu_count": 4,
       "gpu_memory_in_gbs": 64,
-      "gpu_type": "V100"
+      "gpu_type": "V100",
+      "ranking" : {
+        "cost": 55,
+        "performance": 45 
+      }
     }
   }
-}
+}
Original file line number	Diff line number	Diff line change
`@@ -12,8 +12,8 @@`
`12`	`12`	`)`
`13`	`13`	`from ads.aqua.extension.evaluation_handler import __handlers__ as __eval_handlers__`
`14`	`14`	`from ads.aqua.extension.finetune_handler import __handlers__ as __finetune_handlers__`
`15`		`-from ads.aqua.extension.recommend_handler import __handlers__ as __gpu_handlers__`
`16`	`15`	`from ads.aqua.extension.model_handler import __handlers__ as __model_handlers__`
	`16`	`+from ads.aqua.extension.recommend_handler import __handlers__ as __gpu_handlers__`
`17`	`17`	`from ads.aqua.extension.ui_handler import __handlers__ as __ui_handlers__`
`18`	`18`	`from ads.aqua.extension.ui_websocket_handler import __handlers__ as __ws_handlers__`
`19`	`19`
Original file line number	Diff line number	Diff line change
`@@ -46,5 +46,5 @@ def post(self, *args, **kwargs): # noqa: ARG002`
`46`	`46`	`self.finish(AquaRecommendApp().which_gpu(**input_data))`
`47`	`47`
`48`	`48`	`__handlers__ = [`
`49`		`- ("gpu-shape-recommendation/?([^/]*)", AquaRecommendHandler),`
	`49`	`+ ("recommendation/?([^/]*)", AquaRecommendHandler),`
`50`	`50`	`]`