|
1 | | -import time |
2 | 1 | import centml |
3 | 2 | from centml.sdk.api import get_centml_client |
4 | | -from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest |
| 3 | +from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest, CServeV2Recipe |
5 | 4 |
|
6 | | -with get_centml_client() as cclient: |
7 | | - # Get fastest recipe for the Qwen model |
8 | | - fastest = cclient.get_cserve_recipe(model="Qwen/Qwen2-VL-7B-Instruct")[0].fastest |
9 | 5 |
|
10 | | - # Modify the recipe if necessary |
11 | | - fastest.recipe.additional_properties["max_num_seqs"] = 512 |
def get_fastest_cserve_config(cclient, name, model):
    """Build a CServeV2 deployment request from the fastest recipe for a model.

    Args:
        cclient: CentML client; must provide ``get_cserve_recipe`` and
            ``get_cluster_id``.
        name: Name to give the deployment.
        model: Model identifier passed to ``get_cserve_recipe``.

    Returns:
        A ``CreateCServeV2DeploymentRequest`` built from the ``fastest`` entry
        of the first recipe result, with ``min_scale`` and ``max_scale`` both
        set to 1 and no environment variables.

    Raises:
        IndexError: If the recipe lookup returns no results for ``model``.
    """
    recipes = cclient.get_cserve_recipe(model=model)
    if not recipes:
        # Same exception type a bare ``[0]`` would raise, but the message
        # names the model instead of "list index out of range".
        raise IndexError(f"No CServe recipe found for model {model!r}")
    fastest = recipes[0].fastest

    return CreateCServeV2DeploymentRequest(
        name=name,
        cluster_id=cclient.get_cluster_id(fastest.hardware_instance_id),
        hardware_instance_id=fastest.hardware_instance_id,
        recipe=fastest.recipe,
        min_scale=1,
        max_scale=1,
        env_vars={},
    )
23 | | - response = cclient.create_cserve(request) |
24 | | - print("Create deployment response: ", response) |
25 | 18 |
|
26 | | - # Get deployment details |
27 | | - deployment = cclient.get_cserve(response.id) |
28 | | - print("Deployment details: ", deployment) |
29 | 19 |
|
30 | | - # Pause the deployment |
31 | | - cclient.pause(deployment.id) |
def get_default_cserve_config(cclient, name, model, cluster_id=1001):
    """Build a CServeV2 deployment request using a default recipe for a model.

    Args:
        cclient: CentML client; must provide ``get_hardware_instances``.
        name: Name to give the deployment.
        model: Model identifier used to construct the ``CServeV2Recipe``.
        cluster_id: Cluster whose hardware instances are queried. Defaults to
            1001, the value this example previously hard-coded.

    Returns:
        A ``CreateCServeV2DeploymentRequest`` targeting the first hardware
        instance returned for ``cluster_id``, with ``min_scale`` and
        ``max_scale`` both set to 1 and no environment variables.

    Raises:
        IndexError: If no hardware instances are returned for ``cluster_id``.
    """
    hardware_instances = cclient.get_hardware_instances(cluster_id=cluster_id)
    if not hardware_instances:
        # Same exception type a bare ``[0]`` would raise, but the message
        # names the cluster that came back empty.
        raise IndexError(f"No hardware instances found for cluster {cluster_id!r}")
    hardware_instance = hardware_instances[0]

    return CreateCServeV2DeploymentRequest(
        name=name,
        cluster_id=hardware_instance.cluster_id,
        hardware_instance_id=hardware_instance.id,
        recipe=CServeV2Recipe(model=model),
        min_scale=1,
        max_scale=1,
        env_vars={},
    )
| 34 | + |
| 35 | + |
def main():
    """Create a CServeV2 deployment for Qwen2-VL-7B-Instruct and print it.

    Opens a CentML client, builds a deployment request from the fastest
    recipe, overrides ``max_num_seqs`` on the recipe, creates the deployment,
    then fetches and prints its details.
    """
    with get_centml_client() as cclient:
        # Get the configuration for the Qwen model. Swap in the commented
        # line below to start from the default recipe instead.
        qwen_config = get_fastest_cserve_config(
            cclient, name="qwen-fastest", model="Qwen/Qwen2-VL-7B-Instruct"
        )
        # qwen_config = get_default_cserve_config(
        #     cclient, name="qwen-default", model="Qwen/Qwen2-VL-7B-Instruct"
        # )

        # Modify the recipe if necessary.
        qwen_config.recipe.additional_properties["max_num_seqs"] = 512

        # Create the CServeV2 deployment.
        response = cclient.create_cserve(qwen_config)
        print("Create deployment response: ", response)

        # Get deployment details.
        deployment = cclient.get_cserve(response.id)
        print("Deployment details: ", deployment)

        # Pause and delete are left disabled, as in the original example;
        # uncomment to tear the deployment down after inspecting it.
        #
        # Pause the deployment:
        # cclient.pause(deployment.id)
        #
        # Delete the deployment:
        # cclient.delete(deployment.id)


if __name__ == "__main__":
    main()
0 commit comments