Skip to content

Commit f966764

Browse files
authored
Merge pull request #61 from cloudera/feature/CAII
Feature/caii
2 parents cb40689 + 9a9c42b commit f966764

File tree

10 files changed

+509
-4
lines changed

10 files changed

+509
-4
lines changed

ClouderaSetup/OnCloud/AWS/build/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,4 @@ RUN chmod +x /usr/local/bin/docker-entrypoint.sh
4949

5050
WORKDIR "$HOME_DIR"
5151
#CMD ["/bin/bash"]
52-
ENTRYPOINT ["docker-entrypoint.sh"]
52+
ENTRYPOINT ["docker-entrypoint.sh"]
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#!/bin/bash
#
# Ensures the workshop's dedicated compute cluster exists and is RUNNING.
#
# Usage: <script> <workshop_name>
#
# Derives "<workshop_name>-cdp-env" and "<workshop_name>-compute-cluster".
# When the cluster is neither RUNNING nor CREATING it is created through the
# CDP CLI and then polled up to 60 times, 30 seconds apart, until RUNNING.
#
# Exit status: 0 on success (or when creation is skipped), 1 on a FAILED
# status, on timeout, or when the initial cluster listing cannot be obtained,
# 2 when the workshop name is missing.

set -e

workshop_name=${1:-}
if [[ -z "$workshop_name" ]]; then
  echo "Usage: $0 <workshop_name>" >&2
  exit 2
fi
env_name="${workshop_name}-cdp-env"
cluster_name="${workshop_name}-compute-cluster"

#######################################
# Prints the status of the compute cluster named $1 (nothing when absent).
# Arguments: $1 - cluster name
# Returns:   non-zero when the listing cannot be fetched or parsed
#######################################
get_cluster_status() {
  local listing
  # Fetch the listing on its own so a failing `cdp` call is reported instead
  # of being mistaken for "cluster does not exist".
  listing=$(cdp compute list-clusters) || return 1
  jq -r --arg name "$1" '
    .clusters[]
    | select(.clusterName == $name)
    | .status
  ' <<<"$listing"
}

echo "🔧 Checking compute cluster: $cluster_name"

if ! existing_cluster_status=$(get_cluster_status "$cluster_name"); then
  echo "❌ Unable to list compute clusters." >&2
  exit 1
fi

if [[ "$existing_cluster_status" == "RUNNING" ]]; then
  echo "✅ Compute cluster '$cluster_name' is already RUNNING. Skipping creation."

elif [[ "$existing_cluster_status" == "CREATING" ]]; then
  echo "ℹ️ Compute cluster '$cluster_name' is already being created. Skipping creation."

else
  echo "🚀 Creating compute cluster: $cluster_name"
  cdp compute create-cluster --environment "$env_name" --name "$cluster_name"

  echo "⏳ Waiting for compute cluster '$cluster_name' to reach RUNNING state..."
  for i in {1..60}; do
    # A listing failure while polling only costs this attempt; the status is
    # reported as empty and the loop tries again after the sleep.
    current_status=$(get_cluster_status "$cluster_name") || current_status=""

    echo " ➤ Attempt $i: Status = $current_status"

    if [[ "$current_status" == "RUNNING" ]]; then
      echo "✅ Compute cluster '$cluster_name' is now RUNNING."
      break
    elif [[ "$current_status" == "FAILED" ]]; then
      echo "❌ Compute cluster creation FAILED."
      exit 1
    fi
    sleep 30
  done

  # Reached when all 60 attempts were used without seeing RUNNING.
  if [[ "$current_status" != "RUNNING" ]]; then
    echo "❌ Timeout Error: Compute cluster did not reach RUNNING state."
    exit 1
  fi
fi
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"environmentName": "$workshop_name-cdp-env",
3+
"computeClusterConfiguration": {
4+
"privateCluster": false,
5+
"kubeApiAuthorizedIpRanges": [
6+
"165.1.214.236/16",
7+
"134.238.0.0/16",
8+
"208.127.0.0/16"
9+
],
10+
"workerNodeSubnets": [
11+
"subnet-054399ed657492b02",
12+
"subnet-094a93473bca14029",
13+
"subnet-025cf012f047c7392"
14+
]
15+
}
16+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/bin/bash
#
# Renders convert-v2-env.json into updated-convert-v2-env.json.
#
# Usage: <script> <env_public_subnets> <workshop_name> <local_ip>
#   env_public_subnets  JSON value (handed to jq via --argjson) that replaces
#                       .computeClusterConfiguration.workerNodeSubnets
#   workshop_name       .environmentName becomes "<workshop_name>-cdp-env"
#   local_ip            string that becomes the single entry of
#                       .computeClusterConfiguration.kubeApiAuthorizedIpRanges
#
# Exit status: jq's status when rendering fails, 2 on missing arguments.

if (( $# < 3 )); then
  echo "Usage: $0 <env_public_subnets> <workshop_name> <local_ip>" >&2
  exit 2
fi

# Template read from, and output written to, the current working directory.
template_file="convert-v2-env.json"
output_file="updated-${template_file}"

env_public_subnets=$1
workshop_name=$2
local_ip=$3

# Render into a variable first: redirecting jq straight into the output file
# would truncate it before jq runs and leave an empty file behind on failure.
rendered=$(
  jq --argjson new_subnets "$env_public_subnets" \
    --arg workshop_name "$workshop_name" \
    --arg local_ip "$local_ip" \
    '.computeClusterConfiguration.workerNodeSubnets = $new_subnets
     | .environmentName = ($workshop_name + "-cdp-env")
     | .computeClusterConfiguration.kubeApiAuthorizedIpRanges = [$local_ip]' \
    "$template_file"
) || exit

printf '%s\n' "$rendered" > "$output_file"
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
{
2+
"appName": "akt-cai2-serving-app",
3+
"environmentCrn": "crn:cdp:environments:us-west-1:d1a4553c-a799-432d-8e54-372cc2ab95f2:environment:3c67096a-ca6c-4b27-9b87-3ed26fa71110",
4+
"clusterCrn": "crn:cdp:compute:us-west-1:d1a4553c-a799-432d-8e54-372cc2ab95f2:cluster:liftie-443pky5s",
5+
"provisionK8sRequest": {
6+
"instanceGroups": [
7+
{
8+
"instanceType": "m5.4xlarge",
9+
"instanceTier": "",
10+
"instanceCount": 1,
11+
"name": "",
12+
"ingressRules": [
13+
""
14+
],
15+
"rootVolume": {
16+
"size": 256
17+
},
18+
"autoscaling": {
19+
"minInstances": 0,
20+
"maxInstances": 2,
21+
"enabled": true
22+
}
23+
},
24+
{
25+
"instanceType": "g5.12xlarge",
26+
"instanceCount": 1,
27+
"rootVolume": {
28+
"size": 256
29+
},
30+
"autoscaling": {
31+
"minInstances": 0,
32+
"maxInstances": 1,
33+
"enabled": true
34+
}
35+
}
36+
],
37+
"environmentCrn": "crn:cdp:environments:us-west-1:d1a4553c-a799-432d-8e54-372cc2ab95f2:environment:3c67096a-ca6c-4b27-9b87-3ed26fa71110",
38+
"tags": [
39+
{
40+
"key": "owner",
41+
"value": "Automation"
42+
}
43+
],
44+
"network": {
45+
"plugin": "",
46+
"topology": {
47+
"subnets": [
48+
""
49+
]
50+
}
51+
}
52+
},
53+
"usePublicLoadBalancer": true,
54+
"skipValidation": true,
55+
"loadBalancerIPWhitelists": [
56+
""
57+
],
58+
"subnetsForLoadBalancers": [
59+
""
60+
],
61+
"staticSubdomain": "akt-cai2-serving"
62+
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#!/bin/bash
#
# Renders create-serving-app-input.json into updated-serving-app-input.json,
# filling in the app name, static subdomain, and the environment/cluster CRNs
# of the workshop's RUNNING compute cluster.
#
# Usage: <script> <workshop_name>
#
# Exit status: 1 when no RUNNING "<workshop_name>-compute-cluster" is found.

set -e

# Input arguments
workshop_name=$1

# Input/output JSON files
template_file="create-serving-app-input.json"
output_file="updated-serving-app-input.json"

# Fetch envCrn and clusterCrn from CDP for a running cluster.
# `read` returns non-zero when jq prints nothing (no matching RUNNING
# cluster). Without `|| true`, `set -e` would end the script right here,
# silently, and the explanatory check below would never run.
read -r env_crn cluster_crn < <(
  cdp compute list-clusters | jq -r --arg name "${workshop_name}-compute-cluster" '
    .clusters[]
    | select(.clusterName == $name and .status == "RUNNING")
    | [.envCrn, .clusterCrn]
    | @tsv'
) || true

# Fallback check
if [[ -z "$env_crn" || -z "$cluster_crn" ]]; then
  echo "❌ No running cluster found for '${workshop_name}-compute-cluster'. Exiting."
  exit 1
fi

# Set other values
app_name="${workshop_name}-serving-app"
static_subdomain="${workshop_name}-serving-subdomain"

# Update the JSON using jq
jq --arg app_name "$app_name" \
  --arg env_crn "$env_crn" \
  --arg cluster_crn "$cluster_crn" \
  --arg static_subdomain "$static_subdomain" '
    .appName = $app_name
    | .environmentCrn = $env_crn
    | .clusterCrn = $cluster_crn
    | .staticSubdomain = $static_subdomain
  ' "$template_file" > "$output_file"

echo "✅ Updated $output_file with dynamic values."
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#!/bin/bash
#
# Cleans up the Model Registry and the dedicated compute cluster that belong
# to a workshop, then waits until neither is listed any more.
#
# Usage: <script> <workshop_name>

set -e

# Refuse to run without a workshop name: the resource names below are derived
# from it, and an empty prefix would target "-cdp-env" / "-compute-cluster".
workshop_name="${1:-}"
if [[ -z "$workshop_name" ]]; then
  echo "Usage: $0 <workshop_name>" >&2
  exit 2
fi
env_name="${workshop_name}-cdp-env"
cluster_name="${workshop_name}-compute-cluster"

echo "🔁 Starting cleanup for environment: $env_name"
10+
11+
# --- Delete ML Model Registry ---
#######################################
# Deletes the Model Registry of the environment when one is listed with
# status "installation:finished"; otherwise only reports that none was found.
# Globals:   env_name (read)
# Outputs:   progress messages on stdout
# Returns:   non-zero when the listing or the delete call fails
#######################################
delete_model_registry() {
  # Declared separately from the assignment so the pipeline's exit status is
  # not masked by `local`, and so the variable does not leak to the caller.
  local model_registry_crn
  model_registry_crn=$(cdp ml list-model-registries | jq -r --arg env_name "$env_name" '
    .modelRegistries[]
    | select(.environmentName == $env_name and .status == "installation:finished")
    | .crn
  ') || return

  if [[ -n "$model_registry_crn" ]]; then
    echo "🗑️ Deleting Model Registry: $model_registry_crn"
    cdp ml delete-model-registry --model-registry-crn "$model_registry_crn"
  else
    echo "✅ No Model Registry found"
  fi
}
25+
26+
# --- Delete Compute Cluster ---
#######################################
# Deletes the non-default compute cluster whose name equals $cluster_name,
# if one is listed; otherwise only reports that none was found.
# Globals:   cluster_name (read)
# Outputs:   progress messages on stdout
# Returns:   non-zero when the listing or the delete call fails
#######################################
delete_compute_cluster() {
  # Declared separately from the assignment so the pipeline's exit status is
  # not masked by `local`, and so the variable does not leak to the caller.
  local compute_cluster_crn
  compute_cluster_crn=$(cdp compute list-clusters | jq -r --arg name "$cluster_name" '
    .clusters[] | select(.isDefault == false and .clusterName == $name) | .clusterCrn
  ') || return

  if [[ -n "$compute_cluster_crn" ]]; then
    echo "🗑️ Deleting Compute Cluster: $compute_cluster_crn"
    cdp compute delete-cluster --cluster-crn "$compute_cluster_crn" --skip-validation
  else
    echo "✅ No Compute Cluster found"
  fi
}
38+
39+
# Run deletion in parallel
delete_model_registry &
pid_model=$!

delete_compute_cluster &
pid_compute=$!

# Wait for both; `wait <pid>` yields that job's exit status.
wait "$pid_model"
wait "$pid_compute"

# --- Final Verification Loop ---
# Polls every 15 seconds until neither resource is listed any more.
echo "🔍 Verifying deletion of all resources..."

while true; do
  still_exists=0

  # Each listing is fetched on its own: piping `cdp` straight into `jq -e`
  # inside the `if` would turn a failed listing into "no match", i.e. the
  # resource would be reported as deleted although nothing was verified.
  if ! registries_json=$(cdp ml list-model-registries); then
    echo "❌ Unable to list model registries; cannot verify deletion." >&2
    exit 1
  fi
  if jq -e --arg env_name "$env_name" '.modelRegistries[] | select(.environmentName == $env_name)' <<<"$registries_json" > /dev/null; then
    echo "⏳ Waiting: Model Registry still exists..."
    still_exists=1
  fi

  if ! clusters_json=$(cdp compute list-clusters); then
    echo "❌ Unable to list compute clusters; cannot verify deletion." >&2
    exit 1
  fi
  if jq -e --arg name "$cluster_name" '.clusters[] | select(.isDefault == false and .clusterName == $name)' <<<"$clusters_json" > /dev/null; then
    echo "⏳ Waiting: Compute Cluster still exists..."
    still_exists=1
  fi

  if [[ "$still_exists" -eq 0 ]]; then
    echo "✅ All resources successfully deleted."
    break
  fi

  sleep 15
done
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#!/bin/bash
#
# Ensures the default compute cluster of the workshop environment is RUNNING.
#
# Usage: <script> <workshop_name>
#
# When the default cluster of "<workshop_name>-cdp-env" is neither RUNNING nor
# CREATING, it is initialized from updated-convert-v2-env.json (read from the
# current directory). The status is then polled up to 60 times, 30 seconds
# apart, until RUNNING.
#
# Exit status: 1 on a FAILED status, on timeout, or when the initial cluster
# listing cannot be obtained, 2 when the workshop name is missing.

set -e

# Input: workshop name
workshop_name=${1:-}
if [[ -z "$workshop_name" ]]; then
  echo "Usage: $0 <workshop_name>" >&2
  exit 2
fi
env_name="${workshop_name}-cdp-env"

#######################################
# Prints the status of the default compute cluster of environment $1
# (nothing when no such cluster is listed).
# Arguments: $1 - environment name
# Returns:   non-zero when the listing cannot be fetched or parsed
#######################################
get_default_cluster_status() {
  local listing
  # Fetch the listing on its own so a failing `cdp` call is reported instead
  # of being mistaken for "default cluster not initialized".
  listing=$(cdp compute list-clusters) || return 1
  jq -r --arg env_name "$1" '
    .clusters[]
    | select(.isDefault == true and .envName == $env_name)
    | .status
  ' <<<"$listing"
}

echo "🔧 Checking default compute cluster status for environment: $env_name"

if ! default_cluster_status=$(get_default_cluster_status "$env_name"); then
  echo "❌ Unable to list compute clusters." >&2
  exit 1
fi

if [[ "$default_cluster_status" != "RUNNING" && "$default_cluster_status" != "CREATING" ]]; then
  echo "⚙️ Initializing default compute cluster..."
  cdp environments initialize-aws-compute-cluster --cli-input-json file://updated-convert-v2-env.json
fi

echo "⏳ Waiting for default compute cluster to reach RUNNING state..."
for i in {1..60}; do
  # A listing failure while polling only costs this attempt; the status is
  # reported as empty and the loop tries again after the sleep.
  default_cluster_status=$(get_default_cluster_status "$env_name") || default_cluster_status=""

  echo " ➤ Attempt $i: Status = $default_cluster_status"

  if [[ "$default_cluster_status" == "RUNNING" ]]; then
    echo "✅ Default compute cluster is now RUNNING."
    break
  elif [[ "$default_cluster_status" == "FAILED" ]]; then
    echo "❌ Default compute cluster initialization FAILED."
    exit 1
  fi
  sleep 30
done

# 60 attempts x 30 seconds = the 30 minutes quoted in the message below.
if [[ "$default_cluster_status" != "RUNNING" ]]; then
  echo "❌ Timeout Error: Default cluster did not reach RUNNING state after 30 minutes."
  exit 1
fi

ClouderaSetup/OnCloud/AWS/build/entrypoint/docker-entrypoint.sh

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,17 +54,24 @@ provision)
5454
if [ "$provision_keycloak" == "yes" ]; then
5555
cdp_idp_setup_user
5656
fi
57+
if [ "$provision_caii" == "yes" ]; then
58+
sleep 30
59+
echo -e "\n =============================CAII Provisioning Started=============================="
60+
provision_cai_inference
61+
fi
5762
enable_data_services
5863
echo -e "\n ==============================Infrastructure Provisioned========================================="
59-
6064
;;
6165
destroy)
6266
validating_variables
63-
#setup_aws_and_cdp_profile
67+
if [ "$provision_caii" == "yes" ]; then
68+
echo -e "\n =============================CAII deletion Started=============================="
69+
destroy_cai_inference
70+
fi
71+
disable_data_services
6472
if [ "$provision_keycloak" == "yes" ]; then
6573
cdp_idp_user_teardown
6674
fi
67-
disable_data_services
6875
destroy_hol_infra
6976
;;
7077
*)

0 commit comments

Comments
 (0)