Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions ibm/terraform/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
module "resource-group" {
source = "terraform-ibm-modules/resource-group/ibm"
version = "1.1.5"

resource_group_name = "qdrant-example-rg"
}

module "cluster" {
source = "./modules/cluster"

# source = "terraform-ibm-modules/base-ocp-vpc/ibm"
# version = "3.18.3"

ibmcloud_api_key = var.ibmcloud_api_key
cluster_name = var.cluster_name
resource_group_id = module.resource-group.resource_group_id
region = var.region
force_delete_storage = true
vpc_id = module.vpc.vpc_id
vpc_subnets = module.vpc.subnet_detail_map
worker_pools = var.worker_pools
}

6 changes: 6 additions & 0 deletions ibm/terraform/modules/cluster/kconfig/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Ignore everything
*

# But not these files...
!.gitignore
!README.md
533 changes: 533 additions & 0 deletions ibm/terraform/modules/cluster/main.tf

Large diffs are not rendered by default.

86 changes: 86 additions & 0 deletions ibm/terraform/modules/cluster/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
##############################################################################
# Outputs
#
# The cluster is created by exactly one of two resources, selected by
# var.ignore_worker_pool_size_changes:
#   ibm_container_vpc_cluster.autoscaling_cluster[0]  (when true)
#   ibm_container_vpc_cluster.cluster[0]              (when false)
# Every output below uses the same ternary to read the attribute from
# whichever resource instance exists.
##############################################################################

# depends_on defers this output until the post-provision network health
# check (null_resource.confirm_network_healthy) has completed.
output "cluster_id" {
description = "ID of cluster created"
value = var.ignore_worker_pool_size_changes ? ibm_container_vpc_cluster.autoscaling_cluster[0].id : ibm_container_vpc_cluster.cluster[0].id
depends_on = [null_resource.confirm_network_healthy]
}

output "cluster_name" {
description = "Name of the created cluster"
value = var.ignore_worker_pool_size_changes ? ibm_container_vpc_cluster.autoscaling_cluster[0].name : ibm_container_vpc_cluster.cluster[0].name
depends_on = [null_resource.confirm_network_healthy]
}

output "cluster_crn" {
description = "CRN for the created cluster"
value = var.ignore_worker_pool_size_changes ? ibm_container_vpc_cluster.autoscaling_cluster[0].crn : ibm_container_vpc_cluster.cluster[0].crn
depends_on = [null_resource.confirm_network_healthy]
}

output "workerpools" {
description = "Worker pools created"
value = var.ignore_worker_pool_size_changes ? ibm_container_vpc_worker_pool.autoscaling_pool : ibm_container_vpc_worker_pool.pool
}

output "ocp_version" {
description = "Openshift Version of the cluster"
value = var.ignore_worker_pool_size_changes ? ibm_container_vpc_cluster.autoscaling_cluster[0].kube_version : ibm_container_vpc_cluster.cluster[0].kube_version
}

output "cos_crn" {
description = "CRN of the COS instance"
value = var.ignore_worker_pool_size_changes ? ibm_container_vpc_cluster.autoscaling_cluster[0].cos_instance_crn : ibm_container_vpc_cluster.cluster[0].cos_instance_crn
}

output "vpc_id" {
description = "ID of the clusters VPC"
value = var.ignore_worker_pool_size_changes ? ibm_container_vpc_cluster.autoscaling_cluster[0].vpc_id : ibm_container_vpc_cluster.cluster[0].vpc_id
}

output "region" {
description = "Region cluster is deployed in"
value = var.region
}

output "resource_group_id" {
description = "Resource group ID the cluster is deployed in"
value = var.ignore_worker_pool_size_changes ? ibm_container_vpc_cluster.autoscaling_cluster[0].resource_group_id : ibm_container_vpc_cluster.cluster[0].resource_group_id
}

output "ingress_hostname" {
description = "Ingress hostname"
value = var.ignore_worker_pool_size_changes ? ibm_container_vpc_cluster.autoscaling_cluster[0].ingress_hostname : ibm_container_vpc_cluster.cluster[0].ingress_hostname
}

output "private_service_endpoint_url" {
description = "Private service endpoint URL"
value = var.ignore_worker_pool_size_changes ? ibm_container_vpc_cluster.autoscaling_cluster[0].private_service_endpoint_url : ibm_container_vpc_cluster.cluster[0].private_service_endpoint_url
}

output "public_service_endpoint_url" {
description = "Public service endpoint URL"
value = var.ignore_worker_pool_size_changes ? ibm_container_vpc_cluster.autoscaling_cluster[0].public_service_endpoint_url : ibm_container_vpc_cluster.cluster[0].public_service_endpoint_url
}

output "master_url" {
description = "The URL of the Kubernetes master."
value = var.ignore_worker_pool_size_changes ? ibm_container_vpc_cluster.autoscaling_cluster[0].master_url : ibm_container_vpc_cluster.cluster[0].master_url
}

# Passed straight through from the input variable (not read from a resource).
output "kms_config" {
description = "KMS configuration details"
value = var.kms_config
}

output "operating_system" {
description = "The operating system of the workers in the default worker pool."
value = var.ignore_worker_pool_size_changes ? ibm_container_vpc_cluster.autoscaling_cluster[0].operating_system : ibm_container_vpc_cluster.cluster[0].operating_system
}

output "master_status" {
description = "The status of the Kubernetes master."
value = var.ignore_worker_pool_size_changes ? ibm_container_vpc_cluster.autoscaling_cluster[0].master_status : ibm_container_vpc_cluster.cluster[0].master_status
}
96 changes: 96 additions & 0 deletions ibm/terraform/modules/cluster/scripts/confirm_network_healthy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/bin/bash

set -e

#######################################
# Verify the kube master can reach every worker by tailing the logs of each
# calico-node pod (one per worker node).
# Arguments: $1 - "true" on the caller's final retry; print per-node
#                 diagnostics instead of discarding kubectl output.
# Returns:   0 if all nodes are reachable, 1 otherwise.
# Exits:     1 if no calico-node pods are found after all attempts.
#######################################
function run_checks() {

  last_attempt=$1
  namespace=calico-system

  MAX_ATTEMPTS=10
  attempt=0
  PODS=()
  while [ "$attempt" -lt "$MAX_ATTEMPTS" ]; do
    # Reset each attempt so a partial result from a failed read is never
    # accumulated across retries.
    PODS=()
    # Capture kubectl's output first so its exit status is actually checked.
    # The previous `if while read … done < <(kubectl …)` form tested the exit
    # status of the while loop (always 0), so kubectl failures were silently
    # ignored and the error branch below was unreachable.
    if pod_output=$(kubectl get pods -n "${namespace}"); then
      # Get list of calico-node pods (there will be 1 pod per worker node)
      while IFS='' read -r line; do
        [ -n "$line" ] && PODS+=("$line")
      done < <(printf '%s\n' "$pod_output" | grep calico-node | cut -f1 -d ' ')
      if [ ${#PODS[@]} -eq 0 ]; then
        echo "No calico-node pods found. Retrying in 10s. (Attempt $((attempt+1)) / $MAX_ATTEMPTS)"
        sleep 10
        attempt=$((attempt+1))
      else
        # Pods found, break out of loop
        break
      fi
    else
      echo "Error getting calico-node pods. Retrying in 10s. (Attempt $((attempt+1)) / $MAX_ATTEMPTS)"
      sleep 10
      attempt=$((attempt+1))
    fi
  done

  if [ ${#PODS[@]} -eq 0 ]; then
    echo "No calico-node pods found after $MAX_ATTEMPTS attempts. Exiting."
    exit 1
  fi

  # A successful `kubectl logs --tail=0` against a pod indicates the master
  # can reach that worker; a failure marks the whole check unhealthy.
  healthy=true
  for pod in "${PODS[@]}"; do
    command="kubectl logs ${pod} -n ${namespace} --tail=0"
    # If it is the last attempt then print the output
    if [ "${last_attempt}" == true ]; then
      node=$(kubectl get pod "$pod" -n "${namespace}" -o=jsonpath='{.spec.nodeName}')
      echo "Checking node: $node"
      if ! ${command}; then
        healthy=false
      else
        echo "OK"
      fi
    # Otherwise redirect output to /dev/null
    else
      if ! ${command} &> /dev/null; then
        healthy=false
      fi
    fi
  done

  if [ "$healthy" == "false" ]; then
    return 1
  else
    return 0
  fi

}

# Retry policy: up to number_retries+1 calls of run_checks, waiting
# retry_wait_time seconds between failures. The final call runs with
# last_attempt=true so run_checks prints per-node diagnostics.
counter=0
number_retries=40
retry_wait_time=60

echo "Running script to ensure kube master can communicate with all worker nodes.."

while [ "${counter}" -le "${number_retries}" ]; do

  # Only the very last iteration is flagged as the final attempt.
  if [ "${counter}" -eq "${number_retries}" ]; then
    last_attempt=true
  else
    last_attempt=false
  fi

  counter=$((counter + 1))
  if run_checks "${last_attempt}"; then
    break
  fi

  if [ "${counter}" -gt "${number_retries}" ]; then
    echo "Maximum attempts reached, giving up."
    echo
    echo "Found kube master is unable to communicate with one or more of its workers."
    echo "Please create a support issue with IBM Cloud and include the error message."
    exit 1
  fi

  echo "Retrying in ${retry_wait_time}s. (Retry attempt ${counter} / ${number_retries})"
  sleep "${retry_wait_time}"
done

echo "Success! Master can communicate with all worker nodes."
23 changes: 23 additions & 0 deletions ibm/terraform/modules/cluster/scripts/get_config_map_status.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash

# Wait for the cluster-autoscaler ConfigMap to exist, polling once per
# minute for up to MAX_ATTEMPTS attempts. Exits 1 if it never appears.

set -e

CONFIGMAP_NAME="iks-ca-configmap"
NAMESPACE="kube-system"
COUNTER=0
MAX_ATTEMPTS=40

while [[ $COUNTER -lt $MAX_ATTEMPTS ]] && ! kubectl get configmap "$CONFIGMAP_NAME" -n "$NAMESPACE" &>/dev/null; do
  COUNTER=$((COUNTER + 1))
  echo "Attempt $COUNTER: ConfigMap '$CONFIGMAP_NAME' not found in namespace '$NAMESPACE', retrying..."
  sleep 60
done

# Re-check existence directly instead of inferring the result from the
# counter: the counter test reported failure even if the ConfigMap appeared
# right after the final sleep. Also route the success message to stdout and
# the failure message to stderr (they were previously swapped).
if kubectl get configmap "$CONFIGMAP_NAME" -n "$NAMESPACE" &>/dev/null; then
  echo "ConfigMap '$CONFIGMAP_NAME' is now available."
else
  echo "ConfigMap '$CONFIGMAP_NAME' did not become available within $MAX_ATTEMPTS attempts." >&2
  # Output for debugging
  kubectl get configmaps -n "$NAMESPACE"
  exit 1
fi
69 changes: 69 additions & 0 deletions ibm/terraform/modules/cluster/scripts/reset_iks_api_key.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/bin/bash

# Reset the IBM Cloud Kubernetes Service API key ("containers-kubernetes-key")
# for a region + resource group, unless a key covering that combination
# already exists.
# Arguments: $1 - region, $2 - resource group ID.
# Requires:  IBMCLOUD_API_KEY environment variable for `ibmcloud login`.

set -euo pipefail

# Use :- defaults so the guard messages below are reachable: under `set -u`
# a bare "$1" / "${IBMCLOUD_API_KEY}" would abort with "unbound variable"
# before we could print a helpful error.
REGION="${1:-}"
RESOURCE_GROUP_ID="${2:-}"
APIKEY_KEY_NAME="containers-kubernetes-key"

# Expects the environment variable $IBMCLOUD_API_KEY to be set
if [[ -z "${IBMCLOUD_API_KEY:-}" ]]; then
  echo "API key must be set with IBMCLOUD_API_KEY environment variable" >&2
  exit 1
fi

if [[ -z "${REGION}" ]]; then
  echo "Region must be passed as first input script argument" >&2
  exit 1
fi

if [[ -z "${RESOURCE_GROUP_ID}" ]]; then
  echo "Resource_group_id must be passed as second input script argument" >&2
  exit 1
fi

# Login to ibmcloud with cli (up to 3 attempts; NOTE(review): as in the
# original, the script continues even if all login attempts fail).
attempts=1
until ibmcloud login -q -r "${REGION}" -g "${RESOURCE_GROUP_ID}" || [ "$attempts" -ge 3 ]; do
  attempts=$((attempts + 1))
  echo "Error logging in to IBM Cloud CLI..." >&2
  sleep 5
done

# Run api-key reset only if a key for this region + resource group does not
# already exist. Read via process substitution: the previous
# `cmd | while read` pipeline ran the loop body in a subshell, so every
# array append was lost and `reset` was unconditionally true.
reset=true
key_descriptions=()
while IFS='' read -r line; do
  key_descriptions+=("$line")
done < <(ibmcloud iam api-keys --all --output json | jq -r --arg name "${APIKEY_KEY_NAME}" '.[] | select(.name == $name) | .description')

if [ ${#key_descriptions[@]} -eq 0 ]; then
  echo "No key descriptions found."
else
  # A key whose description mentions both the region and the resource group
  # ID already covers this cluster's scope — skip the reset.
  for i in "${key_descriptions[@]}"; do
    if [[ "$i" =~ ${REGION} ]] && [[ "$i" =~ ${RESOURCE_GROUP_ID} ]]; then
      echo "Found key named ${APIKEY_KEY_NAME} which covers clusters in ${REGION} and resource group ID ${RESOURCE_GROUP_ID}"
      reset=false
      break
    fi
  done
fi

if [ "${reset}" == true ]; then
  # Build the command as an array: the previous `yes | "${cmd}"` quoted the
  # whole string, making bash search for a single command literally named
  # "ibmcloud ks api-key reset --region <region>" — and its
  # `|| echo "... && exit $?"` put the exit inside the message, so failures
  # were never fatal. Feed confirmations from a process substitution rather
  # than `yes |` so yes's SIGPIPE status cannot trip `pipefail`.
  cmd=(ibmcloud ks api-key reset --region "${REGION}")
  if ! "${cmd[@]}" < <(yes); then
    echo "Error executing command: ${cmd[*]}" >&2
    exit 1
  fi
  # sleep for 10 secs to allow the new key to be replicated across backend DB
  # instances before attempting to create cluster
  sleep 10
fi
Loading