Terraform 0.12.6
AzureRM provider 1.35.0
azurerm_kubernetes_cluster

resource "azurerm_kubernetes_cluster" "aks" {
  name                = "${var.aks_cluster_name}"
  location            = "${var.location}"
  dns_prefix          = "${var.aks_cluster_name}"
  resource_group_name = "${var.rg_name}"
  node_resource_group = "${var.nodes_rg_name}"
  kubernetes_version  = "1.14.6"

  linux_profile {
    admin_username = "aks_admin"

    ssh_key {
      key_data = "ssh-rsa XYZ"
    }
  }

  agent_pool_profile {
    name                = "workers"
    count               = 1
    max_count           = 20
    min_count           = 1
    max_pods            = 50
    vm_size             = "Standard_B4ms"
    os_type             = "Linux"
    os_disk_size_gb     = 100
    enable_auto_scaling = true
    type                = "VirtualMachineScaleSets"
    vnet_subnet_id      = "${data.azurerm_subnet.aks_sub.id}"
    node_taints         = ["sku=workers:PreferNoSchedule"]
  }

  agent_pool_profile {
    name                = "gpularge"
    count               = 1
    max_count           = 20
    min_count           = 1
    max_pods            = 50
    vm_size             = "Standard_NC6_Promo"
    os_type             = "Linux"
    os_disk_size_gb     = 30
    enable_auto_scaling = true
    type                = "VirtualMachineScaleSets"
    vnet_subnet_id      = "${data.azurerm_subnet.aks_sub.id}"
    node_taints         = ["sku=gpularge:NoSchedule"]
  }

  agent_pool_profile {
    name                = "gpuxlarge"
    count               = 1
    max_count           = 10
    min_count           = 1
    max_pods            = 50
    vm_size             = "Standard_NC12_Promo"
    os_type             = "Linux"
    os_disk_size_gb     = 50
    enable_auto_scaling = true
    type                = "VirtualMachineScaleSets"
    vnet_subnet_id      = "${data.azurerm_subnet.aks_sub.id}"
    node_taints         = ["sku=gpuxlarge:NoSchedule"]
  }

  agent_pool_profile {
    name                = "cpularge"
    count               = 1
    max_count           = 20
    min_count           = 1
    max_pods            = 50
    vm_size             = "Standard_F8s_v2"
    os_type             = "Linux"
    os_disk_size_gb     = 200
    enable_auto_scaling = true
    type                = "VirtualMachineScaleSets"
    vnet_subnet_id      = "${data.azurerm_subnet.aks_sub.id}"
    node_taints         = ["sku=cpularge:NoSchedule"]
  }

  service_principal {
    client_id     = "${data.azuread_application.aks_sp.application_id}"
    client_secret = "${data.azurerm_key_vault_secret.aks_sp_secret.value}"
  }

  network_profile {
    network_plugin     = "azure"
    network_policy     = "calico"
    service_cidr       = "10.0.0.0/16"
    dns_service_ip     = "10.0.0.2"
    docker_bridge_cidr = "172.17.0.1/16"
  }

  role_based_access_control {
    enabled = true

    azure_active_directory {
      server_app_id     = "${var.aks_server_app_id}"
      server_app_secret = "${data.azurerm_key_vault_secret.aks_app_secret.value}"
      client_app_id     = "${var.aks_client_app_id}"
      tenant_id         = "${data.azurerm_client_config.current.tenant_id}"
    }
  }
}
The first terraform apply is fine: the cluster is created with no issues whatsoever.
On a second Terraform run, without ANY code changes, Terraform wants to replace the whole cluster because it thinks some of the agent_pool_profile blocks have changed, which is false. It looks as though the ordering of the agent_pool_profile blocks is mixed up in the state file.
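To check the stored ordering directly (a suggestion on my part, not something shown in this thread), the resource can be inspected in state before reading the plan below:

terraform state show 'module.aks.azurerm_kubernetes_cluster.aks'

The agent_pool_profile blocks are printed in the order the provider recorded them, which in this case appears to be alphabetical by name rather than the order in the configuration.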
# module.aks.azurerm_kubernetes_cluster.aks must be replaced
-/+ resource "azurerm_kubernetes_cluster" "aks" {
- api_server_authorized_ip_ranges = [] -> null
dns_prefix = "[MASKED]"
~ enable_pod_security_policy = false -> (known after apply)
~ fqdn = "[MASKED]" -> (known after apply)
~ id = "/subscriptions/[MASKED]/resourcegroups/[MASKED]/providers/Microsoft.ContainerService/managedClusters/[MASKED]" -> (known after apply)
~ kube_admin_config = [
- {
- client_certificate = "[MASKED]"
- client_key = "[MASKED]"
- host = "[MASKED]:443"
- password = "[MASKED]"
- username = "[MASKED]"
},
] -> (known after apply)
~ kube_admin_config_raw = (sensitive value)
~ kube_config = [
- {
- client_certificate = ""
- client_key = ""
- cluster_ca_certificate = "[MASKED]"
- host = "[MASKED]:443"
- password = ""
- username = "[MASKED]"
},
] -> (known after apply)
~ kube_config_raw = (sensitive value)
kubernetes_version = "1.14.6"
location = "northeurope"
name = "[MASKED]"
node_resource_group = "[MASKED]"
resource_group_name = "[MASKED]"
~ addon_profile {
+ aci_connector_linux {
+ enabled = (known after apply)
+ subnet_name = (known after apply)
}
+ http_application_routing {
+ enabled = (known after apply)
+ http_application_routing_zone_name = (known after apply)
}
+ kube_dashboard {
+ enabled = (known after apply)
}
+ oms_agent {
+ enabled = (known after apply)
+ log_analytics_workspace_id = (known after apply)
}
}
~ agent_pool_profile {
- availability_zones = [] -> null
count = 1
+ dns_prefix = (known after apply)
enable_auto_scaling = true
~ fqdn = "[MASKED]" -> (known after apply)
max_count = 20
max_pods = 50
min_count = 1
~ name = "cpularge" -> "workers" # forces replacement
~ node_taints = [
- "sku=cpularge:NoSchedule",
+ "sku=workers:PreferNoSchedule",
]
~ os_disk_size_gb = 200 -> 100 # forces replacement
os_type = "Linux"
type = "VirtualMachineScaleSets"
~ vm_size = "Standard_F8s_v2" -> "Standard_B4ms" # forces replacement
vnet_subnet_id = "/subscriptions/[MASKED]/resourceGroups/[MASKED]/providers/Microsoft.Network/virtualNetworks/[MASKED]/subnets/tf-aks-sub-northeu-01"
}
~ agent_pool_profile {
- availability_zones = [] -> null
count = 1
+ dns_prefix = (known after apply)
enable_auto_scaling = true
~ fqdn = "[MASKED]" -> (known after apply)
max_count = 20
max_pods = 50
min_count = 1
name = "gpularge"
node_taints = [
"sku=gpularge:NoSchedule",
]
os_disk_size_gb = 30
os_type = "Linux"
type = "VirtualMachineScaleSets"
vm_size = "Standard_NC6_Promo"
vnet_subnet_id = "/subscriptions/[MASKED]/resourceGroups/[MASKED]/providers/Microsoft.Network/virtualNetworks/[MASKED]/subnets/tf-aks-sub-northeu-01"
}
~ agent_pool_profile {
- availability_zones = [] -> null
count = 1
+ dns_prefix = (known after apply)
enable_auto_scaling = true
~ fqdn = "[MASKED]" -> (known after apply)
max_count = 10
max_pods = 50
min_count = 1
name = "gpuxlarge"
node_taints = [
"sku=gpuxlarge:NoSchedule",
]
os_disk_size_gb = 50
os_type = "Linux"
type = "VirtualMachineScaleSets"
vm_size = "Standard_NC12_Promo"
vnet_subnet_id = "/subscriptions/[MASKED]/resourceGroups/[MASKED]/providers/Microsoft.Network/virtualNetworks/[MASKED]/subnets/tf-aks-sub-northeu-01"
}
~ agent_pool_profile {
- availability_zones = [] -> null
count = 1
+ dns_prefix = (known after apply)
enable_auto_scaling = true
~ fqdn = "[MASKED]" -> (known after apply)
max_count = 20
max_pods = 50
min_count = 1
~ name = "workers" -> "cpularge" # forces replacement
~ node_taints = [
- "sku=workers:PreferNoSchedule",
+ "sku=cpularge:NoSchedule",
]
~ os_disk_size_gb = 100 -> 200 # forces replacement
os_type = "Linux"
type = "VirtualMachineScaleSets"
~ vm_size = "Standard_B4ms" -> "Standard_F8s_v2" # forces replacement
vnet_subnet_id = "/subscriptions/[MASKED]/resourceGroups/[MASKED]/providers/Microsoft.Network/virtualNetworks/[MASKED]/subnets/tf-aks-sub-northeu-01"
}
linux_profile {
admin_username = "aks_admin"
ssh_key {
key_data = "ssh-rsa XYZ"
}
}
~ network_profile {
dns_service_ip = "10.0.0.2"
docker_bridge_cidr = "172.17.0.1/16"
~ load_balancer_sku = "Basic" -> "basic"
network_plugin = "azure"
network_policy = "calico"
+ pod_cidr = (known after apply)
service_cidr = "10.0.0.0/16"
}
role_based_access_control {
enabled = true
azure_active_directory {
client_app_id = (sensitive value)
server_app_id = (sensitive value)
server_app_secret = (sensitive value)
tenant_id = "[MASKED]"
}
}
service_principal {
client_id = (sensitive value)
client_secret = (sensitive value)
}
}
Expected behaviour: Terraform should not detect any changes.
Actual behaviour: Terraform detects changes in the agent_pool_profile blocks and attempts to re-create the AKS cluster.
terraform apply
I have a theory: the state file shows the pools in alphabetical order by name. So in my template, which was experiencing the same issue, I swapped the two pools so they were alphabetical, and the subsequent terraform plan showed the expected behaviour of no changes instead of wanting to redeploy.
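As an illustration of that workaround (a sketch based on the configuration above, not code posted in the thread), the pools would simply be declared in alphabetical order by name so the template matches the order the provider stores them in state:

  # Pools declared alphabetically: cpularge, gpularge, gpuxlarge, workers.
  agent_pool_profile {
    name = "cpularge"
    # ... remaining arguments unchanged from the original block ...
  }

  agent_pool_profile {
    name = "gpularge"
    # ...
  }

  agent_pool_profile {
    name = "gpuxlarge"
    # ...
  }

  agent_pool_profile {
    name = "workers"
    # ...
  }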
The example provided by mikkoc above aligns with this theory.
That was exactly the issue: I changed the order to alphabetical and the problem disappeared. Thanks @timio73 !!
No problem. I would still consider this a bug, as the pools should be indexed by name and not be dependent on their order in the template.
Also encountered this, thanks for figuring out the problem!
It appears something similarly problematic occurs when you use a dynamic block:
dynamic "agent_pool_profile" {
for_each = var.agent_pools
content {
count = agent_pool_profile.value.count
name = agent_pool_profile.value.name
vm_size = agent_pool_profile.value.vm_size # az vm list-sizes --location centralus
os_type = agent_pool_profile.value.os_type
os_disk_size_gb = agent_pool_profile.value.os_disk_size_gb
}
}
If you add a new agent_pool_profile entry to the var.agent_pools variable, it forces deletion and re-creation of the cluster, even if the entries are in alphabetical order:
~ agent_pool_profile {
- availability_zones = [] -> null
count = 1
+ dns_prefix = (known after apply)
- enable_auto_scaling = false -> null
- max_count = 0 -> null
~ max_pods = 110 -> (known after apply)
- min_count = 0 -> null
name = "default"
- node_taints = [] -> null
os_disk_size_gb = 30
os_type = "Linux"
type = "AvailabilitySet"
vm_size = "Standard_B2s"
}
~ agent_pool_profile {
- availability_zones = [] -> null
count = 2
+ dns_prefix = (known after apply)
- enable_auto_scaling = false -> null
- max_count = 0 -> null
~ max_pods = 110 -> (known after apply)
- min_count = 0 -> null
name = "nodepool"
- node_taints = [] -> null
os_disk_size_gb = 50
os_type = "Linux"
type = "AvailabilitySet"
vm_size = "Standard_F2"
}
+ agent_pool_profile {
+ count = 2
+ dns_prefix = (known after apply)
+ fqdn = (known after apply)
+ max_pods = (known after apply)
+ name = "nodepool2" # forces replacement
+ os_disk_size_gb = 50 # forces replacement
+ os_type = "Linux" # forces replacement
+ type = "AvailabilitySet" # forces replacement
+ vm_size = "Standard_F2" # forces replacement
}
EDIT: I now realize my specific issue was already reported in #3971
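For the ordering problem itself, one mitigation (my own sketch, assuming a var.agent_pools list of objects like the one used above; it does not help with the add-a-new-pool case tracked in #3971) is to drive the dynamic block from a map keyed by pool name. Terraform iterates map keys in lexical order, so the blocks are always emitted alphabetically regardless of how the variable is written:

variable "agent_pools" {
  type = list(object({
    name            = string
    count           = number
    vm_size         = string
    os_type         = string
    os_disk_size_gb = number
  }))
}

dynamic "agent_pool_profile" {
  # Converting the list to a map keyed by name forces alphabetical iteration,
  # matching the order the provider stores the pools in state.
  for_each = { for pool in var.agent_pools : pool.name => pool }

  content {
    name            = agent_pool_profile.value.name
    count           = agent_pool_profile.value.count
    vm_size         = agent_pool_profile.value.vm_size
    os_type         = agent_pool_profile.value.os_type
    os_disk_size_gb = agent_pool_profile.value.os_disk_size_gb
  }
}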
This has been released in version 1.37.0 of the provider. Please see the Terraform documentation on provider versioning or reach out if you need any assistance upgrading. As an example:
provider "azurerm" {
version = "~> 1.37.0"
}
# ... other configuration ...
I'm going to lock this issue because it has been closed for _30 days_ ⏳. This helps our maintainers find and focus on the active issues.
If you feel this issue should be reopened, we encourage creating a new issue linking back to this one for added context. If you feel I made an error 🤖 🙉 , please reach out to my human friends 👉 [email protected]. Thanks!