Terraform 0.11.8 and terraform-provider-azurerm 1.34.0
Affected resource: azurerm_kubernetes_cluster

resource "azurerm_kubernetes_cluster" "azure-aks" {
  name                = "${var.prefix}aks-${var.app_name}-${local.azure_location_short}"
  location            = "${data.azurerm_resource_group.azure-k8s-resource_group.location}"
  resource_group_name = "${data.azurerm_resource_group.azure-k8s-resource_group.name}"
  kubernetes_version  = "${var.k8s_version}"
  dns_prefix          = "${var.prefix}${var.app_name}"

  role_based_access_control {
    enabled = "true"

    azure_active_directory {
      client_app_id     = "${var.k8s_aad_client_app_id}"
      server_app_id     = "${var.k8s_aad_server_app_id}"
      server_app_secret = "${var.k8s_aad_server_app_secret}"
    }
  }

  # @TODO replace the terraform workspace because it will contain a -
  agent_pool_profile {
    name            = "${replace("${var.prefix}","-","")}pool${local.app_name_short}"
    count           = "${var.k8s_node_count}"
    vm_size         = "${var.k8s_node_vm_size}"
    os_type         = "Linux"
    os_disk_size_gb = "${var.k8s_node_disk_size}"
    vnet_subnet_id  = "${azurerm_subnet.azure-cluster_subnet.id}"
  }

  linux_profile {
    admin_username = "azureuser"

    ssh_key {
      key_data = "${tls_private_key.azure-aks-admin_key.public_key_openssh}"
    }
  }

  network_profile {
    network_plugin     = "azure"
    dns_service_ip     = "${cidrhost(local.service_subnet_cidr, 2)}"
    docker_bridge_cidr = "172.17.0.1/16"
    service_cidr       = "${local.service_subnet_cidr}"
    load_balancer_sku  = "basic"
  }

  service_principal {
    client_id     = "${var.k8s_sp_client_id}"
    client_secret = "${var.k8s_sp_secret}"
  }

  tags {
    costcenter = "cai"
    env        = "${terraform.workspace}"
    team       = "cai"
  }

  lifecycle {
    prevent_destroy = true
    ignore_changes  = ["network_profile"]
  }
}
Expected behavior: no changes to the AKS cluster.
Actual behavior: Terraform wants to destroy the AKS cluster and rebuild it, due to:
service_principal.0.client_secret: <sensitive> => <sensitive> (forces new resource)
The Service Principal secret and key have never changed in our CI pipelines, and after reverting back to provider 1.33.1 it works.
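A minimal sketch of a temporary workaround, assuming a standard azurerm provider block in the root module: pin the provider to 1.33.1 (the last release reported to work here) until a fixed release ships.

provider "azurerm" {
  # Pin to the last known-good release; 1.34.0 incorrectly forces AKS recreation
  version = "= 1.33.1"
}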
-/+ module.kubernetes.module.azure-kubernetes.azurerm_kubernetes_cluster.azure-aks (new resource required)
id: "/subscriptions/eb5118ad-645a-41fb-b078-330fe07d1024/resourcegroups/prod-control_plane-freemium-we-cai/providers/Microsoft.ContainerService/managedClusters/prod-aks-controlplane-we" => <computed> (forces new resource)
addon_profile.#: "1" => <computed>
agent_pool_profile.#: "1" => "1"
agent_pool_profile.0.count: "3" => "3"
agent_pool_profile.0.dns_prefix: "" => <computed>
agent_pool_profile.0.fqdn: "prod-controlplane-c629bd06.hcp.westeurope.azmk8s.io" => <computed>
agent_pool_profile.0.max_pods: "30" => <computed>
agent_pool_profile.0.name: "prodpoolcp" => "prodpoolcp"
agent_pool_profile.0.os_disk_size_gb: "30" => "30"
agent_pool_profile.0.os_type: "Linux" => "Linux"
agent_pool_profile.0.type: "AvailabilitySet" => "AvailabilitySet"
agent_pool_profile.0.vm_size: "Standard_F8s_v2" => "Standard_F8s_v2"
agent_pool_profile.0.vnet_subnet_id: "/subscriptions/eb5118ad-645a-41fb-b078-330fe07d1024/resourceGroups/prod-control_plane-freemium-we-cai/providers/Microsoft.Network/virtualNetworks/prod-vnet-controlplane-freemium-we-cai/subnets/prod-cluster_subnet-freemium-we-cai" => "/subscriptions/eb5118ad-645a-41fb-b078-330fe07d1024/resourceGroups/prod-control_plane-freemium-we-cai/providers/Microsoft.Network/virtualNetworks/prod-vnet-controlplane-freemium-we-cai/subnets/prod-cluster_subnet-freemium-we-cai"
dns_prefix: "prod-controlplane" => "prod-controlplane"
enable_pod_security_policy: "false" => <computed>
fqdn: "prod-controlplane-c629bd06.hcp.westeurope.azmk8s.io" => <computed>
kube_admin_config.#: "1" => <computed>
kube_admin_config_raw: <sensitive> => <computed> (attribute changed)
kube_config.#: "1" => <computed>
kube_config_raw: <sensitive> => <computed> (attribute changed)
kubernetes_version: "1.13.5" => "1.13.5"
linux_profile.#: "1" => "1"
linux_profile.0.admin_username: "azureuser" => "azureuser"
linux_profile.0.ssh_key.#: "1" => "1"
linux_profile.0.ssh_key.0.key_data: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDYzWSrKadRvUCirjT/D/ZPaSwOJ7Z0sql1DFl4gRjV5Rxy2OBZK2gL8C0xv2+YrZd1E1ejdjInjj4mgDN822jVbvoA3h7Ev9dfW1nVhPyAvRDQ94GUdRN5e3jP1KQ6H8GMlQg0t9iJbg5z8bG9VyMmRFTfpKER0+INsbJNj71j2qnnaoEPu/As0s+EGtnh3AserRRQGhjYH9ju5jQfNsylz1BNvajmyI11l/dJRZ2410iwCA6X61fZQ05PF3iN53TEbTZxabl5zUHQCZrvsBovTJTS1xPTBI/6CCyowbubcb9RDbSo0qC9jtdF3fKCjptgdH94/Zqkyq2MXadrhC4zIu9/gd/uk8b5n1NjQXEkyNZ8VQl01d7mmp1eOmEgApczm/bK/xGPi6F677lKAzIUHdlUdFpciSdPDAiHFg5TpKY9RFwitbCs2SMZZgWvXLXPS5YF4WpPOBUuHy6/7yI+59ZZD2iJdpr9K5P/4Q3qNVYGSgGQiBicgEcRrGPV3NyugEM62YRk7ToZN9UW+qD976e+qc7bw41hM/Z6P97IWt2M0F/T91kmNl6CGyND4RrDz7CKtcp1OWuvIvV4/HBQtnyXRkzMBN6yFGoUQo/DjP8cQuCcMAxJbXZzFhmhbmazdkvx8+BxPEE6356/ga/ofE3aNrMmIpaqIjZggTMoNQ==\n" => "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDYzWSrKadRvUCirjT/D/ZPaSwOJ7Z0sql1DFl4gRjV5Rxy2OBZK2gL8C0xv2+YrZd1E1ejdjInjj4mgDN822jVbvoA3h7Ev9dfW1nVhPyAvRDQ94GUdRN5e3jP1KQ6H8GMlQg0t9iJbg5z8bG9VyMmRFTfpKER0+INsbJNj71j2qnnaoEPu/As0s+EGtnh3AserRRQGhjYH9ju5jQfNsylz1BNvajmyI11l/dJRZ2410iwCA6X61fZQ05PF3iN53TEbTZxabl5zUHQCZrvsBovTJTS1xPTBI/6CCyowbubcb9RDbSo0qC9jtdF3fKCjptgdH94/Zqkyq2MXadrhC4zIu9/gd/uk8b5n1NjQXEkyNZ8VQl01d7mmp1eOmEgApczm/bK/xGPi6F677lKAzIUHdlUdFpciSdPDAiHFg5TpKY9RFwitbCs2SMZZgWvXLXPS5YF4WpPOBUuHy6/7yI+59ZZD2iJdpr9K5P/4Q3qNVYGSgGQiBicgEcRrGPV3NyugEM62YRk7ToZN9UW+qD976e+qc7bw41hM/Z6P97IWt2M0F/T91kmNl6CGyND4RrDz7CKtcp1OWuvIvV4/HBQtnyXRkzMBN6yFGoUQo/DjP8cQuCcMAxJbXZzFhmhbmazdkvx8+BxPEE6356/ga/ofE3aNrMmIpaqIjZggTMoNQ==\n"
location: "westeurope" => "westeurope"
name: "prod-aks-controlplane-we" => "prod-aks-controlplane-we"
network_profile.#: "1" => "1"
network_profile.0.dns_service_ip: "10.1.0.2" => "10.1.0.2"
network_profile.0.docker_bridge_cidr: "172.17.0.1/16" => "172.17.0.1/16"
network_profile.0.load_balancer_sku: "basic" => "basic"
network_profile.0.network_plugin: "azure" => "azure"
network_profile.0.network_policy: "" => <computed>
network_profile.0.pod_cidr: "" => <computed>
network_profile.0.service_cidr: "10.1.0.0/22" => "10.1.0.0/22"
node_resource_group: "MC_prod-control_plane-freemium-we-cai_prod-aks-controlplane-we_westeurope" => <computed>
resource_group_name: "prod-control_plane-freemium-we-cai" => "prod-control_plane-freemium-we-cai"
role_based_access_control.#: "1" => "1"
role_based_access_control.0.azure_active_directory.#: "1" => "1"
role_based_access_control.0.azure_active_directory.0.client_app_id: "4595190c-8a36-477d-b986-04c9559fb31e" => "4595190c-8a36-477d-b986-04c9559fb31e"
role_based_access_control.0.azure_active_directory.0.server_app_id: "5f3d7b74-6abd-4ea2-9ab3-8c0c0522f8af" => "5f3d7b74-6abd-4ea2-9ab3-8c0c0522f8af"
role_based_access_control.0.azure_active_directory.0.server_app_secret: <sensitive> => <sensitive> (attribute changed)
role_based_access_control.0.azure_active_directory.0.tenant_id: "3b8591f6-257a-41c8-84f5-2ba08008751f" => <computed>
role_based_access_control.0.enabled: "true" => "true"
service_principal.#: "1" => "1"
service_principal.0.client_id: "9295cdfa-ae76-41e8-9e2f-99ed14323dbe" => "9295cdfa-ae76-41e8-9e2f-99ed14323dbe"
service_principal.0.client_secret: <sensitive> => <sensitive> (forces new resource)
tags.%: "3" => "3"
tags.costcenter: "cai" => "cai"
tags.env: "prod" => "prod"
tags.team: "cai" => "cai"
Steps to reproduce: terraform apply on any old cluster with the new provider version.
I noticed that you are using a Terraform version more than one year old (from 16.08.2018). Can you try a current version and report back whether the error still persists?
The same problem occurs to me when trying to upgrade the provider version to 1.34.0. I am using Terraform 0.12.9.
From the output of terraform plan, I am not able to trace which property is forcing the AKS resource to be recreated.
@dominik-lekse do you use a service principal as well, or do you use it without one? I'm really curious why it would recreate the cluster when the secret didn't change. Maybe it's wrongly marked as changed.
@MichaelMeelisRTL I am using the explicit service principal. Could this issue be related to https://github.com/terraform-providers/terraform-provider-azurerm/pull/4339?
Yes it looks like it... Exactly the problem I faced and could see.
Can confirm this problem applies to 0.12.9 and 0.12.6 as well. It also does not seem to matter which provider version last touched your state (tested with both 1.29 and 1.33.1).
For what it's worth, I'm seeing the same issue. With Terraform 0.12.8 and AzureRM provider 1.33.1, no changes to code = no changes to AKS cluster.
As soon as I upgrade the AzureRM provider to 1.34, it wants to recreate the cluster.
Terraform code:
resource "azurerm_kubernetes_cluster" "primary" {
name = "REDACTED"
resource_group_name = azurerm_resource_group.services.name
location = var.primary_cluster_location
dns_prefix = "REDACTED"
agent_pool_profile {
name = "default"
count = "2"
vm_size = "Standard_A2_v2"
os_type = "Linux"
os_disk_size_gb = 30
}
linux_profile {
admin_username = "REDACTED"
ssh_key {
key_data = var.cluster_admin_ssh_key
}
}
service_principal {
client_id = var.cluster_application
client_secret = var.cluster_service_principal_password
}
}
Oddly, like @dominik-lekse, there's nothing in Terraform's output to hint at which field triggered the recreation:
# module.services.azurerm_kubernetes_cluster.primary must be replaced
-/+ resource "azurerm_kubernetes_cluster" "primary" {
- api_server_authorized_ip_ranges = [] -> null
dns_prefix = "REDACTED"
~ enable_pod_security_policy = false -> (known after apply)
~ fqdn = "REDACTED" -> (known after apply)
~ id = "/subscriptions/REDACTED/resourcegroups/REDACTED/providers/Microsoft.ContainerService/managedClusters/REDACTED" -> (known after apply)
~ kube_admin_config = [] -> (known after apply)
+ kube_admin_config_raw = (sensitive value)
~ kube_config = [
- {
- client_certificate = "REDACTED"
- client_key = "REDACTED"
- cluster_ca_certificate = "REDACTED"
- host = "https://REDACTED.azmk8s.io:443"
- password = "REDACTED"
- username = "clusterUser_REDACTED"
},
] -> (known after apply)
~ kube_config_raw = (sensitive value)
~ kubernetes_version = "1.11.2" -> (known after apply)
location = "southeastasia"
name = "REDACTED"
~ node_resource_group = "MC_REDACTED" -> (known after apply)
resource_group_name = "REDACTED"
~ tags = {} -> (known after apply)
~ addon_profile {
+ aci_connector_linux {
+ enabled = (known after apply)
+ subnet_name = (known after apply)
}
+ http_application_routing {
+ enabled = (known after apply)
+ http_application_routing_zone_name = (known after apply)
}
+ kube_dashboard {
+ enabled = (known after apply)
}
~ oms_agent {
~ enabled = false -> (known after apply)
+ log_analytics_workspace_id = (known after apply)
}
}
~ agent_pool_profile {
- availability_zones = [] -> null
count = 2
+ dns_prefix = (known after apply)
- enable_auto_scaling = false -> null
~ fqdn = "REDACTED.azmk8s.io" -> (known after apply)
- max_count = 0 -> null
~ max_pods = 110 -> (known after apply)
- min_count = 0 -> null
name = "default"
- node_taints = [] -> null
os_disk_size_gb = 30
os_type = "Linux"
type = "AvailabilitySet"
vm_size = "Standard_A2_v2"
}
linux_profile {
admin_username = "REDACTED"
ssh_key {
key_data = "ssh-rsa REDACTED"
}
}
~ network_profile {
~ dns_service_ip = "10.0.0.10" -> (known after apply)
~ docker_bridge_cidr = "172.17.0.1/16" -> (known after apply)
~ load_balancer_sku = "Basic" -> (known after apply)
~ network_plugin = "kubenet" -> (known after apply)
+ network_policy = (known after apply)
~ pod_cidr = "10.244.0.0/16" -> (known after apply)
~ service_cidr = "10.0.0.0/16" -> (known after apply)
}
~ role_based_access_control {
~ enabled = false -> (known after apply)
+ azure_active_directory {
+ client_app_id = (known after apply)
+ server_app_id = (known after apply)
+ server_app_secret = (sensitive value)
+ tenant_id = (known after apply)
}
}
~ service_principal {
client_id = "REDACTED"
+ client_secret = (sensitive value)
}
}
We experience the same problem running Terraform v0.11.13 and azurerm v1.30.1. It wants to replace the existing cluster with a new one due to
service_principal.0.client_secret: <sensitive> => <sensitive> (forces new resource)
Interestingly enough, we see the problem when running Terraform from our GitLab build pipeline (image: hashicorp/terraform:0.11.13), but everything works fine when running it from my machine (OS: Pop!_OS 18.04 LTS).
After this bug showed up, we set up a new cluster and it worked just fine running the script several times from our GitLab build pipeline, but all of a sudden the problem showed up again.
We can reproduce the differing behaviors from GitLab and from the local machine repeatedly.
We just upgraded to Terraform 0.12.7 (from 0.11.13) and azurerm 1.34.0; other than that, we changed nothing.
Now it forces AKS recreation.
# module.aks.azurerm_kubernetes_cluster.k8s must be replaced
-/+ resource "azurerm_kubernetes_cluster" "k8s" {
- api_server_authorized_ip_ranges = [] -> null
dns_prefix = "[REDACTED]"
~ enable_pod_security_policy = false -> (known after apply)
~ fqdn = "[REDACTED]" -> (known after apply)
~ id = "[REDACTED]" -> (known after apply)
~ kube_admin_config = [] -> (known after apply)
+ kube_admin_config_raw = (sensitive value)
~ kube_config = [
- {
- client_certificate = "[REDACTED]"
- client_key = "[REDACTED]"
- cluster_ca_certificate = "[REDACTED]"
- host = "[REDACTED]"
- password = "[REDACTED]"
- username = "[REDACTED]"
},
] -> (known after apply)
~ kube_config_raw = (sensitive value)
kubernetes_version = "1.12.8"
location = "westus2"
name = "[REDACTED]"
~ node_resource_group = "[REDACTED]" -> (known after apply)
resource_group_name = "[REDACTED]"
tags = {
"env" = "ops"
}
~ addon_profile {
+ http_application_routing { # forces replacement
+ enabled = false # forces replacement
+ http_application_routing_zone_name = (known after apply)
}
~ oms_agent {
enabled = true
+ log_analytics_workspace_id = "[REDACTED]"
}
}
~ agent_pool_profile {
- availability_zones = [] -> null
count = 6
+ dns_prefix = (known after apply)
- enable_auto_scaling = false -> null
~ fqdn = "[REDACTED]" -> (known after apply)
- max_count = 0 -> null
~ max_pods = 30 -> (known after apply)
- min_count = 0 -> null
name = "minion"
- node_taints = [] -> null
os_disk_size_gb = 200
os_type = "Linux"
type = "AvailabilitySet"
vm_size = "Standard_DS2_v2"
vnet_subnet_id = "[REDACTED]"
}
linux_profile {
admin_username = "ubuntu"
ssh_key {
key_data = "[REDACTED]"
}
}
~ network_profile {
~ dns_service_ip = "10.0.0.10" -> (known after apply)
~ docker_bridge_cidr = "172.17.0.1/16" -> (known after apply)
~ load_balancer_sku = "Basic" -> "basic"
network_plugin = "azure"
+ network_policy = (known after apply)
+ pod_cidr = (known after apply)
~ service_cidr = "10.0.0.0/16" -> (known after apply)
}
role_based_access_control {
enabled = true
}
~ service_principal {
client_id = "[REDACTED]"
+ client_secret = (sensitive value)
}
}
EDIT: Maybe I'm wrong, but it looks like the following parameters are causing this behavior, though enabled = false has always been the same.
+ http_application_routing { # forces replacement
+ enabled = false # forces replacement
+ http_application_routing_zone_name = (known after apply)
}
@tombuildsstuff I think this fix should be deployed for milestone 1.34.
@ajcoded unfortunately 1.34 has already shipped, which is why this is assigned to the next available milestone
One more example on our side.
We have a Terraform state created with azurerm provider version 1.33.*. When we run terraform apply with no changes to the template, but with provider 1.34, it tries to re-create the AKS cluster.
So it looks like running apply with provider 1.34 is not safe when you already have an AKS cluster created with a previous version.
I've been taking a look at this issue and have opened #4469, which includes a couple of fixes for this.
As such, this will no longer force recreation of the Kubernetes Cluster - first by retrieving the existing Client Secret from the configuration during a Read to ensure that it is set; and second (should a diff show up) by allowing the Service Principal to be rotated.
With regards to upgrading - I've confirmed the following 3 upgrade paths work successfully:
The fix for this is in #4469 which has been merged and will ship in v1.35.0 of the Azure Provider which we're planning to release in the next couple of days - but since the fix for this has been merged I'm going to close this issue for the moment.
Thanks!
This has been released in version 1.35.0 of the provider. Please see the Terraform documentation on provider versioning or reach out if you need any assistance upgrading. As an example:
provider "azurerm" {
version = "~> 1.35.0"
}
# ... other configuration ...
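For what it's worth, after bumping the version constraint you typically need to run terraform init -upgrade so Terraform re-resolves the provider constraints and downloads the 1.35.0 release; otherwise plan keeps using the previously installed provider.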
Looks solved; when I run terraform plan, it now only shows the following change:
~ service_principal {
client_id = "xxx-xxx-xxx"
+ client_secret = (sensitive value)
}
}
Can I safely run apply?
Thanks
@FrankGNL since that field's obscured: if you check your statefile and client_secret shows as an empty string, what Terraform's doing is calling the API's UpdateServicePrincipal method to update the Client ID and Client Secret to the same value, which should be a no-op (in testing, that updated the statefile to contain the Client Secret, which removes the diff) - so in short, it should be safe to apply that.
I am still facing this issue - terraform plan wants to destroy the AKS cluster, even though no changes to the cluster have been made.
I am using Terraform version 0.12.18 and azurerm 1.39.0.
I'm going to lock this issue because it has been closed for _30 days_. This helps our maintainers find and focus on the active issues.
If you feel this issue should be reopened, we encourage creating a new issue linking back to this one for added context. If you feel I made an error, please reach out to my human friends at [email protected]. Thanks!