Connect control plane reference AWS deployment#16
Conversation
There was a problem hiding this comment.
Code Review
This pull request provisions the AWS infrastructure and Kubernetes resources for the Cofide Connect control plane using Terragrunt, Terraform, and Helm. Critical feedback highlights several issues that will cause deployment failures, including circular dependencies on EKS addons, invalid and missing arguments in the NAT Gateway resource, and a hardcoded local developer path in the Helm install script. Additionally, multiple modules contain unsafe resource indexing (using [0] when count is 0) that will trigger Terraform planning errors, which should be resolved using one() or join().
3aa2712 to
1466b9e
Compare
|
Some suggestions from diff --git a/control-plane/deployment/aws/infra/modules/spire/iam-role/main.tf b/control-plane/deployment/aws/infra/modules/spire/iam-role/main.tf
index 8d69d1b..ad521fd 100644
--- a/control-plane/deployment/aws/infra/modules/spire/iam-role/main.tf
+++ b/control-plane/deployment/aws/infra/modules/spire/iam-role/main.tf
@@ -17,6 +17,9 @@ locals {
# Strip the ARN prefix to get the bare OIDC provider URL for IRSA trust conditions.
oidc_provider_url = var.oidc_provider_arn != null ? trimprefix(var.oidc_provider_arn, "arn:aws:iam::${local.account_id}:oidc-provider/") : null
+
+ # KMS alias prefix used by the SPIRE key manager plugin for all keys it creates.
+ kms_alias_prefix = "arn:aws:kms:${local.region}:${local.account_id}:alias/SPIRE_SERVER*"
}
# --- Trust policy ---
@@ -46,7 +49,7 @@ data "aws_iam_policy_document" "assume_role" {
condition {
test = "StringEquals"
variable = "aws:RequestTag/kubernetes-service-account"
- values = [var.service_account]
+ values = [var.service_account_name]
}
}
}
@@ -62,7 +65,7 @@ data "aws_iam_policy_document" "assume_role" {
condition {
test = "StringEquals"
variable = "${local.oidc_provider_url}:sub"
- values = ["system:serviceaccount:${var.namespace}:${var.service_account}"]
+ values = ["system:serviceaccount:${var.namespace}:${var.service_account_name}"]
}
condition {
test = "StringEquals"
@@ -114,15 +117,13 @@ data "aws_iam_policy_document" "kms" {
# Only the alias resource is needed here. CreateKey sets the key policy
# atomically, so the key-resource side of alias operations is covered by
# the key policy's kms:* grant to this role.
- resources = [
- "arn:aws:kms:${local.region}:${local.account_id}:alias/SPIRE_SERVER*",
- ]
+ resources = [local.kms_alias_prefix]
}
statement {
sid = "DeleteKeyAliases"
actions = ["kms:DeleteAlias"]
- resources = ["arn:aws:kms:${local.region}:${local.account_id}:alias/SPIRE_SERVER*"]
+ resources = [local.kms_alias_prefix]
}
# SPIRE's default key policy grants kms:* directly to this role on every key it creates,
@@ -165,6 +166,6 @@ resource "aws_eks_pod_identity_association" "spire_server" {
cluster_name = var.cluster_name
namespace = var.namespace
- service_account = var.service_account
+ service_account = var.service_account_name
role_arn = aws_iam_role.spire_server.arn
}
diff --git a/control-plane/deployment/aws/infra/modules/spire/iam-role/variables.tf b/control-plane/deployment/aws/infra/modules/spire/iam-role/variables.tf
index 4b90ac3..f185dbd 100644
--- a/control-plane/deployment/aws/infra/modules/spire/iam-role/variables.tf
+++ b/control-plane/deployment/aws/infra/modules/spire/iam-role/variables.tf
@@ -20,7 +20,7 @@ variable "namespace" {
description = "Kubernetes namespace the SPIRE server runs in. Used for the Pod Identity association and IRSA trust condition."
}
-variable "service_account" {
+variable "service_account_name" {
type = string
default = "spire-server"
description = "Kubernetes service account name for the SPIRE server. Used for the Pod Identity association and IRSA trust condition."
diff --git a/control-plane/deployment/aws/infra/stack/spire-server/iam-role/common.local.hcl.example b/control-plane/deployment/aws/infra/stack/spire-server/iam-role/common.local.hcl.example
index aff1245..bb3a1a3 100644
--- a/control-plane/deployment/aws/infra/stack/spire-server/iam-role/common.local.hcl.example
+++ b/control-plane/deployment/aws/infra/stack/spire-server/iam-role/common.local.hcl.example
@@ -11,8 +11,8 @@ locals {
# iam_mode = "irsa"
# Optional: override the Kubernetes namespace and service account.
- # namespace = "spire-server"
- # service_account = "spire-server"
+ # namespace = "spire-server"
+ # service_account_name = "spire-server"
# Optional: provide dependency values directly instead of reading from unit outputs.
# Use these when the relevant units have not been deployed via this stack.
diff --git a/control-plane/deployment/aws/infra/stack/spire-server/iam-role/terragrunt.hcl b/control-plane/deployment/aws/infra/stack/spire-server/iam-role/terragrunt.hcl
index f33e5f7..1c9f578 100644
--- a/control-plane/deployment/aws/infra/stack/spire-server/iam-role/terragrunt.hcl
+++ b/control-plane/deployment/aws/infra/stack/spire-server/iam-role/terragrunt.hcl
@@ -36,7 +36,7 @@ locals {
role_name = "cofide-connect-control-plane-aws-reference-arch-spire-server"
iam_mode = "pod_identity"
namespace = "spire-server"
- service_account = "spire-server"
+ service_account_name = "spire-server"
}
unit_config_path = "${get_terragrunt_dir()}/common.local.hcl"
@@ -58,7 +58,7 @@ inputs = {
role_name = local.merged_config.role_name
iam_mode = local.merged_config.iam_mode
namespace = local.merged_config.namespace
- service_account = local.merged_config.service_account
+ service_account_name = local.merged_config.service_account_name
cluster_name = local.user_cluster_name != null ? local.user_cluster_name : try(dependency.cluster.outputs.cluster_name, null)
oidc_provider_arn = local.user_oidc_provider_arn != null ? local.user_oidc_provider_arn : try(dependency.cluster.outputs.oidc_provider_arn, null) |
…in/common.local.hcl.example Co-authored-by: Mark Goddard <mark@cofide.io>
…in/common.local.hcl.example Co-authored-by: Mark Goddard <mark@cofide.io>
…tance/common.local.hcl.example Co-authored-by: Mark Goddard <mark@cofide.io>
….hcl.example Co-authored-by: Mark Goddard <mark@cofide.io>
…on.local.hcl.example Co-authored-by: Mark Goddard <mark@cofide.io>
…on.local.hcl.example Co-authored-by: Mark Goddard <mark@cofide.io>
Applied 👍 I've found the same issue when trying to use LLMs to simplify things - they're not very good at finding things to eliminate, just limited renaming/rephrasing. |
Reference deployment of the Connect control plane to AWS (other examples can be added, GCP, Azure, on-prem, etc.) currently deployed to the demo AWS account.
Easier to review by looking at how the READMEs render on the branch here https://github.com/cofide/connect-reference/tree/connect-control-plane-reference-aws-deployment/control-plane/deployment/aws - this ended up much larger than a normal PR in order to get all the pieces in place with documentation.
One immediate change remaining - generating a better diagram to replace the current ASCII one.