diff --git a/tools/kubernetes/terraform/examples/v5e/main.tf b/tools/kubernetes/terraform/examples/v5e/main.tf
index c3b6990c..304251dc 100644
--- a/tools/kubernetes/terraform/examples/v5e/main.tf
+++ b/tools/kubernetes/terraform/examples/v5e/main.tf
@@ -2,6 +2,7 @@ variable "project_id" {}
 variable "resource_name_prefix" {}
 variable "region" {}
 variable "tpu_node_pools" {}
+variable "cpu_node_pool" {}
 variable "maintenance_interval" {}
 
 
@@ -11,5 +12,6 @@ module "tpu-gke" {
   resource_name_prefix = var.resource_name_prefix
   region               = var.region
   tpu_node_pools       = var.tpu_node_pools
+  cpu_node_pool        = var.cpu_node_pool
   maintenance_interval = var.maintenance_interval
 }
diff --git a/tools/kubernetes/terraform/examples/v5e/terraform.tfvars b/tools/kubernetes/terraform/examples/v5e/terraform.tfvars
index 73f76abb..61c17f97 100644
--- a/tools/kubernetes/terraform/examples/v5e/terraform.tfvars
+++ b/tools/kubernetes/terraform/examples/v5e/terraform.tfvars
@@ -98,4 +98,11 @@ tpu_node_pools = [{
   topology     = "16x16"
   policy       = "sb-compact-4d"
 }]
+cpu_node_pool = {
+  zone = ["us-east5-a", "us-east5-b", "us-east5-c"]
+  machine_type = "n2-standard-64",
+  initial_node_count_per_zone = 1,
+  min_node_count_per_zone = 1,
+  max_node_count_per_zone = 10
+}
 maintenance_interval = "PERIODIC"
diff --git a/tools/kubernetes/terraform/module/main.tf b/tools/kubernetes/terraform/module/main.tf
index 6c551df3..ac88bc27 100644
--- a/tools/kubernetes/terraform/module/main.tf
+++ b/tools/kubernetes/terraform/module/main.tf
@@ -117,3 +117,42 @@ resource "google_container_node_pool" "multihost_tpu" {
     policy_name = var.tpu_node_pools[count.index].policy
   }
 }
+
+# CPU node pool attached to the TPU cluster. It is placed in var.region and
+# spread over the zones in var.cpu_node_pool.zone; sizing comes from the
+# *_per_zone fields of var.cpu_node_pool.
+resource "google_container_node_pool" "cpu_node_pool" {
+  provider           = google-beta
+  project            = var.project_id
+  name               = "cpu-node-pool"
+  location           = var.region
+  node_locations     = var.cpu_node_pool.zone
+  cluster            = google_container_cluster.tpu_cluster.name
+  initial_node_count = var.cpu_node_pool.initial_node_count_per_zone
+
+  autoscaling {
+    min_node_count = var.cpu_node_pool.min_node_count_per_zone
+    max_node_count = var.cpu_node_pool.max_node_count_per_zone
+  }
+
+  node_config {
+    oauth_scopes = [
+      "https://www.googleapis.com/auth/cloud-platform"
+    ]
+    machine_type = var.cpu_node_pool.machine_type
+
+    metadata = {
+      disable-legacy-endpoints = "true"
+    }
+    gcfs_config {
+      enabled = true
+    }
+  }
+
+  # Per the provider docs, a change to initial_node_count forces the pool to
+  # be replaced, and resizing the pool can change the stored value. Ignore
+  # drift on this field so a resize does not destroy and recreate the pool.
+  lifecycle {
+    ignore_changes = [initial_node_count]
+  }
+}
diff --git a/tools/kubernetes/terraform/module/terraform.tfvars b/tools/kubernetes/terraform/module/terraform.tfvars
index a24e1f9c..45697b73 100644
--- a/tools/kubernetes/terraform/module/terraform.tfvars
+++ b/tools/kubernetes/terraform/module/terraform.tfvars
@@ -18,4 +18,11 @@ tpu_node_pools = [{
   machine_type = "ct4p-hightpu-4t"
   topology     = "2x2x2"
 }]
+cpu_node_pool = {
+  zone = ["us-central2-a", "us-central2-b", "us-central2-c"]
+  machine_type = "n2-standard-64",
+  initial_node_count_per_zone = 1,
+  min_node_count_per_zone = 1,
+  max_node_count_per_zone = 10
+}
 maintenance_interval = "AS_NEEDED"
diff --git a/tools/kubernetes/terraform/module/variables.tf b/tools/kubernetes/terraform/module/variables.tf
index 35f460aa..8804cec6 100644
--- a/tools/kubernetes/terraform/module/variables.tf
+++ b/tools/kubernetes/terraform/module/variables.tf
@@ -38,6 +38,32 @@ variable "tpu_node_pools" {
   }))
 }
 
+# Settings for the CPU node pool declared in this module's main.tf.
+#   zone                        - list used as the pool's node_locations
+#   machine_type                - node_config.machine_type of the pool
+#   initial_node_count_per_zone - the pool's initial_node_count
+#   min_node_count_per_zone     - autoscaling.min_node_count
+#   max_node_count_per_zone     - autoscaling.max_node_count
+variable "cpu_node_pool" {
+  description = "cpu nodepool config"
+  type = object({
+    zone                        = list(string),
+    machine_type                = string,
+    initial_node_count_per_zone = number,
+    min_node_count_per_zone     = number,
+    max_node_count_per_zone     = number
+  })
+
+  # Reject a negative minimum and any range where the minimum exceeds the maximum.
+  validation {
+    condition = (
+      var.cpu_node_pool.min_node_count_per_zone >= 0 &&
+      var.cpu_node_pool.min_node_count_per_zone <= var.cpu_node_pool.max_node_count_per_zone
+    )
+    error_message = "cpu_node_pool.min_node_count_per_zone must be >= 0 and <= cpu_node_pool.max_node_count_per_zone."
+  }
+}
+
 variable "maintenance_interval" {
   default     = "AS_NEEDED"
   description = "maintenance interval for TPU machines."