diff --git a/rules/apps.libsonnet b/rules/apps.libsonnet
index decc0125c..6c64ff4a2 100644
--- a/rules/apps.libsonnet
+++ b/rules/apps.libsonnet
@@ -205,6 +205,28 @@
       {
         name: 'k8s.rules.pod_owner',
         rules: [
+          // workload aggregation for standalone replicasets (owner_kind=""); the workload is the replicaset itself
+          {
+            record: 'namespace_workload_pod:kube_pod_owner:relabel',
+            expr: |||
+              max by (%(clusterLabel)s, namespace, workload, pod) (
+                label_replace(
+                  label_replace(
+                    kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="ReplicaSet"},
+                    "replicaset", "$1", "owner_name", "(.*)"
+                  ) * on (%(clusterLabel)s, replicaset, namespace) group_left(owner_name) topk by(%(clusterLabel)s, replicaset, namespace) (
+                    1, max by (%(clusterLabel)s, replicaset, namespace, owner_name) (
+                      kube_replicaset_owner{%(kubeStateMetricsSelector)s, owner_kind=""}
+                    )
+                  ),
+                  "workload", "$1", "replicaset", "(.*)"
+                )
+              )
+            ||| % $._config,
+            labels: {
+              workload_type: 'replicaset',
+            },
+          },
           // workload aggregation for deployments
           {
             record: 'namespace_workload_pod:kube_pod_owner:relabel',
@@ -214,9 +236,9 @@
                   label_replace(
                     kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="ReplicaSet"},
                     "replicaset", "$1", "owner_name", "(.*)"
-                  ) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (
-                    1, max by (replicaset, namespace, owner_name) (
-                      kube_replicaset_owner{%(kubeStateMetricsSelector)s}
+                  ) * on(replicaset, namespace, %(clusterLabel)s) group_left(owner_name) topk by(%(clusterLabel)s, replicaset, namespace) (
+                    1, max by (%(clusterLabel)s, replicaset, namespace, owner_name) (
+                      kube_replicaset_owner{%(kubeStateMetricsSelector)s, owner_kind="Deployment"}
                     )
                   ),
                   "workload", "$1", "owner_name", "(.*)"
@@ -227,6 +249,7 @@
               workload_type: 'deployment',
             },
           },
+          // workload aggregation for daemonsets
           {
             record: 'namespace_workload_pod:kube_pod_owner:relabel',
             expr: |||
@@ -241,6 +264,7 @@
               workload_type: 'daemonset',
             },
           },
+          // workload aggregation for statefulsets
           {
             record: 'namespace_workload_pod:kube_pod_owner:relabel',
             expr: |||
@@ -255,6 +279,7 @@
               workload_type: 'statefulset',
             },
           },
+          // workload aggregation for jobs
           {
             record: 'namespace_workload_pod:kube_pod_owner:relabel',
             expr: |||
@@ -269,6 +294,65 @@
               workload_type: 'job',
             },
           },
+          // workload aggregation for barepods (empty owner_kind and owner_name); the workload is the pod itself
+          {
+            record: 'namespace_workload_pod:kube_pod_owner:relabel',
+            expr: |||
+              max by(%(clusterLabel)s, namespace, pod, workload) (
+                label_replace(
+                  kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="", owner_name=""},
+                  "workload", "$1", "pod", "(.+)"
+                )
+              )
+            ||| % $._config,
+            labels: {
+              workload_type: 'barepod',
+            },
+          },
+          // workload aggregation for staticpods (owner_kind="Node"); the workload is taken from owner_name
+          {
+            record: 'namespace_workload_pod:kube_pod_owner:relabel',
+            expr: |||
+              max by(%(clusterLabel)s, namespace, workload, pod) (
+                label_replace(
+                  kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="Node"},
+                  "workload", "$1", "owner_name", "(.+)"
+                )
+              )
+            ||| % $._config,
+            labels: {
+              workload_type: 'staticpod',
+            },
+          },
+          // workload aggregation for non-standard owner kinds; workload_type is the owner_kind value copied verbatim
+          {
+            record: 'namespace_workload_pod:kube_pod_owner:relabel',
+            expr: |||
+              max by (%(clusterLabel)s, namespace, pod, workload, workload_type) (
+                label_replace(
+                  label_replace(
+                    kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind="ReplicaSet"}
+                    , "workload", "$1", "owner_name", "(.+)"
+                  )
+                  * on(%(clusterLabel)s, namespace, workload) group_left(owner_kind)
+                  label_replace(
+                    group by (%(clusterLabel)s, namespace, replicaset, owner_kind, owner_name) (
+                      kube_replicaset_owner{%(kubeStateMetricsSelector)s, owner_kind!="Deployment", owner_kind!=""}
+                    )
+                    , "workload", "$1", "replicaset", "(.+)"
+                  )
+                  OR
+                  label_replace(
+                    group by (%(clusterLabel)s, namespace, pod, owner_name, owner_kind) (
+                      kube_pod_owner{%(kubeStateMetricsSelector)s, owner_kind!="ReplicaSet", owner_kind!="DaemonSet", owner_kind!="StatefulSet", owner_kind!="Job", owner_kind!="Node", owner_kind!=""}
+                    )
+                    , "workload", "$1", "owner_name", "(.+)"
+                  )
+                  , "workload_type", "$1", "owner_kind", "(.+)"
+                )
+              )
+            ||| % $._config,
+          },
         ],
       },
     ],
diff --git a/tests/rules-pod-owner-test.yaml b/tests/rules-pod-owner-test.yaml
new file mode 100644
index 000000000..64780db1e
--- /dev/null
+++ b/tests/rules-pod-owner-test.yaml
@@ -0,0 +1,85 @@
+rules_files:
+- ../prometheus_rules.yaml
+
+evaluation_interval: 1m
+
+tests:
+# Deployment-owned pod: the ReplicaSet owner resolves to its Deployment; series from two instances collapse into one sample.
+- interval: 1m
+  input_series:
+  - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="ds-7cc77d965f-cgsdv",service="ksm"}'
+    values: '1 1'
+  - series: 'kube_pod_owner{endpoint="https",instance="instance2",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="ds-7cc77d965f-cgsdv",service="ksm"}'
+    values: '1 stale'
+  - series: 'kube_replicaset_owner{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="Deployment",owner_name="ds",pod="ds-777f6bf798-kq7tj",replicaset="ds-7cc77d965f",service="ksm"}'
+    values: '1 1'
+  - series: 'kube_replicaset_owner{endpoint="https",instance="instance2",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="Deployment",owner_name="ds",pod="ds-777f6bf798-kq7tj",replicaset="ds-7cc77d965f",service="ksm"}'
+    values: '1 stale'
+  promql_expr_test:
+  - eval_time: 0m
+    expr: namespace_workload_pod:kube_pod_owner:relabel
+    exp_samples:
+    - value: 1
+      labels: 'namespace_workload_pod:kube_pod_owner:relabel{cluster="kubernetes",namespace="ns1", pod="ds-7cc77d965f-cgsdv", workload="ds", workload_type="deployment"}'
+  - eval_time: 1m
+    expr: namespace_workload_pod:kube_pod_owner:relabel
+    exp_samples:
+    - value: 1
+      labels: 'namespace_workload_pod:kube_pod_owner:relabel{cluster="kubernetes",namespace="ns1", pod="ds-7cc77d965f-cgsdv", workload="ds", workload_type="deployment"}'
+
+# Bare pod: owner_kind and owner_name are both empty, as the barepod rule selects; the pod name becomes the workload.
+- interval: 1m
+  input_series:
+  - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="false",owner_kind="",owner_name="",pod="bare-pod-xyz123",service="ksm"}'
+    values: '1 1'
+  - series: 'kube_pod_owner{endpoint="https",instance="instance2",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="false",owner_kind="",owner_name="",pod="bare-pod-xyz123",service="ksm"}'
+    values: '1 stale'
+  promql_expr_test:
+  - eval_time: 0m
+    expr: namespace_workload_pod:kube_pod_owner:relabel
+    exp_samples:
+    - value: 1
+      labels: 'namespace_workload_pod:kube_pod_owner:relabel{cluster="kubernetes",namespace="ns1", pod="bare-pod-xyz123", workload="bare-pod-xyz123", workload_type="barepod"}'
+  - eval_time: 1m
+    expr: namespace_workload_pod:kube_pod_owner:relabel
+    exp_samples:
+    - value: 1
+      labels: 'namespace_workload_pod:kube_pod_owner:relabel{cluster="kubernetes",namespace="ns1", pod="bare-pod-xyz123", workload="bare-pod-xyz123", workload_type="barepod"}'
+
+# Static pod (owner_kind="Node"): owner_name becomes the workload; series from two instances collapse into one sample.
+- interval: 1m
+  input_series:
+  - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="Node",owner_name="etcd-node1",pod="etcd-node1",service="ksm"}'
+    values: '1 1'
+  - series: 'kube_pod_owner{endpoint="https",instance="instance2",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="Node",owner_name="etcd-node1",pod="etcd-node1",service="ksm"}'
+    values: '1 stale'
+  promql_expr_test:
+  - eval_time: 0m
+    expr: namespace_workload_pod:kube_pod_owner:relabel
+    exp_samples:
+    - value: 1
+      labels: 'namespace_workload_pod:kube_pod_owner:relabel{cluster="kubernetes",namespace="ns1", pod="etcd-node1", workload="etcd-node1", workload_type="staticpod"}'
+  - eval_time: 1m
+    expr: namespace_workload_pod:kube_pod_owner:relabel
+    exp_samples:
+    - value: 1
+      labels: 'namespace_workload_pod:kube_pod_owner:relabel{cluster="kubernetes",namespace="ns1", pod="etcd-node1", workload="etcd-node1", workload_type="staticpod"}'
+
+# Non-standard owner kind: workload_type is owner_kind copied verbatim by label_replace, so its original casing is expected.
+- interval: 1m
+  input_series:
+  - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="CustomController",owner_name="myapp-controller",pod="myapp-controller-abc123",service="ksm"}'
+    values: '1 1'
+  - series: 'kube_pod_owner{endpoint="https",instance="instance2",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="CustomController",owner_name="myapp-controller",pod="myapp-controller-abc123",service="ksm"}'
+    values: '1 stale'
+  promql_expr_test:
+  - eval_time: 0m
+    expr: namespace_workload_pod:kube_pod_owner:relabel
+    exp_samples:
+    - value: 1
+      labels: 'namespace_workload_pod:kube_pod_owner:relabel{cluster="kubernetes",namespace="ns1", pod="myapp-controller-abc123", workload="myapp-controller", workload_type="CustomController"}'
+  - eval_time: 1m
+    expr: namespace_workload_pod:kube_pod_owner:relabel
+    exp_samples:
+    - value: 1
+      labels: 'namespace_workload_pod:kube_pod_owner:relabel{cluster="kubernetes",namespace="ns1", pod="myapp-controller-abc123", workload="myapp-controller", workload_type="CustomController"}'
diff --git a/tests/tests.yaml b/tests/tests.yaml
index 16607f881..c9d4b3b5b 100644
--- a/tests/tests.yaml
+++ b/tests/tests.yaml
@@ -772,29 +772,6 @@ tests:
     - value: 1
       labels: 'node:node_num_cpu:sum{cluster="kubernetes",node="node-2"}'
 
-
-- interval: 1m
-  input_series:
-  - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="ds-7cc77d965f-cgsdv",service="ksm"}'
-    values: '1 1'
-  - series: 'kube_pod_owner{endpoint="https",instance="instance2",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="ds-7cc77d965f-cgsdv",service="ksm"}'
-    values: '1 stale'
-  - series: 'kube_replicaset_owner{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="Deployment",owner_name="ds",pod="ds-777f6bf798-kq7tj",replicaset="ds-7cc77d965f",service="ksm"}'
-    values: '1 1'
-  - series: 'kube_replicaset_owner{endpoint="https",instance="instance2",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="Deployment",owner_name="ds",pod="ds-777f6bf798-kq7tj",replicaset="ds-7cc77d965f",service="ksm"}'
-    values: '1 stale'
-  promql_expr_test:
-  - eval_time: 0m
-    expr: namespace_workload_pod:kube_pod_owner:relabel
-    exp_samples:
-    - value: 1
-      labels: 'namespace_workload_pod:kube_pod_owner:relabel{cluster="kubernetes",namespace="ns1", pod="ds-7cc77d965f-cgsdv", workload="ds", workload_type="deployment"}'
-  - eval_time: 1m
-    expr: namespace_workload_pod:kube_pod_owner:relabel
-    exp_samples:
-    - value: 1
-      labels: 'namespace_workload_pod:kube_pod_owner:relabel{cluster="kubernetes",namespace="ns1", pod="ds-7cc77d965f-cgsdv", workload="ds", workload_type="deployment"}'
-
 - interval: 1m
   input_series:
   - series: 'kube_pod_status_phase{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",phase="Pending",pod="pod-ds-7cc77d965f-cgsdv",service="ksm"}'