Fixing issue with missing metrics when pod has only pending containers (#8472)
Also added Pod Phase and Pod Reason fields. Fixes #8347. Co-authored-by: Vyacheslav-Stepanov <Vyacheslav_Stepanov@epam.com>
This commit is contained in:
parent
a063f9d7f7
commit
0fe2386494
|
|
@ -224,12 +224,14 @@ subjects:
|
||||||
- node_name
|
- node_name
|
||||||
- pod_name
|
- pod_name
|
||||||
- node_selector (\*varies)
|
- node_selector (\*varies)
|
||||||
|
- phase
|
||||||
- state
|
- state
|
||||||
- readiness
|
- readiness
|
||||||
- fields:
|
- fields:
|
||||||
- restarts_total
|
- restarts_total
|
||||||
- state_code
|
- state_code
|
||||||
- state_reason
|
- state_reason
|
||||||
|
- phase_reason
|
||||||
- terminated_reason (string, deprecated in 1.15: use `state_reason` instead)
|
- terminated_reason (string, deprecated in 1.15: use `state_reason` instead)
|
||||||
- resource_requests_millicpu_units
|
- resource_requests_millicpu_units
|
||||||
- resource_requests_memory_bytes
|
- resource_requests_memory_bytes
|
||||||
|
|
@ -301,7 +303,7 @@ kubernetes_persistentvolume,phase=Released,pv_name=pvc-aaaaaaaa-bbbb-cccc-1111-2
|
||||||
kubernetes_persistentvolumeclaim,namespace=default,phase=Bound,pvc_name=data-etcd-0,selector_select1=s1,storageclass=ebs-1-retain phase_type=0i 1547597615000000000
|
kubernetes_persistentvolumeclaim,namespace=default,phase=Bound,pvc_name=data-etcd-0,selector_select1=s1,storageclass=ebs-1-retain phase_type=0i 1547597615000000000
|
||||||
kubernetes_pod,namespace=default,node_name=ip-172-17-0-2.internal,pod_name=tick1 last_transition_time=1547578322000000000i,ready="false" 1547597616000000000
|
kubernetes_pod,namespace=default,node_name=ip-172-17-0-2.internal,pod_name=tick1 last_transition_time=1547578322000000000i,ready="false" 1547597616000000000
|
||||||
kubernetes_service,cluster_ip=172.29.61.80,namespace=redis-cache-0001,port_name=redis,port_protocol=TCP,selector_app=myapp,selector_io.kompose.service=redis,selector_role=slave,service_name=redis-slave created=1588690034000000000i,generation=0i,port=6379i,target_port=0i 1547597616000000000
|
kubernetes_service,cluster_ip=172.29.61.80,namespace=redis-cache-0001,port_name=redis,port_protocol=TCP,selector_app=myapp,selector_io.kompose.service=redis,selector_role=slave,service_name=redis-slave created=1588690034000000000i,generation=0i,port=6379i,target_port=0i 1547597616000000000
|
||||||
kubernetes_pod_container,container_name=telegraf,namespace=default,node_name=ip-172-17-0-2.internal,node_selector_node-role.kubernetes.io/compute=true,pod_name=tick1,state=running,readiness=ready resource_requests_cpu_units=0.1,resource_limits_memory_bytes=524288000,resource_limits_cpu_units=0.5,restarts_total=0i,state_code=0i,state_reason="",resource_requests_memory_bytes=524288000 1547597616000000000
|
kubernetes_pod_container,container_name=telegraf,namespace=default,node_name=ip-172-17-0-2.internal,node_selector_node-role.kubernetes.io/compute=true,pod_name=tick1,phase=Running,state=running,readiness=ready resource_requests_cpu_units=0.1,resource_limits_memory_bytes=524288000,resource_limits_cpu_units=0.5,restarts_total=0i,state_code=0i,state_reason="",phase_reason="",resource_requests_memory_bytes=524288000 1547597616000000000
|
||||||
kubernetes_statefulset,namespace=default,selector_select1=s1,statefulset_name=etcd replicas_updated=3i,spec_replicas=3i,observed_generation=1i,created=1544101669000000000i,generation=1i,replicas=3i,replicas_current=3i,replicas_ready=3i 1547597616000000000
|
kubernetes_statefulset,namespace=default,selector_select1=s1,statefulset_name=etcd replicas_updated=3i,spec_replicas=3i,observed_generation=1i,created=1544101669000000000i,generation=1i,replicas=3i,replicas_current=3i,replicas_ready=3i 1547597616000000000
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -27,8 +27,16 @@ func (ki *KubernetesInventory) gatherPod(p v1.Pod, acc telegraf.Accumulator) err
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
for i, cs := range p.Status.ContainerStatuses {
|
containerList := map[string]*v1.ContainerStatus{}
|
||||||
c := p.Spec.Containers[i]
|
for _, v := range p.Status.ContainerStatuses {
|
||||||
|
containerList[*v.Name] = v
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range p.Spec.Containers {
|
||||||
|
cs, ok := containerList[*c.Name]
|
||||||
|
if !ok {
|
||||||
|
cs = &v1.ContainerStatus{}
|
||||||
|
}
|
||||||
gatherPodContainer(*p.Spec.NodeName, ki, p, *cs, *c, acc)
|
gatherPodContainer(*p.Spec.NodeName, ki, p, *cs, *c, acc)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -39,7 +47,9 @@ func gatherPodContainer(nodeName string, ki *KubernetesInventory, p v1.Pod, cs v
|
||||||
stateCode := 3
|
stateCode := 3
|
||||||
stateReason := ""
|
stateReason := ""
|
||||||
state := "unknown"
|
state := "unknown"
|
||||||
|
readiness := "unready"
|
||||||
|
|
||||||
|
if cs.State != nil {
|
||||||
switch {
|
switch {
|
||||||
case cs.State.Running != nil:
|
case cs.State.Running != nil:
|
||||||
stateCode = 0
|
stateCode = 0
|
||||||
|
|
@ -53,8 +63,8 @@ func gatherPodContainer(nodeName string, ki *KubernetesInventory, p v1.Pod, cs v
|
||||||
state = "waiting"
|
state = "waiting"
|
||||||
stateReason = cs.State.Waiting.GetReason()
|
stateReason = cs.State.Waiting.GetReason()
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
readiness := "unready"
|
|
||||||
if cs.GetReady() {
|
if cs.GetReady() {
|
||||||
readiness = "ready"
|
readiness = "ready"
|
||||||
}
|
}
|
||||||
|
|
@ -62,18 +72,28 @@ func gatherPodContainer(nodeName string, ki *KubernetesInventory, p v1.Pod, cs v
|
||||||
fields := map[string]interface{}{
|
fields := map[string]interface{}{
|
||||||
"restarts_total": cs.GetRestartCount(),
|
"restarts_total": cs.GetRestartCount(),
|
||||||
"state_code": stateCode,
|
"state_code": stateCode,
|
||||||
"terminated_reason": cs.State.Terminated.GetReason(),
|
}
|
||||||
|
|
||||||
|
// deprecated in 1.15: use `state_reason` instead
|
||||||
|
if state == "terminated" {
|
||||||
|
fields["terminated_reason"] = stateReason
|
||||||
}
|
}
|
||||||
|
|
||||||
if stateReason != "" {
|
if stateReason != "" {
|
||||||
fields["state_reason"] = stateReason
|
fields["state_reason"] = stateReason
|
||||||
}
|
}
|
||||||
|
|
||||||
|
phaseReason := p.Status.GetReason()
|
||||||
|
if phaseReason != "" {
|
||||||
|
fields["phase_reason"] = phaseReason
|
||||||
|
}
|
||||||
|
|
||||||
tags := map[string]string{
|
tags := map[string]string{
|
||||||
"container_name": *c.Name,
|
"container_name": *c.Name,
|
||||||
"namespace": *p.Metadata.Namespace,
|
"namespace": *p.Metadata.Namespace,
|
||||||
"node_name": *p.Spec.NodeName,
|
"node_name": *p.Spec.NodeName,
|
||||||
"pod_name": *p.Metadata.Name,
|
"pod_name": *p.Metadata.Name,
|
||||||
|
"phase": *p.Status.Phase,
|
||||||
"state": state,
|
"state": state,
|
||||||
"readiness": readiness,
|
"readiness": readiness,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -225,6 +225,7 @@ func TestPod(t *testing.T) {
|
||||||
"container_name": "running",
|
"container_name": "running",
|
||||||
"node_name": "node1",
|
"node_name": "node1",
|
||||||
"pod_name": "pod1",
|
"pod_name": "pod1",
|
||||||
|
"phase": "Running",
|
||||||
"state": "running",
|
"state": "running",
|
||||||
"readiness": "ready",
|
"readiness": "ready",
|
||||||
"node_selector_select1": "s1",
|
"node_selector_select1": "s1",
|
||||||
|
|
@ -245,6 +246,7 @@ func TestPod(t *testing.T) {
|
||||||
"container_name": "completed",
|
"container_name": "completed",
|
||||||
"node_name": "node1",
|
"node_name": "node1",
|
||||||
"pod_name": "pod1",
|
"pod_name": "pod1",
|
||||||
|
"phase": "Running",
|
||||||
"state": "terminated",
|
"state": "terminated",
|
||||||
"readiness": "unready",
|
"readiness": "unready",
|
||||||
},
|
},
|
||||||
|
|
@ -263,6 +265,7 @@ func TestPod(t *testing.T) {
|
||||||
"container_name": "waiting",
|
"container_name": "waiting",
|
||||||
"node_name": "node1",
|
"node_name": "node1",
|
||||||
"pod_name": "pod1",
|
"pod_name": "pod1",
|
||||||
|
"phase": "Running",
|
||||||
"state": "waiting",
|
"state": "waiting",
|
||||||
"readiness": "unready",
|
"readiness": "unready",
|
||||||
},
|
},
|
||||||
|
|
@ -551,3 +554,220 @@ func TestPodSelectorFilter(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestPodPendingContainers(t *testing.T) {
|
||||||
|
cli := &client{}
|
||||||
|
selectInclude := []string{}
|
||||||
|
selectExclude := []string{}
|
||||||
|
now := time.Now()
|
||||||
|
started := time.Date(now.Year(), now.Month(), now.Day(), now.Hour()-1, 1, 36, 0, now.Location())
|
||||||
|
created := time.Date(now.Year(), now.Month(), now.Day(), now.Hour()-2, 1, 36, 0, now.Location())
|
||||||
|
cond1 := time.Date(now.Year(), 7, 5, 7, 53, 29, 0, now.Location())
|
||||||
|
cond2 := time.Date(now.Year(), 7, 5, 7, 53, 31, 0, now.Location())
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
handler *mockHandler
|
||||||
|
output *testutil.Accumulator
|
||||||
|
hasError bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "collect pods",
|
||||||
|
handler: &mockHandler{
|
||||||
|
responseMap: map[string]interface{}{
|
||||||
|
"/pods/": &v1.PodList{
|
||||||
|
Items: []*v1.Pod{
|
||||||
|
{
|
||||||
|
Spec: &v1.PodSpec{
|
||||||
|
NodeName: toStrPtr("node1"),
|
||||||
|
Containers: []*v1.Container{
|
||||||
|
{
|
||||||
|
Name: toStrPtr("waiting"),
|
||||||
|
Image: toStrPtr("image1"),
|
||||||
|
Ports: []*v1.ContainerPort{
|
||||||
|
{
|
||||||
|
ContainerPort: toInt32Ptr(8080),
|
||||||
|
Protocol: toStrPtr("TCP"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Resources: &v1.ResourceRequirements{
|
||||||
|
Limits: map[string]*resource.Quantity{
|
||||||
|
"cpu": {String_: toStrPtr("100m")},
|
||||||
|
},
|
||||||
|
Requests: map[string]*resource.Quantity{
|
||||||
|
"cpu": {String_: toStrPtr("100m")},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: toStrPtr("terminated"),
|
||||||
|
Image: toStrPtr("image1"),
|
||||||
|
Ports: []*v1.ContainerPort{
|
||||||
|
{
|
||||||
|
ContainerPort: toInt32Ptr(8080),
|
||||||
|
Protocol: toStrPtr("TCP"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Resources: &v1.ResourceRequirements{
|
||||||
|
Limits: map[string]*resource.Quantity{
|
||||||
|
"cpu": {String_: toStrPtr("100m")},
|
||||||
|
},
|
||||||
|
Requests: map[string]*resource.Quantity{
|
||||||
|
"cpu": {String_: toStrPtr("100m")},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Volumes: []*v1.Volume{
|
||||||
|
{
|
||||||
|
Name: toStrPtr("vol1"),
|
||||||
|
VolumeSource: &v1.VolumeSource{
|
||||||
|
PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
|
||||||
|
ClaimName: toStrPtr("pc1"),
|
||||||
|
ReadOnly: toBoolPtr(true),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: toStrPtr("vol2"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
NodeSelector: map[string]string{
|
||||||
|
"select1": "s1",
|
||||||
|
"select2": "s2",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Status: &v1.PodStatus{
|
||||||
|
Phase: toStrPtr("Pending"),
|
||||||
|
Reason: toStrPtr("NetworkNotReady"),
|
||||||
|
HostIP: toStrPtr("180.12.10.18"),
|
||||||
|
PodIP: toStrPtr("10.244.2.15"),
|
||||||
|
StartTime: &metav1.Time{Seconds: toInt64Ptr(started.Unix())},
|
||||||
|
Conditions: []*v1.PodCondition{
|
||||||
|
{
|
||||||
|
Type: toStrPtr("Initialized"),
|
||||||
|
Status: toStrPtr("True"),
|
||||||
|
LastTransitionTime: &metav1.Time{Seconds: toInt64Ptr(cond1.Unix())},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: toStrPtr("Ready"),
|
||||||
|
Status: toStrPtr("True"),
|
||||||
|
LastTransitionTime: &metav1.Time{Seconds: toInt64Ptr(cond2.Unix())},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: toStrPtr("Scheduled"),
|
||||||
|
Status: toStrPtr("True"),
|
||||||
|
LastTransitionTime: &metav1.Time{Seconds: toInt64Ptr(cond1.Unix())},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
ContainerStatuses: []*v1.ContainerStatus{},
|
||||||
|
},
|
||||||
|
Metadata: &metav1.ObjectMeta{
|
||||||
|
OwnerReferences: []*metav1.OwnerReference{
|
||||||
|
{
|
||||||
|
ApiVersion: toStrPtr("apps/v1"),
|
||||||
|
Kind: toStrPtr("DaemonSet"),
|
||||||
|
Name: toStrPtr("forwarder"),
|
||||||
|
Controller: toBoolPtr(true),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Generation: toInt64Ptr(11232),
|
||||||
|
Namespace: toStrPtr("ns1"),
|
||||||
|
Name: toStrPtr("pod1"),
|
||||||
|
Labels: map[string]string{
|
||||||
|
"lab1": "v1",
|
||||||
|
"lab2": "v2",
|
||||||
|
},
|
||||||
|
CreationTimestamp: &metav1.Time{Seconds: toInt64Ptr(created.Unix())},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
output: &testutil.Accumulator{
|
||||||
|
Metrics: []*testutil.Metric{
|
||||||
|
{
|
||||||
|
Measurement: podContainerMeasurement,
|
||||||
|
Fields: map[string]interface{}{
|
||||||
|
"phase_reason": "NetworkNotReady",
|
||||||
|
"restarts_total": int32(0),
|
||||||
|
"state_code": 3,
|
||||||
|
"resource_requests_millicpu_units": int64(100),
|
||||||
|
"resource_limits_millicpu_units": int64(100),
|
||||||
|
},
|
||||||
|
Tags: map[string]string{
|
||||||
|
"namespace": "ns1",
|
||||||
|
"container_name": "waiting",
|
||||||
|
"node_name": "node1",
|
||||||
|
"pod_name": "pod1",
|
||||||
|
"phase": "Pending",
|
||||||
|
"state": "unknown",
|
||||||
|
"readiness": "unready",
|
||||||
|
"node_selector_select1": "s1",
|
||||||
|
"node_selector_select2": "s2",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Measurement: podContainerMeasurement,
|
||||||
|
Fields: map[string]interface{}{
|
||||||
|
"phase_reason": "NetworkNotReady",
|
||||||
|
"restarts_total": int32(0),
|
||||||
|
"state_code": 3,
|
||||||
|
"resource_requests_millicpu_units": int64(100),
|
||||||
|
"resource_limits_millicpu_units": int64(100),
|
||||||
|
},
|
||||||
|
Tags: map[string]string{
|
||||||
|
"namespace": "ns1",
|
||||||
|
"container_name": "terminated",
|
||||||
|
"node_name": "node1",
|
||||||
|
"pod_name": "pod1",
|
||||||
|
"phase": "Pending",
|
||||||
|
"state": "unknown",
|
||||||
|
"readiness": "unready",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
hasError: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, v := range tests {
|
||||||
|
ks := &KubernetesInventory{
|
||||||
|
client: cli,
|
||||||
|
SelectorInclude: selectInclude,
|
||||||
|
SelectorExclude: selectExclude,
|
||||||
|
}
|
||||||
|
ks.createSelectorFilters()
|
||||||
|
acc := new(testutil.Accumulator)
|
||||||
|
for _, pod := range ((v.handler.responseMap["/pods/"]).(*v1.PodList)).Items {
|
||||||
|
err := ks.gatherPod(*pod, acc)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Failed to gather pod - %s", err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
err := acc.FirstError()
|
||||||
|
if err == nil && v.hasError {
|
||||||
|
t.Fatalf("%s failed, should have error", v.name)
|
||||||
|
} else if err != nil && !v.hasError {
|
||||||
|
t.Fatalf("%s failed, err: %v", v.name, err)
|
||||||
|
}
|
||||||
|
if v.output == nil && len(acc.Metrics) > 0 {
|
||||||
|
t.Fatalf("%s: collected extra data", v.name)
|
||||||
|
} else if v.output != nil && len(v.output.Metrics) > 0 {
|
||||||
|
for i := range v.output.Metrics {
|
||||||
|
for k, m := range v.output.Metrics[i].Tags {
|
||||||
|
if acc.Metrics[i].Tags[k] != m {
|
||||||
|
t.Fatalf("%s: tag %s metrics unmatch Expected %s, got %s, i %d\n", v.name, k, m, acc.Metrics[i].Tags[k], i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for k, m := range v.output.Metrics[i].Fields {
|
||||||
|
if acc.Metrics[i].Fields[k] != m {
|
||||||
|
t.Fatalf("%s: field %s metrics unmatch Expected %v(%T), got %v(%T), i %d\n", v.name, k, m, m, acc.Metrics[i].Fields[k], acc.Metrics[i].Fields[k], i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue