History

Sebastian Spaink d67f75e557 docs: Remove warning not to remove go:embed (#11797 ) Co-authored-by: Joshua Powers <powersj@fastmail.com>		2022-09-13 12:47:58 -05:00
..
README.md	chore: Markdown fixes for inputs/[a-m]* (#11606 )	2022-08-09 18:57:31 +02:00
client.go	fix(inputs.kube_inventory): send file location to enable token auto-refresh (#11577 )	2022-08-02 15:29:33 -06:00
client_test.go	fix(inputs.kube_inventory): send file location to enable token auto-refresh (#11577 )	2022-08-02 15:29:33 -06:00
daemonset.go	fix(inputs/kube_inventory): don't skip resources with zero s/ns timestamps (#9978 )	2021-10-25 16:01:35 -05:00
daemonset_test.go	fix(inputs): Linter issues (#11576 )	2022-08-02 13:52:17 +02:00
deployment.go	Linter fixes - revive:unused-parameter, unparam, varcheck and unused (#8984 )	2021-03-22 12:21:36 -05:00
deployment_test.go	Linter fixes (unhandled errors) -- Part 1 (#8992 )	2021-04-08 11:43:39 -05:00
endpoint.go	fix(inputs/kube_inventory): don't skip resources with zero s/ns timestamps (#9978 )	2021-10-25 16:01:35 -05:00
endpoint_test.go	Fix segfault in kube_inventory (#9456 )	2021-07-06 13:57:52 -06:00
ingress.go	fix(inputs/kube_inventory): don't skip resources with zero s/ns timestamps (#9978 )	2021-10-25 16:01:35 -05:00
ingress_test.go	fix: segfault in ingress, persistentvolumeclaim, statefulset in kube_inventory (#9585 )	2021-10-19 15:09:37 -06:00
kube_inventory.go	docs: Remove warning not to remove go:embed (#11797 )	2022-09-13 12:47:58 -05:00
node.go	fix: Linter fixes for plugins/inputs/[k-l]* (#9999 )	2021-10-27 09:48:57 -06:00
node_test.go	fix: Fixing k8s nodes and pods parsing error (#9581 )	2021-08-04 17:52:52 -05:00
persistentvolume.go	Migrate from github.com/ericchiang/k8s to github.com/kubernetes/client-go (#8937 )	2021-03-17 16:35:25 -05:00
persistentvolume_test.go	Linter fixes (unhandled errors) -- Part 1 (#8992 )	2021-04-08 11:43:39 -05:00
persistentvolumeclaim.go	fix: segfault in ingress, persistentvolumeclaim, statefulset in kube_inventory (#9585 )	2021-10-19 15:09:37 -06:00
persistentvolumeclaim_test.go	fix: segfault in ingress, persistentvolumeclaim, statefulset in kube_inventory (#9585 )	2021-10-19 15:09:37 -06:00
pod.go	fix: Linter fixes for plugins/inputs/[k-l]* (#9999 )	2021-10-27 09:48:57 -06:00
pod_test.go	fix(inputs/kube_inventory): don't skip resources with zero s/ns timestamps (#9978 )	2021-10-25 16:01:35 -05:00
sample.conf	chore: Markdown fixes for inputs/[a-m]* (#11606 )	2022-08-09 18:57:31 +02:00
service.go	fix(inputs/kube_inventory): don't skip resources with zero s/ns timestamps (#9978 )	2021-10-25 16:01:35 -05:00
service_test.go	Linter fixes (unhandled errors) -- Part 1 (#8992 )	2021-04-08 11:43:39 -05:00
statefulset.go	fix: segfault in ingress, persistentvolumeclaim, statefulset in kube_inventory (#9585 )	2021-10-19 15:09:37 -06:00
statefulset_test.go	fix: segfault in ingress, persistentvolumeclaim, statefulset in kube_inventory (#9585 )	2021-10-19 15:09:37 -06:00

README.md

Kubernetes Inventory Input Plugin

This plugin generates metrics derived from the state of the following Kubernetes resources:

daemonsets
deployments
endpoints
ingress
nodes
persistentvolumes
persistentvolumeclaims
pods (containers)
services
statefulsets

Kubernetes is a fast-moving project, with a new minor release every 3 months. As such, we will aim to maintain support only for versions that are supported by the major cloud providers; this is roughly 4 releases / 2 years.

This plugin supports Kubernetes 1.11 and later.

Series Cardinality Warning

This plugin may produce a high number of series which, when not controlled for, will cause high load on your database. Use the following techniques to avoid cardinality issues:

Use metric filtering options to exclude unneeded measurements and tags.
Write to a database with an appropriate retention policy.
Consider using the Time Series Index.
Monitor your database's series cardinality.
Consult the InfluxDB documentation for the most up-to-date techniques.

Configuration

# Read metrics from the Kubernetes api
[[inputs.kube_inventory]]
  ## URL for the Kubernetes API
  url = "https://127.0.0.1"

  ## Namespace to use. Set to "" to use all namespaces.
  # namespace = "default"

  ## Use bearer token for authorization. ('bearer_token' takes priority)
  ##
  ## If both of these are empty, we'll use the default serviceaccount token
  ## at: /run/secrets/kubernetes.io/serviceaccount/token
  ##
  ## To auto-refresh the token, please use a file with the bearer_token option.
  ## If given a string, Telegraf cannot refresh the token periodically.
  # bearer_token = "/run/secrets/kubernetes.io/serviceaccount/token"
  ## OR
  ## deprecated in 1.24.0; use bearer_token with a file
  # bearer_token_string = "abc_123"

  ## Set response_timeout (default 5 seconds)
  # response_timeout = "5s"

  ## Optional Resources to exclude from gathering
  ## Leave this blank to try to gather everything available.
  ## Values can be - "daemonsets", "deployments", "endpoints", "ingress",
  ## "nodes", "persistentvolumes", "persistentvolumeclaims", "pods", "services",
  ## "statefulsets"
  # resource_exclude = [ "deployments", "nodes", "statefulsets" ]

  ## Optional Resources to include when gathering
  ## Overrides resource_exclude if both set.
  # resource_include = [ "deployments", "nodes", "statefulsets" ]

  ## selectors to include and exclude as tags.  Globs accepted.
  ## Note that an empty array for both will include all selectors as tags
  ## selector_exclude overrides selector_include if both set.
  # selector_include = []
  # selector_exclude = ["*"]

  ## Optional TLS Config
  ## Trusted root certificates for server
  # tls_ca = "/path/to/cafile"
  ## Used for TLS client certificate authentication
  # tls_cert = "/path/to/certfile"
  ## Used for TLS client certificate authentication
  # tls_key = "/path/to/keyfile"
  ## Send the specified TLS server name via SNI
  # tls_server_name = "kubernetes.example.com"
  ## Use TLS but skip chain & host verification
  # insecure_skip_verify = false

  ## Uncomment to remove deprecated metrics.
  # fielddrop = ["terminated_reason"]

Kubernetes Permissions

If using RBAC authorization, you will need to create a cluster role to list "persistentvolumes" and "nodes". You will then need to make an aggregated ClusterRole that will eventually be bound to a user or group.

---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: influx:cluster:viewer
  labels:
    rbac.authorization.k8s.io/aggregate-view-telegraf: "true"
rules:
  - apiGroups: [""]
    resources: ["persistentvolumes", "nodes"]
    verbs: ["get", "list"]

---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: influx:telegraf
aggregationRule:
  clusterRoleSelectors:
    - matchLabels:
        rbac.authorization.k8s.io/aggregate-view-telegraf: "true"
    - matchLabels:
        rbac.authorization.k8s.io/aggregate-to-view: "true"
rules: [] # Rules are automatically filled in by the controller manager.

Bind the newly created aggregated ClusterRole with the following config file, updating the subjects as needed.

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: influx:telegraf:viewer
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: influx:telegraf
subjects:
  - kind: ServiceAccount
    name: telegraf
    namespace: default

Quickstart in k3s

When monitoring k3s server instances, one can re-use the already generated administration token. This is less secure than using the more restrictive dedicated telegraf user, but more convenient to set up.

# an empty token will make telegraf use the client cert/key files instead
$ touch /run/telegraf-kubernetes-token
# replace `telegraf` with the user the telegraf process is running as
$ install -o telegraf -m400 /var/lib/rancher/k3s/server/tls/client-admin.crt /run/telegraf-kubernetes-cert
$ install -o telegraf -m400 /var/lib/rancher/k3s/server/tls/client-admin.key /run/telegraf-kubernetes-key

[[inputs.kube_inventory]]
bearer_token = "/run/telegraf-kubernetes-token"
tls_cert = "/run/telegraf-kubernetes-cert"
tls_key = "/run/telegraf-kubernetes-key"

Metrics

kubernetes_daemonset
- tags:
  - daemonset_name
  - namespace
  - selector (*varies)
- fields:
  - generation
  - current_number_scheduled
  - desired_number_scheduled
  - number_available
  - number_misscheduled
  - number_ready
  - number_unavailable
  - updated_number_scheduled
kubernetes_deployment
- tags:
  - deployment_name
  - namespace
  - selector (*varies)
- fields:
  - replicas_available
  - replicas_unavailable
  - created
kubernetes_endpoints
- tags:
  - endpoint_name
  - namespace
  - hostname
  - node_name
  - port_name
  - port_protocol
  - kind (*varies)
- fields:
  - created
  - generation
  - ready
  - port
kubernetes_ingress
- tags:
  - ingress_name
  - namespace
  - hostname
  - ip
  - backend_service_name
  - path
  - host
- fields:
  - created
  - generation
  - backend_service_port
  - tls
kubernetes_node
- tags:
  - node_name
- fields:
  - capacity_cpu_cores
  - capacity_millicpu_cores
  - capacity_memory_bytes
  - capacity_pods
  - allocatable_cpu_cores
  - allocatable_millicpu_cores
  - allocatable_memory_bytes
  - allocatable_pods
kubernetes_persistentvolume
- tags:
  - pv_name
  - phase
  - storageclass
- fields:
  - phase_type (int, see below)
kubernetes_persistentvolumeclaim
- tags:
  - pvc_name
  - namespace
  - phase
  - storageclass
  - selector (*varies)
- fields:
  - phase_type (int, see below)
kubernetes_pod_container
- tags:
  - container_name
  - namespace
  - node_name
  - pod_name
  - node_selector (*varies)
  - phase
  - state
  - readiness
- fields:
  - restarts_total
  - state_code
  - state_reason
  - phase_reason
  - terminated_reason (string, deprecated in 1.15: use state_reason instead)
  - resource_requests_millicpu_units
  - resource_requests_memory_bytes
  - resource_limits_millicpu_units
  - resource_limits_memory_bytes
kubernetes_service
- tags:
  - service_name
  - namespace
  - port_name
  - port_protocol
  - external_name
  - cluster_ip
  - selector (*varies)
- fields:
  - created
  - generation
  - port
  - target_port
kubernetes_statefulset
- tags:
  - statefulset_name
  - namespace
  - selector (*varies)
- fields:
  - created
  - generation
  - replicas
  - replicas_current
  - replicas_ready
  - replicas_updated
  - spec_replicas
  - observed_generation

pv `phase_type`

The persistentvolume "phase" is saved in the phase tag with a correlated numeric field called phase_type corresponding with that tag value.

Tag value	Corresponding field value
bound	0
failed	1
pending	2
released	3
available	4
unknown	5

pvc `phase_type`

The persistentvolumeclaim "phase" is saved in the phase tag with a correlated numeric field called phase_type corresponding with that tag value.

Tag value	Corresponding field value
bound	0
lost	1
pending	2
unknown	3

Example Output

kubernetes_configmap,configmap_name=envoy-config,namespace=default,resource_version=56593031 created=1544103867000000000i 1547597616000000000
kubernetes_daemonset,daemonset_name=telegraf,selector_select1=s1,namespace=logging number_unavailable=0i,desired_number_scheduled=11i,number_available=11i,number_misscheduled=8i,number_ready=11i,updated_number_scheduled=11i,created=1527758699000000000i,generation=16i,current_number_scheduled=11i 1547597616000000000
kubernetes_deployment,deployment_name=deployd,selector_select1=s1,namespace=default replicas_unavailable=0i,created=1544103082000000000i,replicas_available=1i 1547597616000000000
kubernetes_node,node_name=ip-172-17-0-2.internal allocatable_pods=110i,capacity_memory_bytes=128837533696,capacity_pods=110i,capacity_cpu_cores=16i,allocatable_cpu_cores=16i,allocatable_memory_bytes=128732676096 1547597616000000000
kubernetes_persistentvolume,phase=Released,pv_name=pvc-aaaaaaaa-bbbb-cccc-1111-222222222222,storageclass=ebs-1-retain phase_type=3i 1547597616000000000
kubernetes_persistentvolumeclaim,namespace=default,phase=Bound,pvc_name=data-etcd-0,selector_select1=s1,storageclass=ebs-1-retain phase_type=0i 1547597615000000000
kubernetes_pod,namespace=default,node_name=ip-172-17-0-2.internal,pod_name=tick1 last_transition_time=1547578322000000000i,ready="false" 1547597616000000000
kubernetes_service,cluster_ip=172.29.61.80,namespace=redis-cache-0001,port_name=redis,port_protocol=TCP,selector_app=myapp,selector_io.kompose.service=redis,selector_role=slave,service_name=redis-slave created=1588690034000000000i,generation=0i,port=6379i,target_port=0i 1547597616000000000
kubernetes_pod_container,container_name=telegraf,namespace=default,node_name=ip-172-17-0-2.internal,node_selector_node-role.kubernetes.io/compute=true,pod_name=tick1,phase=Running,state=running,readiness=ready resource_requests_cpu_units=0.1,resource_limits_memory_bytes=524288000,resource_limits_cpu_units=0.5,restarts_total=0i,state_code=0i,state_reason="",phase_reason="",resource_requests_memory_bytes=524288000 1547597616000000000
kubernetes_statefulset,namespace=default,selector_select1=s1,statefulset_name=etcd replicas_updated=3i,spec_replicas=3i,observed_generation=1i,created=1544101669000000000i,generation=1i,replicas=3i,replicas_current=3i,replicas_ready=3i 1547597616000000000