2015-06-20 20:38:01 +08:00
package prometheus
import (
2018-11-06 05:30:16 +08:00
"context"
2015-06-20 20:38:01 +08:00
"errors"
"fmt"
2016-03-02 00:12:23 +08:00
"io/ioutil"
2017-09-19 06:06:11 +08:00
"net"
2015-06-20 20:38:01 +08:00
"net/http"
2017-09-19 06:06:11 +08:00
"net/url"
2021-03-09 00:00:56 +08:00
"os"
"strings"
2015-06-20 20:38:01 +08:00
"sync"
2016-03-01 00:52:58 +08:00
"time"
2017-03-30 06:04:29 +08:00
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
2020-06-26 02:44:22 +08:00
"github.com/influxdata/telegraf/plugins/common/tls"
2017-03-30 06:04:29 +08:00
"github.com/influxdata/telegraf/plugins/inputs"
2020-12-03 03:48:44 +08:00
parser_v2 "github.com/influxdata/telegraf/plugins/parsers/prometheus"
2021-03-09 00:00:56 +08:00
"github.com/kubernetes/apimachinery/pkg/fields"
"github.com/kubernetes/apimachinery/pkg/labels"
2015-06-20 20:38:01 +08:00
)
2019-12-04 03:47:31 +08:00
// acceptHeader is the value added as the Accept header of every scrape
// request. It ranks the delimited protobuf MetricFamily encoding first
// (q=0.7), the text exposition format version 0.0.4 second (q=0.3) and any
// other content type last (q=0.1).
const acceptHeader = ` application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,text/plain;version=0.0.4;q=0.3,*/*;q=0.1 `
2016-07-07 18:15:47 +08:00
2015-06-20 20:38:01 +08:00
type Prometheus struct {
2017-09-19 06:06:11 +08:00
// An array of urls to scrape metrics from.
2018-02-06 03:16:00 +08:00
URLs [ ] string ` toml:"urls" `
2016-03-18 03:17:48 +08:00
2017-09-19 06:06:11 +08:00
// An array of Kubernetes services to scrape metrics from.
KubernetesServices [ ] string
2018-11-06 05:30:16 +08:00
// Location of kubernetes config file
KubeConfig string
2020-03-03 10:51:31 +08:00
// Label Selector/s for Kubernetes
KubernetesLabelSelector string ` toml:"kubernetes_label_selector" `
// Field Selector/s for Kubernetes
KubernetesFieldSelector string ` toml:"kubernetes_field_selector" `
2016-03-18 03:17:48 +08:00
// Bearer Token authorization file path
2019-01-16 07:25:26 +08:00
BearerToken string ` toml:"bearer_token" `
BearerTokenString string ` toml:"bearer_token_string" `
2016-06-23 15:59:44 +08:00
2019-07-03 02:14:48 +08:00
// Basic authentication credentials
Username string ` toml:"username" `
Password string ` toml:"password" `
2016-11-08 00:34:02 +08:00
ResponseTimeout internal . Duration ` toml:"response_timeout" `
2019-11-21 12:53:57 +08:00
MetricVersion int ` toml:"metric_version" `
URLTag string ` toml:"url_tag" `
2018-05-05 07:33:23 +08:00
tls . ClientConfig
2017-05-10 07:20:43 +08:00
2019-09-24 06:39:50 +08:00
Log telegraf . Logger
2017-05-10 07:20:43 +08:00
client * http . Client
2018-11-06 05:30:16 +08:00
// Should we scrape Kubernetes services for prometheus annotations
2021-03-09 00:00:56 +08:00
MonitorPods bool ` toml:"monitor_kubernetes_pods" `
PodScrapeScope string ` toml:"pod_scrape_scope" `
NodeIP string ` toml:"node_ip" `
PodScrapeInterval int ` toml:"pod_scrape_interval" `
PodNamespace string ` toml:"monitor_kubernetes_pods_namespace" `
lock sync . Mutex
kubernetesPods map [ string ] URLAndAddress
cancel context . CancelFunc
wg sync . WaitGroup
// Only for monitor_kubernetes_pods=true and pod_scrape_scope="node"
podLabelSelector labels . Selector
podFieldSelector fields . Selector
nodeIP string
isNodeScrapeScope bool
2015-06-20 20:38:01 +08:00
}
// sampleConfig is the example TOML configuration returned by SampleConfig.
// Every line is either blank, a comment ("##" for explanations, "#" for a
// commented-out option) or the single active "urls" setting, so the text can
// be pasted into a configuration file as-is.
var sampleConfig = `
  ## An array of urls to scrape metrics from.
  urls = ["http://localhost:9100/metrics"]

  ## Metric version controls the mapping from Prometheus metrics into
  ## Telegraf metrics.  When using the prometheus_client output, use the same
  ## value in both plugins to ensure metrics are round-tripped without
  ## modification.
  ##
  ##   example: metric_version = 1; deprecated in 1.13
  ##            metric_version = 2; recommended version
  # metric_version = 1

  ## Url tag name (tag containing scrapped url. optional, default is "url")
  # url_tag = "scrapeUrl"

  ## An array of Kubernetes services to scrape metrics from.
  # kubernetes_services = ["http://my-service-dns.my-namespace:9100/metrics"]

  ## Kubernetes config file to create client from.
  # kube_config = "/path/to/kubernetes.config"

  ## Scrape Kubernetes pods for the following prometheus annotations:
  ## - prometheus.io/scrape: Enable scraping for this pod
  ## - prometheus.io/scheme: If the metrics endpoint is secured then you will need to
  ##     set this to 'https' & most likely set the tls config.
  ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation.
  ## - prometheus.io/port: If port is not 9102 use this annotation
  # monitor_kubernetes_pods = true
  ## Get the list of pods to scrape with either the scope of
  ## - cluster: the kubernetes watch api (default, no need to specify)
  ## - node: the local cadvisor api; for scalability. Note that the config node_ip or the environment variable NODE_IP must be set to the host IP.
  # pod_scrape_scope = "cluster"
  ## Only for node scrape scope: node IP of the node that telegraf is running on.
  ## Either this config or the environment variable NODE_IP must be set.
  # node_ip = "10.180.1.1"
  ## Only for node scrape scope: interval in seconds for how often to get updated pod list for scraping.
  ## Default is 60 seconds.
  # pod_scrape_interval = 60
  ## Restricts Kubernetes monitoring to a single namespace
  ##   ex: monitor_kubernetes_pods_namespace = "default"
  # monitor_kubernetes_pods_namespace = ""
  # label selector to target pods which have the label
  # kubernetes_label_selector = "env=dev,app=nginx"
  # field selector to target pods
  # eg. To scrape pods on a specific node
  # kubernetes_field_selector = "spec.nodeName=$HOSTNAME"

  ## Use bearer token for authorization. ('bearer_token' takes priority)
  # bearer_token = "/path/to/bearer/token"
  ## OR
  # bearer_token_string = "abc_123"

  ## HTTP Basic Authentication username and password. ('bearer_token' and
  ## 'bearer_token_string' take priority)
  # username = ""
  # password = ""

  ## Specify timeout duration for slower prometheus clients (default is 3s)
  # response_timeout = "3s"

  ## Optional TLS Config
  # tls_ca = /path/to/cafile
  # tls_cert = /path/to/certfile
  # tls_key = /path/to/keyfile
  ## Use TLS but skip chain & host verification
  # insecure_skip_verify = false
`
2015-06-20 20:38:01 +08:00
2016-03-18 03:17:48 +08:00
func ( p * Prometheus ) SampleConfig ( ) string {
2015-06-20 20:38:01 +08:00
return sampleConfig
}
2016-03-18 03:17:48 +08:00
func ( p * Prometheus ) Description ( ) string {
2015-06-20 20:38:01 +08:00
return "Read metrics from one or many prometheus clients"
}
2019-11-27 07:46:31 +08:00
func ( p * Prometheus ) Init ( ) error {
if p . MetricVersion != 2 {
p . Log . Warnf ( "Use of deprecated configuration: 'metric_version = 1'; please update to 'metric_version = 2'" )
}
2020-03-03 10:51:31 +08:00
2021-03-09 00:00:56 +08:00
// Config proccessing for node scrape scope for monitor_kubernetes_pods
p . isNodeScrapeScope = strings . EqualFold ( p . PodScrapeScope , "node" )
if p . isNodeScrapeScope {
// Need node IP to make cAdvisor call for pod list. Check if set in config and valid IP address
if p . NodeIP == "" || net . ParseIP ( p . NodeIP ) == nil {
p . Log . Infof ( "The config node_ip is empty or invalid. Using NODE_IP env var as default." )
// Check if set as env var and is valid IP address
envVarNodeIP := os . Getenv ( "NODE_IP" )
if envVarNodeIP == "" || net . ParseIP ( envVarNodeIP ) == nil {
errorMessage := "The node_ip config and the environment variable NODE_IP are not set or invalid. Cannot get pod list for monitor_kubernetes_pods using node scrape scope"
return errors . New ( errorMessage )
}
p . NodeIP = envVarNodeIP
}
// Parse label and field selectors - will be used to filter pods after cAdvisor call
var err error
p . podLabelSelector , err = labels . Parse ( p . KubernetesLabelSelector )
if err != nil {
return fmt . Errorf ( "Error parsing the specified label selector(s): %s" , err . Error ( ) )
}
p . podFieldSelector , err = fields . ParseSelector ( p . KubernetesFieldSelector )
if err != nil {
return fmt . Errorf ( "Error parsing the specified field selector(s): %s" , err . Error ( ) )
}
isValid , invalidSelector := fieldSelectorIsSupported ( p . podFieldSelector )
if ! isValid {
return fmt . Errorf ( "The field selector %s is not supported for pods" , invalidSelector )
}
p . Log . Infof ( "Using pod scrape scope at node level to get pod list using cAdvisor." )
p . Log . Infof ( "Using the label selector: %v and field selector: %v" , p . podLabelSelector , p . podFieldSelector )
}
2019-11-27 07:46:31 +08:00
return nil
}
2015-06-20 20:38:01 +08:00
// ErrProtocolError is an exported sentinel error carrying the message
// "prometheus protocol error". Nothing in this part of the file returns it;
// presumably it is used by the parsing code elsewhere in the package.
var ErrProtocolError = errors . New ( "prometheus protocol error" )
2018-02-06 03:16:00 +08:00
func ( p * Prometheus ) AddressToURL ( u * url . URL , address string ) * url . URL {
2017-09-19 06:06:11 +08:00
host := address
if u . Port ( ) != "" {
host = address + ":" + u . Port ( )
}
2018-02-06 03:16:00 +08:00
reconstructedURL := & url . URL {
2017-09-19 06:06:11 +08:00
Scheme : u . Scheme ,
Opaque : u . Opaque ,
User : u . User ,
Path : u . Path ,
RawPath : u . RawPath ,
ForceQuery : u . ForceQuery ,
RawQuery : u . RawQuery ,
Fragment : u . Fragment ,
Host : host ,
}
2018-02-06 03:16:00 +08:00
return reconstructedURL
2017-09-19 06:06:11 +08:00
}
2018-02-06 03:16:00 +08:00
// URLAndAddress describes one scrape target.
type URLAndAddress struct {
	// OriginalURL is the URL as configured; it is what the URL tag reports.
	OriginalURL *url.URL
	// URL is the endpoint the HTTP request is actually sent to.
	URL *url.URL
	// Address is the resolved address of the target; when non-empty it is
	// attached to every metric as the "address" tag.
	Address string
	// Tags are extra tags copied onto every metric from this target.
	Tags map[string]string
}
2019-01-17 07:49:24 +08:00
func ( p * Prometheus ) GetAllURLs ( ) ( map [ string ] URLAndAddress , error ) {
allURLs := make ( map [ string ] URLAndAddress , 0 )
2018-02-06 03:16:00 +08:00
for _ , u := range p . URLs {
URL , err := url . Parse ( u )
if err != nil {
2019-09-24 06:39:50 +08:00
p . Log . Errorf ( "Could not parse %q, skipping it. Error: %s" , u , err . Error ( ) )
2018-02-06 03:16:00 +08:00
continue
}
2019-01-17 07:49:24 +08:00
allURLs [ URL . String ( ) ] = URLAndAddress { URL : URL , OriginalURL : URL }
2017-09-19 06:06:11 +08:00
}
2019-01-17 07:49:24 +08:00
2018-11-06 05:30:16 +08:00
p . lock . Lock ( )
defer p . lock . Unlock ( )
// loop through all pods scraped via the prometheus annotation on the pods
2019-01-17 07:49:24 +08:00
for k , v := range p . kubernetesPods {
allURLs [ k ] = v
}
2018-11-06 05:30:16 +08:00
2017-09-19 06:06:11 +08:00
for _ , service := range p . KubernetesServices {
2018-02-06 03:16:00 +08:00
URL , err := url . Parse ( service )
2017-09-19 06:06:11 +08:00
if err != nil {
return nil , err
}
2018-11-06 05:30:16 +08:00
2018-02-06 03:16:00 +08:00
resolvedAddresses , err := net . LookupHost ( URL . Hostname ( ) )
2017-09-19 06:06:11 +08:00
if err != nil {
2019-09-24 06:39:50 +08:00
p . Log . Errorf ( "Could not resolve %q, skipping it. Error: %s" , URL . Host , err . Error ( ) )
2017-09-19 06:06:11 +08:00
continue
}
for _ , resolved := range resolvedAddresses {
2018-02-06 03:16:00 +08:00
serviceURL := p . AddressToURL ( URL , resolved )
2019-01-17 07:49:24 +08:00
allURLs [ serviceURL . String ( ) ] = URLAndAddress {
URL : serviceURL ,
Address : resolved ,
OriginalURL : URL ,
}
2017-09-19 06:06:11 +08:00
}
}
2018-02-06 03:16:00 +08:00
return allURLs , nil
2017-09-19 06:06:11 +08:00
}
2015-06-20 20:38:01 +08:00
// Reads stats from all configured servers accumulates stats.
// Returns one of the errors encountered while gather stats (if any).
2016-03-18 03:17:48 +08:00
func ( p * Prometheus ) Gather ( acc telegraf . Accumulator ) error {
2017-05-10 07:20:43 +08:00
if p . client == nil {
2018-11-03 08:51:40 +08:00
client , err := p . createHTTPClient ( )
2017-05-10 07:20:43 +08:00
if err != nil {
return err
}
p . client = client
}
2015-06-20 20:38:01 +08:00
var wg sync . WaitGroup
2018-02-06 03:16:00 +08:00
allURLs , err := p . GetAllURLs ( )
2017-09-19 06:06:11 +08:00
if err != nil {
return err
}
2018-02-06 03:16:00 +08:00
for _ , URL := range allURLs {
2015-06-20 20:38:01 +08:00
wg . Add ( 1 )
2018-02-06 03:16:00 +08:00
go func ( serviceURL URLAndAddress ) {
2015-06-20 20:38:01 +08:00
defer wg . Done ( )
2018-02-06 03:16:00 +08:00
acc . AddError ( p . gatherURL ( serviceURL , acc ) )
} ( URL )
2015-06-20 20:38:01 +08:00
}
wg . Wait ( )
2017-04-25 02:13:26 +08:00
return nil
2015-06-20 20:38:01 +08:00
}
2018-11-03 08:51:40 +08:00
func ( p * Prometheus ) createHTTPClient ( ) ( * http . Client , error ) {
2018-05-05 07:33:23 +08:00
tlsCfg , err := p . ClientConfig . TLSConfig ( )
2016-06-23 15:59:44 +08:00
if err != nil {
2017-05-10 07:20:43 +08:00
return nil , err
2016-06-23 15:59:44 +08:00
}
2017-05-10 07:20:43 +08:00
client := & http . Client {
Transport : & http . Transport {
TLSClientConfig : tlsCfg ,
DisableKeepAlives : true ,
} ,
Timeout : p . ResponseTimeout . Duration ,
2016-03-18 03:17:48 +08:00
}
2017-05-10 07:20:43 +08:00
return client , nil
}
2018-02-06 03:16:00 +08:00
func ( p * Prometheus ) gatherURL ( u URLAndAddress , acc telegraf . Accumulator ) error {
2018-11-03 08:51:40 +08:00
var req * http . Request
var err error
var uClient * http . Client
2019-11-21 12:53:57 +08:00
var metrics [ ] telegraf . Metric
2018-11-03 08:51:40 +08:00
if u . URL . Scheme == "unix" {
path := u . URL . Query ( ) . Get ( "path" )
if path == "" {
path = "/metrics"
}
2020-10-08 23:20:35 +08:00
addr := "http://localhost" + path
req , err = http . NewRequest ( "GET" , addr , nil )
if err != nil {
return fmt . Errorf ( "unable to create new request '%s': %s" , addr , err )
}
2018-11-03 08:51:40 +08:00
// ignore error because it's been handled before getting here
tlsCfg , _ := p . ClientConfig . TLSConfig ( )
uClient = & http . Client {
Transport : & http . Transport {
TLSClientConfig : tlsCfg ,
DisableKeepAlives : true ,
Dial : func ( network , addr string ) ( net . Conn , error ) {
c , err := net . Dial ( "unix" , u . URL . Path )
return c , err
} ,
} ,
Timeout : p . ResponseTimeout . Duration ,
}
} else {
if u . URL . Path == "" {
u . URL . Path = "/metrics"
}
req , err = http . NewRequest ( "GET" , u . URL . String ( ) , nil )
2020-10-08 23:20:35 +08:00
if err != nil {
return fmt . Errorf ( "unable to create new request '%s': %s" , u . URL . String ( ) , err )
}
2018-11-03 08:51:40 +08:00
}
2017-05-10 07:20:43 +08:00
req . Header . Add ( "Accept" , acceptHeader )
2016-03-18 03:17:48 +08:00
if p . BearerToken != "" {
2019-01-16 07:25:26 +08:00
token , err := ioutil . ReadFile ( p . BearerToken )
2016-03-18 03:17:48 +08:00
if err != nil {
return err
}
req . Header . Set ( "Authorization" , "Bearer " + string ( token ) )
2019-01-16 07:25:26 +08:00
} else if p . BearerTokenString != "" {
req . Header . Set ( "Authorization" , "Bearer " + p . BearerTokenString )
2019-07-03 02:14:48 +08:00
} else if p . Username != "" || p . Password != "" {
req . SetBasicAuth ( p . Username , p . Password )
2016-03-18 03:17:48 +08:00
}
2018-11-03 08:51:40 +08:00
var resp * http . Response
if u . URL . Scheme != "unix" {
resp , err = p . client . Do ( req )
} else {
resp , err = uClient . Do ( req )
}
2015-06-20 20:38:01 +08:00
if err != nil {
2018-02-06 03:16:00 +08:00
return fmt . Errorf ( "error making HTTP request to %s: %s" , u . URL , err )
2015-06-20 20:38:01 +08:00
}
defer resp . Body . Close ( )
2018-11-03 08:51:40 +08:00
2015-06-20 20:38:01 +08:00
if resp . StatusCode != http . StatusOK {
2018-02-06 03:16:00 +08:00
return fmt . Errorf ( "%s returned HTTP status %s" , u . URL , resp . Status )
2015-06-20 20:38:01 +08:00
}
2016-03-02 00:12:23 +08:00
body , err := ioutil . ReadAll ( resp . Body )
if err != nil {
return fmt . Errorf ( "error reading body: %s" , err )
}
2015-06-20 20:38:01 +08:00
2019-11-21 12:53:57 +08:00
if p . MetricVersion == 2 {
2021-01-08 00:21:09 +08:00
parser := parser_v2 . Parser { Header : resp . Header }
2020-12-03 03:48:44 +08:00
metrics , err = parser . Parse ( body )
2019-11-21 12:53:57 +08:00
} else {
metrics , err = Parse ( body , resp . Header )
}
2016-03-02 00:12:23 +08:00
if err != nil {
2016-07-07 18:15:47 +08:00
return fmt . Errorf ( "error reading metrics for %s: %s" ,
2018-02-06 03:16:00 +08:00
u . URL , err )
2016-03-02 00:12:23 +08:00
}
2018-11-03 08:51:40 +08:00
2016-03-02 00:12:23 +08:00
for _ , metric := range metrics {
tags := metric . Tags ( )
2018-02-06 03:16:00 +08:00
// strip user and password from URL
u . OriginalURL . User = nil
2019-11-27 07:46:31 +08:00
if p . URLTag != "" {
tags [ p . URLTag ] = u . OriginalURL . String ( )
}
2018-02-06 03:16:00 +08:00
if u . Address != "" {
tags [ "address" ] = u . Address
2017-09-19 06:06:11 +08:00
}
2018-11-06 05:30:16 +08:00
for k , v := range u . Tags {
tags [ k ] = v
}
2017-10-19 05:51:08 +08:00
switch metric . Type ( ) {
case telegraf . Counter :
acc . AddCounter ( metric . Name ( ) , metric . Fields ( ) , tags , metric . Time ( ) )
case telegraf . Gauge :
acc . AddGauge ( metric . Name ( ) , metric . Fields ( ) , tags , metric . Time ( ) )
2017-10-25 07:28:52 +08:00
case telegraf . Summary :
acc . AddSummary ( metric . Name ( ) , metric . Fields ( ) , tags , metric . Time ( ) )
case telegraf . Histogram :
acc . AddHistogram ( metric . Name ( ) , metric . Fields ( ) , tags , metric . Time ( ) )
2017-10-19 05:51:08 +08:00
default :
acc . AddFields ( metric . Name ( ) , metric . Fields ( ) , tags , metric . Time ( ) )
}
2015-06-20 20:38:01 +08:00
}
2015-10-23 00:17:57 +08:00
2015-06-20 20:38:01 +08:00
return nil
}
2021-03-09 00:00:56 +08:00
/ * Check if the field selector specified is valid .
* See ToSelectableFields ( ) for list of fields that are selectable :
* https : //github.com/kubernetes/kubernetes/release-1.20/pkg/registry/core/pod/strategy.go
* /
func fieldSelectorIsSupported ( fieldSelector fields . Selector ) ( bool , string ) {
supportedFieldsToSelect := map [ string ] bool {
"spec.nodeName" : true ,
"spec.restartPolicy" : true ,
"spec.schedulerName" : true ,
"spec.serviceAccountName" : true ,
"status.phase" : true ,
"status.podIP" : true ,
"status.nominatedNodeName" : true ,
}
for _ , requirement := range fieldSelector . Requirements ( ) {
if ! supportedFieldsToSelect [ requirement . Field ] {
return false , requirement . Field
}
}
return true , ""
}
2018-11-06 05:30:16 +08:00
// Start launches Kubernetes pod monitoring when monitor_kubernetes_pods is
// enabled and does nothing otherwise. The cancel function of the context
// handed to the monitor is stored so that Stop can end it. The accumulator
// argument is not used.
func (p *Prometheus) Start(a telegraf.Accumulator) error {
	if !p.MonitorPods {
		return nil
	}

	ctx, cancel := context.WithCancel(context.Background())
	p.cancel = cancel
	return p.start(ctx)
}
func ( p * Prometheus ) Stop ( ) {
2018-12-15 06:34:05 +08:00
if p . MonitorPods {
p . cancel ( )
}
2018-11-06 05:30:16 +08:00
p . wg . Wait ( )
}
2015-06-20 20:38:01 +08:00
func init ( ) {
2016-01-28 05:21:36 +08:00
inputs . Add ( "prometheus" , func ( ) telegraf . Input {
2019-01-17 07:49:24 +08:00
return & Prometheus {
ResponseTimeout : internal . Duration { Duration : time . Second * 3 } ,
kubernetesPods : map [ string ] URLAndAddress { } ,
2019-11-21 12:53:57 +08:00
URLTag : "url" ,
2019-01-17 07:49:24 +08:00
}
2015-06-20 20:38:01 +08:00
} )
}