Add configurable number of 'most recent' date-stamped indices to gather in Elasticsearch input (#8543)

Add configurable number of 'most recent' date-stamped indices to gather in the Elasticsearch input plugin, and allow wildcards to account for date-suffixed index names. Configuring '3' for num_most_recent_indices will only gather the 3 latest indices, based on the date or number they end with. Finding the date or number is dependent on the targeted indices being configured with wildcards at the end of their 'base' names.
This commit is contained in:
David Bennett 2020-12-21 11:45:19 -05:00 committed by GitHub
parent a7dff56dde
commit c47fcf6626
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 2206 additions and 47 deletions

View File

@ -53,6 +53,7 @@ Note that specific statistics information can change between Elasticsearch versi
cluster_stats_only_from_master = true cluster_stats_only_from_master = true
## Indices to collect; can be one or more indices names or _all ## Indices to collect; can be one or more indices names or _all
## Use of wildcards is allowed. Use a wildcard at the end to retrieve index names that end with a changing value, like a date.
indices_include = ["_all"] indices_include = ["_all"]
## One of "shards", "cluster", "indices" ## One of "shards", "cluster", "indices"
@ -74,6 +75,10 @@ Note that specific statistics information can change between Elasticsearch versi
# tls_key = "/etc/telegraf/key.pem" # tls_key = "/etc/telegraf/key.pem"
## Use TLS but skip chain & host verification ## Use TLS but skip chain & host verification
# insecure_skip_verify = false # insecure_skip_verify = false
## Sets the number of most recent indices to return for indices that are configured with a date-stamped suffix.
## Each 'indices_include' entry ending with a wildcard (*) or glob matching pattern will group together all indices that match it, and
## sort them by the date or number after the wildcard. Metrics then are gathered for only the 'num_most_recent_indices' amount of most
## recent indices.
# num_most_recent_indices = 0
``` ```
### Metrics ### Metrics

View File

@ -12,6 +12,7 @@ import (
"time" "time"
"github.com/influxdata/telegraf" "github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/filter"
"github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/plugins/common/tls" "github.com/influxdata/telegraf/plugins/common/tls"
"github.com/influxdata/telegraf/plugins/inputs" "github.com/influxdata/telegraf/plugins/inputs"
@ -115,6 +116,7 @@ const sampleConfig = `
cluster_stats_only_from_master = true cluster_stats_only_from_master = true
## Indices to collect; can be one or more indices names or _all ## Indices to collect; can be one or more indices names or _all
## Use of wildcards is allowed. Use a wildcard at the end to retrieve index names that end with a changing value, like a date.
indices_include = ["_all"] indices_include = ["_all"]
## One of "shards", "cluster", "indices" ## One of "shards", "cluster", "indices"
@ -135,6 +137,11 @@ const sampleConfig = `
# tls_key = "/etc/telegraf/key.pem" # tls_key = "/etc/telegraf/key.pem"
## Use TLS but skip chain & host verification ## Use TLS but skip chain & host verification
# insecure_skip_verify = false # insecure_skip_verify = false
## Sets the number of most recent indices to return for indices that are configured with a date-stamped suffix.
## Each 'indices_include' entry ending with a wildcard (*) or glob matching pattern will group together all indices that match it, and sort them
## by the date or number after the wildcard. Metrics then are gathered for only the 'num_most_recent_indices' amount of most recent indices.
# num_most_recent_indices = 0
` `
// Elasticsearch is a plugin to read stats from one or many Elasticsearch // Elasticsearch is a plugin to read stats from one or many Elasticsearch
@ -152,11 +159,14 @@ type Elasticsearch struct {
NodeStats []string `toml:"node_stats"` NodeStats []string `toml:"node_stats"`
Username string `toml:"username"` Username string `toml:"username"`
Password string `toml:"password"` Password string `toml:"password"`
NumMostRecentIndices int `toml:"num_most_recent_indices"`
tls.ClientConfig tls.ClientConfig
client *http.Client client *http.Client
serverInfo map[string]serverInfo serverInfo map[string]serverInfo
serverInfoMutex sync.Mutex serverInfoMutex sync.Mutex
indexMatchers map[string]filter.Filter
} }
type serverInfo struct { type serverInfo struct {
nodeID string nodeID string
@ -214,6 +224,19 @@ func (e *Elasticsearch) Description() string {
return "Read stats from one or more Elasticsearch servers or clusters" return "Read stats from one or more Elasticsearch servers or clusters"
} }
// Init prepares the plugin for gathering by pre-compiling the glob
// matchers for every configured 'indices_include' entry. A compile
// failure (invalid pattern) is surfaced here, before the first Gather.
func (e *Elasticsearch) Init() error {
	var err error
	if e.indexMatchers, err = e.compileIndexMatchers(); err != nil {
		return err
	}
	return nil
}
// Gather reads the stats from Elasticsearch and writes it to the // Gather reads the stats from Elasticsearch and writes it to the
// Accumulator. // Accumulator.
func (e *Elasticsearch) Gather(acc telegraf.Accumulator) error { func (e *Elasticsearch) Gather(acc telegraf.Accumulator) error {
@ -527,66 +550,135 @@ func (e *Elasticsearch) gatherIndicesStats(url string, acc telegraf.Accumulator)
acc.AddFields("elasticsearch_indices_stats_"+m, jsonParser.Fields, map[string]string{"index_name": "_all"}, now) acc.AddFields("elasticsearch_indices_stats_"+m, jsonParser.Fields, map[string]string{"index_name": "_all"}, now)
} }
// Individual Indices stats // Gather stats for each index.
for id, index := range indicesStats.Indices { err := e.gatherIndividualIndicesStats(indicesStats.Indices, now, acc)
indexTag := map[string]string{"index_name": id}
stats := map[string]interface{}{ return err
"primaries": index.Primaries, }
"total": index.Total,
// gatherSortedIndicesStats gathers stats for all indices in no particular order.
func (e *Elasticsearch) gatherIndividualIndicesStats(indices map[string]indexStat, now time.Time, acc telegraf.Accumulator) error {
// Sort indices into buckets based on their configured prefix, if any matches.
categorizedIndexNames, err := e.categorizeIndices(indices)
if err != nil {
return err
}
for _, matchingIndices := range categorizedIndexNames {
// Establish the number of each category of indices to use. User can configure to use only the latest 'X' amount.
indicesCount := len(matchingIndices)
indicesToTrackCount := indicesCount
// Sort the indices if configured to do so.
if e.NumMostRecentIndices > 0 {
if e.NumMostRecentIndices < indicesToTrackCount {
indicesToTrackCount = e.NumMostRecentIndices
}
sort.Strings(matchingIndices)
} }
for m, s := range stats {
f := jsonparser.JSONFlattener{} // Gather only the number of indexes that have been configured, in descending order (most recent, if date-stamped).
// parse Json, getting strings and bools for i := indicesCount - 1; i >= indicesCount-indicesToTrackCount; i-- {
err := f.FullFlattenJSON("", s, true, true) indexName := matchingIndices[i]
err := e.gatherSingleIndexStats(indexName, indices[indexName], now, acc)
if err != nil { if err != nil {
return err return err
} }
acc.AddFields("elasticsearch_indices_stats_"+m, f.Fields, indexTag, now) }
}
return nil
}
func (e *Elasticsearch) categorizeIndices(indices map[string]indexStat) (map[string][]string, error) {
categorizedIndexNames := map[string][]string{}
// If all indices are configured to be gathered, bucket them all together.
if len(e.IndicesInclude) == 0 || e.IndicesInclude[0] == "_all" {
for indexName := range indices {
categorizedIndexNames["_all"] = append(categorizedIndexNames["_all"], indexName)
} }
if e.IndicesLevel == "shards" { return categorizedIndexNames, nil
for shardNumber, shards := range index.Shards { }
for _, shard := range shards {
// Get Shard Stats // Bucket each returned index with its associated configured index (if any match).
flattened := jsonparser.JSONFlattener{} for indexName := range indices {
err := flattened.FullFlattenJSON("", shard, true, true) match := indexName
if err != nil { for name, matcher := range e.indexMatchers {
return err // If a configured index matches one of the returned indexes, mark it as a match.
} if matcher.Match(match) {
match = name
break
}
}
// determine shard tag and primary/replica designation // Bucket all matching indices together for sorting.
shardType := "replica" categorizedIndexNames[match] = append(categorizedIndexNames[match], indexName)
if flattened.Fields["routing_primary"] == true { }
shardType = "primary"
}
delete(flattened.Fields, "routing_primary")
routingState, ok := flattened.Fields["routing_state"].(string) return categorizedIndexNames, nil
if ok { }
flattened.Fields["routing_state"] = mapShardStatusToCode(routingState)
}
routingNode, _ := flattened.Fields["routing_node"].(string) func (e *Elasticsearch) gatherSingleIndexStats(name string, index indexStat, now time.Time, acc telegraf.Accumulator) error {
shardTags := map[string]string{ indexTag := map[string]string{"index_name": name}
"index_name": id, stats := map[string]interface{}{
"node_id": routingNode, "primaries": index.Primaries,
"shard_name": string(shardNumber), "total": index.Total,
"type": shardType, }
} for m, s := range stats {
f := jsonparser.JSONFlattener{}
// parse Json, getting strings and bools
err := f.FullFlattenJSON("", s, true, true)
if err != nil {
return err
}
acc.AddFields("elasticsearch_indices_stats_"+m, f.Fields, indexTag, now)
}
for key, field := range flattened.Fields { if e.IndicesLevel == "shards" {
switch field.(type) { for shardNumber, shards := range index.Shards {
case string, bool: for _, shard := range shards {
delete(flattened.Fields, key)
}
}
acc.AddFields("elasticsearch_indices_stats_shards", // Get Shard Stats
flattened.Fields, flattened := jsonparser.JSONFlattener{}
shardTags, err := flattened.FullFlattenJSON("", shard, true, true)
now) if err != nil {
return err
} }
// determine shard tag and primary/replica designation
shardType := "replica"
if flattened.Fields["routing_primary"] == true {
shardType = "primary"
}
delete(flattened.Fields, "routing_primary")
routingState, ok := flattened.Fields["routing_state"].(string)
if ok {
flattened.Fields["routing_state"] = mapShardStatusToCode(routingState)
}
routingNode, _ := flattened.Fields["routing_node"].(string)
shardTags := map[string]string{
"index_name": name,
"node_id": routingNode,
"shard_name": string(shardNumber),
"type": shardType,
}
for key, field := range flattened.Fields {
switch field.(type) {
case string, bool:
delete(flattened.Fields, key)
}
}
acc.AddFields("elasticsearch_indices_stats_shards",
flattened.Fields,
shardTags,
now)
} }
} }
} }
@ -656,6 +748,23 @@ func (e *Elasticsearch) gatherJSONData(url string, v interface{}) error {
return nil return nil
} }
// compileIndexMatchers builds one glob matcher per configured
// 'indices_include' pattern, keyed by the pattern string itself.
// Duplicate configuration entries are compiled only once.
func (e *Elasticsearch) compileIndexMatchers() (map[string]filter.Filter, error) {
	matchers := make(map[string]filter.Filter, len(e.IndicesInclude))

	for _, pattern := range e.IndicesInclude {
		// Skip patterns we have already compiled.
		if _, ok := matchers[pattern]; ok {
			continue
		}
		compiled, err := filter.Compile([]string{pattern})
		if err != nil {
			return nil, err
		}
		matchers[pattern] = compiled
	}

	return matchers, nil
}
func init() { func init() {
inputs.Add("elasticsearch", func() telegraf.Input { inputs.Add("elasticsearch", func() telegraf.Input {
return NewElasticsearch() return NewElasticsearch()

View File

@ -310,6 +310,49 @@ func TestGatherClusterIndicesStats(t *testing.T) {
map[string]string{"index_name": "twitter"}) map[string]string{"index_name": "twitter"})
} }
// TestGatherDateStampedIndicesStats verifies that when NumMostRecentIndices
// is set, only the N most recent (lexically greatest) indices matching each
// wildcard 'indices_include' entry are gathered, while non-wildcard entries
// such as "penguins" are gathered unconditionally.
func TestGatherDateStampedIndicesStats(t *testing.T) {
	es := newElasticsearchWithClient()
	es.IndicesInclude = []string{"twitter*", "influx*", "penguins"}
	es.NumMostRecentIndices = 2
	es.Servers = []string{"http://example.com:9200"}
	es.client.Transport = newTransportMock(http.StatusOK, dateStampedIndicesResponse)
	es.serverInfo = make(map[string]serverInfo)
	es.serverInfo["http://example.com:9200"] = defaultServerInfo()

	// Init compiles the index matchers; a failure here would make the
	// gathering below meaningless, so fail fast instead of ignoring it.
	if err := es.Init(); err != nil {
		t.Fatal(err)
	}

	var acc testutil.Accumulator
	if err := es.gatherIndicesStats(es.Servers[0]+"/"+strings.Join(es.IndicesInclude, ",")+"/_stats", &acc); err != nil {
		t.Fatal(err)
	}

	// "twitter*" matches 3 indices; only the 2 most recent are expected.
	acc.AssertContainsTaggedFields(t, "elasticsearch_indices_stats_primaries",
		clusterIndicesExpected,
		map[string]string{"index_name": "twitter_2020_08_02"})
	acc.AssertContainsTaggedFields(t, "elasticsearch_indices_stats_primaries",
		clusterIndicesExpected,
		map[string]string{"index_name": "twitter_2020_08_01"})
	acc.AssertDoesNotContainsTaggedFields(t, "elasticsearch_indices_stats_primaries",
		clusterIndicesExpected,
		map[string]string{"index_name": "twitter_2020_07_31"})

	// "influx*" matches 3 indices; only the 2 most recent are expected.
	acc.AssertContainsTaggedFields(t, "elasticsearch_indices_stats_primaries",
		clusterIndicesExpected,
		map[string]string{"index_name": "influx2021.01.02"})
	acc.AssertContainsTaggedFields(t, "elasticsearch_indices_stats_primaries",
		clusterIndicesExpected,
		map[string]string{"index_name": "influx2021.01.01"})
	acc.AssertDoesNotContainsTaggedFields(t, "elasticsearch_indices_stats_primaries",
		clusterIndicesExpected,
		map[string]string{"index_name": "influx2020.12.31"})

	// "penguins" has no wildcard, so it is not sorted/limited; it must
	// still be included in the gathered metrics.
	acc.AssertContainsTaggedFields(t, "elasticsearch_indices_stats_primaries",
		clusterIndicesExpected,
		map[string]string{"index_name": "penguins"})
}
func TestGatherClusterIndiceShardsStats(t *testing.T) { func TestGatherClusterIndiceShardsStats(t *testing.T) {
es := newElasticsearchWithClient() es := newElasticsearchWithClient()
es.IndicesLevel = "shards" es.IndicesLevel = "shards"

File diff suppressed because it is too large Load Diff