feat(inputs.couchbase): Add failover metrics (#13825)

This commit is contained in:
Joshua Powers 2023-08-29 08:02:44 -06:00 committed by GitHub
parent 1d24efe55c
commit fb45a1d98a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 136 additions and 11 deletions

View File

@ -43,10 +43,14 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
## Whether to collect cluster-wide bucket statistics ## Whether to collect cluster-wide bucket statistics
## It is recommended to disable this in favor of node_stats ## It is recommended to disable this in favor of node_stats
## to get a better view of the cluster. ## to get a better view of the cluster.
cluster_bucket_stats = true # cluster_bucket_stats = true
## Whether to collect bucket stats for each individual node ## Whether to collect bucket stats for each individual node
node_bucket_stats = false # node_bucket_stats = false
## List of additional stats to collect, choose from:
## * autofailover
# additional_stats = []
``` ```
## Metrics ## Metrics
@ -65,6 +69,21 @@ Fields:
- memory_free (unit: bytes, example: 23181365248.0) - memory_free (unit: bytes, example: 23181365248.0)
- memory_total (unit: bytes, example: 64424656896.0) - memory_total (unit: bytes, example: 64424656896.0)
### couchbase_autofailover
Tags:
- cluster: sanitized string from `servers` configuration field
e.g.: `http://user:password@couchbase-0.example.com:8091/endpoint` becomes
`http://couchbase-0.example.com:8091/endpoint`
Fields:
- count (unit: int, example: 1)
- enabled (unit: bool, example: true)
- max_count (unit: int, example: 2)
- timeout (unit: seconds, example: 72)
### couchbase_bucket and couchbase_node_bucket ### couchbase_bucket and couchbase_node_bucket
Tags: Tags:

View File

@ -4,6 +4,7 @@ package couchbase
import ( import (
_ "embed" _ "embed"
"encoding/json" "encoding/json"
"fmt"
"net/http" "net/http"
"regexp" "regexp"
"sync" "sync"
@ -13,6 +14,7 @@ import (
"github.com/influxdata/telegraf" "github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/filter" "github.com/influxdata/telegraf/filter"
"github.com/influxdata/telegraf/internal/choice"
"github.com/influxdata/telegraf/plugins/common/tls" "github.com/influxdata/telegraf/plugins/common/tls"
"github.com/influxdata/telegraf/plugins/inputs" "github.com/influxdata/telegraf/plugins/inputs"
) )
@ -21,12 +23,11 @@ import (
var sampleConfig string var sampleConfig string
type Couchbase struct { type Couchbase struct {
Servers []string Servers []string `toml:"servers"`
BucketStatsIncluded []string `toml:"bucket_stats_included"` BucketStatsIncluded []string `toml:"bucket_stats_included"`
ClusterBucketStats bool `toml:"cluster_bucket_stats"` ClusterBucketStats bool `toml:"cluster_bucket_stats"`
NodeBucketStats bool `toml:"node_bucket_stats"` NodeBucketStats bool `toml:"node_bucket_stats"`
AdditionalStats []string `toml:"additional_stats"`
bucketInclude filter.Filter bucketInclude filter.Filter
client *http.Client client *http.Client
@ -34,6 +35,13 @@ type Couchbase struct {
tls.ClientConfig tls.ClientConfig
} }
// autoFailover holds the subset of the "/settings/autoFailover" response that
// gatherAutoFailoverStats decodes and reports as metric fields. Keys present
// in the response but not listed here are ignored by the JSON decoder.
type autoFailover struct {
	// Count is decoded from the "count" key and emitted as the "count" field.
	// NOTE(review): presumably the number of auto-failovers performed so far — confirm against the Couchbase docs.
	Count int `json:"count"`
	// Enabled is decoded from the "enabled" key and emitted as the "enabled" field.
	Enabled bool `json:"enabled"`
	// MaxCount is decoded from the "maxCount" key and emitted as the "max_count" field.
	MaxCount int `json:"maxCount"`
	// Timeout is decoded from the "timeout" key and emitted as the "timeout" field.
	// NOTE(review): unit is presumably seconds — confirm against the Couchbase docs.
	Timeout int `json:"timeout"`
}
var regexpURI = regexp.MustCompile(`(\S+://)?(\S+\:\S+@)`) var regexpURI = regexp.MustCompile(`(\S+://)?(\S+\:\S+@)`)
func (*Couchbase) SampleConfig() string { func (*Couchbase) SampleConfig() string {
@ -87,9 +95,8 @@ func (cb *Couchbase) gatherServer(acc telegraf.Accumulator, addr string) error {
acc.AddFields("couchbase_node", fields, tags) acc.AddFields("couchbase_node", fields, tags)
} }
for name, bucket := range pool.BucketMap {
cluster := regexpURI.ReplaceAllString(addr, "${1}") cluster := regexpURI.ReplaceAllString(addr, "${1}")
for name, bucket := range pool.BucketMap {
if cb.ClusterBucketStats { if cb.ClusterBucketStats {
fields := cb.basicBucketStats(bucket.BasicStats) fields := cb.basicBucketStats(bucket.BasicStats)
tags := map[string]string{"cluster": cluster, "bucket": name} tags := map[string]string{"cluster": cluster, "bucket": name}
@ -117,9 +124,49 @@ func (cb *Couchbase) gatherServer(acc telegraf.Accumulator, addr string) error {
} }
} }
if choice.Contains("autofailover", cb.AdditionalStats) {
tags := map[string]string{"cluster": cluster}
fields, err := cb.gatherAutoFailoverStats(addr)
if err != nil {
return fmt.Errorf("unable to collect autofailover settings: %w", err)
}
acc.AddFields("couchbase_autofailover", fields, tags)
}
return nil return nil
} }
// gatherAutoFailoverStats queries the auto-failover settings endpoint of the
// given server and converts the response into metric fields.
//
// server is the base address of the server; "/settings/autoFailover" is
// appended to it to build the request URL. On success the returned map holds
// the "count", "enabled", "max_count" and "timeout" fields. On any failure
// (request construction, transport error, non-200 status or an undecodable
// body) a nil map and the error are returned.
func (cb *Couchbase) gatherAutoFailoverStats(server string) (map[string]any, error) {
	endpoint := server + "/settings/autoFailover"
	req, err := http.NewRequest(http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, err
	}

	r, err := cb.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer r.Body.Close()

	// Without this check an error response carrying a JSON object body would
	// decode into zero values and be reported as genuine settings. The URL is
	// deliberately left out of the message because it may embed credentials.
	if r.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("received status code %d (%s)", r.StatusCode, http.StatusText(r.StatusCode))
	}

	var stats autoFailover
	if err := json.NewDecoder(r.Body).Decode(&stats); err != nil {
		return nil, err
	}

	return map[string]any{
		"count":     stats.Count,
		"enabled":   stats.Enabled,
		"max_count": stats.MaxCount,
		"timeout":   stats.Timeout,
	}, nil
}
// basicBucketStats gets the basic bucket statistics // basicBucketStats gets the basic bucket statistics
func (cb *Couchbase) basicBucketStats(basicStats map[string]interface{}) map[string]interface{} { func (cb *Couchbase) basicBucketStats(basicStats map[string]interface{}) map[string]interface{} {
fields := make(map[string]interface{}) fields := make(map[string]interface{})

View File

@ -171,6 +171,50 @@ func TestGatherNodeOnly(t *testing.T) {
acc.AssertDoesNotContainMeasurement(t, "couchbase_bucket") acc.AssertDoesNotContainMeasurement(t, "couchbase_bucket")
} }
// TestGatherFailover checks that enabling the "autofailover" additional stat
// makes gatherServer emit a couchbase_autofailover metric whose fields match
// the values in testdata/settings_autofailover.json.
func TestGatherFailover(t *testing.T) {
	faker := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/pools":
			_, _ = w.Write(readJSON(t, "testdata/pools_response.json"))
		case "/pools/default":
			_, _ = w.Write(readJSON(t, "testdata/pools_default_response.json"))
		case "/pools/default/buckets":
			_, _ = w.Write(readJSON(t, "testdata/bucket_response.json"))
		case "/settings/autoFailover":
			_, _ = w.Write(readJSON(t, "testdata/settings_autofailover.json"))
		default:
			w.WriteHeader(http.StatusNotFound)
		}
	}))
	// Shut the fake server down when the test ends so its listener and
	// connection goroutines do not outlive the test.
	defer faker.Close()

	cb := Couchbase{
		Servers:            []string{faker.URL},
		ClusterBucketStats: false,
		NodeBucketStats:    false,
		AdditionalStats:    []string{"autofailover"},
	}
	require.NoError(t, cb.Init())

	var acc testutil.Accumulator
	require.NoError(t, cb.gatherServer(&acc, faker.URL))
	require.Empty(t, acc.Errors)
	require.Len(t, acc.Metrics, 8)

	// Locate the auto-failover metric among everything gatherServer produced.
	var metric *testutil.Metric
	for _, m := range acc.Metrics {
		if m.Measurement == "couchbase_autofailover" {
			metric = m
			break
		}
	}

	require.NotNil(t, metric)
	require.Equal(t, 1, metric.Fields["count"])
	require.Equal(t, true, metric.Fields["enabled"])
	require.Equal(t, 2, metric.Fields["max_count"])
	require.Equal(t, 72, metric.Fields["timeout"])
}
func readJSON(t *testing.T, jsonFilePath string) []byte { func readJSON(t *testing.T, jsonFilePath string) []byte {
data, err := os.ReadFile(jsonFilePath) data, err := os.ReadFile(jsonFilePath)
require.NoErrorf(t, err, "could not read from data file %s", jsonFilePath) require.NoErrorf(t, err, "could not read from data file %s", jsonFilePath)

View File

@ -25,7 +25,11 @@
## Whether to collect cluster-wide bucket statistics ## Whether to collect cluster-wide bucket statistics
## It is recommended to disable this in favor of node_stats ## It is recommended to disable this in favor of node_stats
## to get a better view of the cluster. ## to get a better view of the cluster.
cluster_bucket_stats = true # cluster_bucket_stats = true
## Whether to collect bucket stats for each individual node ## Whether to collect bucket stats for each individual node
node_bucket_stats = false # node_bucket_stats = false
## List of additional stats to collect, choose from:
## * autofailover
# additional_stats = []

View File

@ -0,0 +1,11 @@
{
"enabled": true,
"timeout": 72,
"count": 1,
"failoverOnDataDiskIssues": {
"enabled": true,
"timePeriod": 89
},
"maxCount": 2,
"canAbortRebalance": true
}