feat(inputs.couchbase): Add failover metrics (#13825)
This commit is contained in:
parent
1d24efe55c
commit
fb45a1d98a
|
|
@ -43,10 +43,14 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
||||||
## Whether to collect cluster-wide bucket statistics
|
## Whether to collect cluster-wide bucket statistics
|
||||||
## It is recommended to disable this in favor of node_bucket_stats
|
## It is recommended to disable this in favor of node_bucket_stats
|
||||||
## to get a better view of the cluster.
|
## to get a better view of the cluster.
|
||||||
cluster_bucket_stats = true
|
# cluster_bucket_stats = true
|
||||||
|
|
||||||
## Whether to collect bucket stats for each individual node
|
## Whether to collect bucket stats for each individual node
|
||||||
node_bucket_stats = false
|
# node_bucket_stats = false
|
||||||
|
|
||||||
|
## List of additional stats to collect, choose from:
|
||||||
|
## * autofailover
|
||||||
|
# additional_stats = []
|
||||||
```
|
```
|
||||||
|
|
||||||
## Metrics
|
## Metrics
|
||||||
|
|
@ -65,6 +69,21 @@ Fields:
|
||||||
- memory_free (unit: bytes, example: 23181365248.0)
|
- memory_free (unit: bytes, example: 23181365248.0)
|
||||||
- memory_total (unit: bytes, example: 64424656896.0)
|
- memory_total (unit: bytes, example: 64424656896.0)
|
||||||
|
|
||||||
|
### couchbase_autofailover
|
||||||
|
|
||||||
|
Tags:
|
||||||
|
|
||||||
|
- cluster: sanitized string from `servers` configuration field
|
||||||
|
e.g.: `http://user:password@couchbase-0.example.com:8091/endpoint` becomes
|
||||||
|
`http://couchbase-0.example.com:8091/endpoint`
|
||||||
|
|
||||||
|
Fields:
|
||||||
|
|
||||||
|
- count (unit: int, example: 1)
|
||||||
|
- enabled (unit: bool, example: true)
|
||||||
|
- max_count (unit: int, example: 2)
|
||||||
|
- timeout (unit: int, example: 72)
|
||||||
|
|
||||||
### couchbase_bucket and couchbase_node_bucket
|
### couchbase_bucket and couchbase_node_bucket
|
||||||
|
|
||||||
Tags:
|
Tags:
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ package couchbase
|
||||||
import (
|
import (
|
||||||
_ "embed"
|
_ "embed"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"regexp"
|
"regexp"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
@ -13,6 +14,7 @@ import (
|
||||||
|
|
||||||
"github.com/influxdata/telegraf"
|
"github.com/influxdata/telegraf"
|
||||||
"github.com/influxdata/telegraf/filter"
|
"github.com/influxdata/telegraf/filter"
|
||||||
|
"github.com/influxdata/telegraf/internal/choice"
|
||||||
"github.com/influxdata/telegraf/plugins/common/tls"
|
"github.com/influxdata/telegraf/plugins/common/tls"
|
||||||
"github.com/influxdata/telegraf/plugins/inputs"
|
"github.com/influxdata/telegraf/plugins/inputs"
|
||||||
)
|
)
|
||||||
|
|
@ -21,12 +23,11 @@ import (
|
||||||
var sampleConfig string
|
var sampleConfig string
|
||||||
|
|
||||||
type Couchbase struct {
|
type Couchbase struct {
|
||||||
Servers []string
|
Servers []string `toml:"servers"`
|
||||||
|
|
||||||
BucketStatsIncluded []string `toml:"bucket_stats_included"`
|
BucketStatsIncluded []string `toml:"bucket_stats_included"`
|
||||||
|
|
||||||
ClusterBucketStats bool `toml:"cluster_bucket_stats"`
|
ClusterBucketStats bool `toml:"cluster_bucket_stats"`
|
||||||
NodeBucketStats bool `toml:"node_bucket_stats"`
|
NodeBucketStats bool `toml:"node_bucket_stats"`
|
||||||
|
AdditionalStats []string `toml:"additional_stats"`
|
||||||
|
|
||||||
bucketInclude filter.Filter
|
bucketInclude filter.Filter
|
||||||
client *http.Client
|
client *http.Client
|
||||||
|
|
@ -34,6 +35,13 @@ type Couchbase struct {
|
||||||
tls.ClientConfig
|
tls.ClientConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// autoFailover is the subset of the JSON document served by the
// /settings/autoFailover endpoint that is decoded by this plugin.
// Keys of the response that have no matching field here are ignored
// by the JSON decoder.
type autoFailover struct {
	// Count is decoded from the "count" key.
	Count int `json:"count"`
	// Enabled is decoded from the "enabled" key.
	Enabled bool `json:"enabled"`
	// MaxCount is decoded from the "maxCount" key.
	MaxCount int `json:"maxCount"`
	// Timeout is decoded from the "timeout" key.
	Timeout int `json:"timeout"`
}
|
||||||
|
|
||||||
var regexpURI = regexp.MustCompile(`(\S+://)?(\S+\:\S+@)`)
|
var regexpURI = regexp.MustCompile(`(\S+://)?(\S+\:\S+@)`)
|
||||||
|
|
||||||
func (*Couchbase) SampleConfig() string {
|
func (*Couchbase) SampleConfig() string {
|
||||||
|
|
@ -87,9 +95,8 @@ func (cb *Couchbase) gatherServer(acc telegraf.Accumulator, addr string) error {
|
||||||
acc.AddFields("couchbase_node", fields, tags)
|
acc.AddFields("couchbase_node", fields, tags)
|
||||||
}
|
}
|
||||||
|
|
||||||
for name, bucket := range pool.BucketMap {
|
|
||||||
cluster := regexpURI.ReplaceAllString(addr, "${1}")
|
cluster := regexpURI.ReplaceAllString(addr, "${1}")
|
||||||
|
for name, bucket := range pool.BucketMap {
|
||||||
if cb.ClusterBucketStats {
|
if cb.ClusterBucketStats {
|
||||||
fields := cb.basicBucketStats(bucket.BasicStats)
|
fields := cb.basicBucketStats(bucket.BasicStats)
|
||||||
tags := map[string]string{"cluster": cluster, "bucket": name}
|
tags := map[string]string{"cluster": cluster, "bucket": name}
|
||||||
|
|
@ -117,9 +124,49 @@ func (cb *Couchbase) gatherServer(acc telegraf.Accumulator, addr string) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if choice.Contains("autofailover", cb.AdditionalStats) {
|
||||||
|
tags := map[string]string{"cluster": cluster}
|
||||||
|
fields, err := cb.gatherAutoFailoverStats(addr)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("unable to collect autofailover settings: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
acc.AddFields("couchbase_autofailover", fields, tags)
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (cb *Couchbase) gatherAutoFailoverStats(server string) (map[string]any, error) {
|
||||||
|
var fields map[string]any
|
||||||
|
|
||||||
|
url := server + "/settings/autoFailover"
|
||||||
|
req, err := http.NewRequest("GET", url, nil)
|
||||||
|
if err != nil {
|
||||||
|
return fields, err
|
||||||
|
}
|
||||||
|
|
||||||
|
r, err := cb.client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return fields, err
|
||||||
|
}
|
||||||
|
defer r.Body.Close()
|
||||||
|
|
||||||
|
var stats autoFailover
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&stats); err != nil {
|
||||||
|
return fields, err
|
||||||
|
}
|
||||||
|
|
||||||
|
fields = map[string]any{
|
||||||
|
"count": stats.Count,
|
||||||
|
"enabled": stats.Enabled,
|
||||||
|
"max_count": stats.MaxCount,
|
||||||
|
"timeout": stats.Timeout,
|
||||||
|
}
|
||||||
|
|
||||||
|
return fields, nil
|
||||||
|
}
|
||||||
|
|
||||||
// basicBucketStats gets the basic bucket statistics
|
// basicBucketStats gets the basic bucket statistics
|
||||||
func (cb *Couchbase) basicBucketStats(basicStats map[string]interface{}) map[string]interface{} {
|
func (cb *Couchbase) basicBucketStats(basicStats map[string]interface{}) map[string]interface{} {
|
||||||
fields := make(map[string]interface{})
|
fields := make(map[string]interface{})
|
||||||
|
|
|
||||||
|
|
@ -171,6 +171,50 @@ func TestGatherNodeOnly(t *testing.T) {
|
||||||
acc.AssertDoesNotContainMeasurement(t, "couchbase_bucket")
|
acc.AssertDoesNotContainMeasurement(t, "couchbase_bucket")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGatherFailover(t *testing.T) {
|
||||||
|
faker := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
switch r.URL.Path {
|
||||||
|
case "/pools":
|
||||||
|
_, _ = w.Write(readJSON(t, "testdata/pools_response.json"))
|
||||||
|
case "/pools/default":
|
||||||
|
_, _ = w.Write(readJSON(t, "testdata/pools_default_response.json"))
|
||||||
|
case "/pools/default/buckets":
|
||||||
|
_, _ = w.Write(readJSON(t, "testdata/bucket_response.json"))
|
||||||
|
case "/settings/autoFailover":
|
||||||
|
_, _ = w.Write(readJSON(t, "testdata/settings_autofailover.json"))
|
||||||
|
default:
|
||||||
|
w.WriteHeader(http.StatusNotFound)
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
|
||||||
|
cb := Couchbase{
|
||||||
|
Servers: []string{faker.URL},
|
||||||
|
ClusterBucketStats: false,
|
||||||
|
NodeBucketStats: false,
|
||||||
|
AdditionalStats: []string{"autofailover"},
|
||||||
|
}
|
||||||
|
require.NoError(t, cb.Init())
|
||||||
|
|
||||||
|
var acc testutil.Accumulator
|
||||||
|
require.NoError(t, cb.gatherServer(&acc, faker.URL))
|
||||||
|
require.Equal(t, 0, len(acc.Errors))
|
||||||
|
require.Equal(t, 8, len(acc.Metrics))
|
||||||
|
|
||||||
|
var metric *testutil.Metric
|
||||||
|
for _, m := range acc.Metrics {
|
||||||
|
if m.Measurement == "couchbase_autofailover" {
|
||||||
|
metric = m
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
require.NotNil(t, metric)
|
||||||
|
require.Equal(t, 1, metric.Fields["count"])
|
||||||
|
require.Equal(t, true, metric.Fields["enabled"])
|
||||||
|
require.Equal(t, 2, metric.Fields["max_count"])
|
||||||
|
require.Equal(t, 72, metric.Fields["timeout"])
|
||||||
|
}
|
||||||
|
|
||||||
func readJSON(t *testing.T, jsonFilePath string) []byte {
|
func readJSON(t *testing.T, jsonFilePath string) []byte {
|
||||||
data, err := os.ReadFile(jsonFilePath)
|
data, err := os.ReadFile(jsonFilePath)
|
||||||
require.NoErrorf(t, err, "could not read from data file %s", jsonFilePath)
|
require.NoErrorf(t, err, "could not read from data file %s", jsonFilePath)
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,11 @@
|
||||||
## Whether to collect cluster-wide bucket statistics
|
## Whether to collect cluster-wide bucket statistics
|
||||||
## It is recommended to disable this in favor of node_bucket_stats
|
## It is recommended to disable this in favor of node_bucket_stats
|
||||||
## to get a better view of the cluster.
|
## to get a better view of the cluster.
|
||||||
cluster_bucket_stats = true
|
# cluster_bucket_stats = true
|
||||||
|
|
||||||
## Whether to collect bucket stats for each individual node
|
## Whether to collect bucket stats for each individual node
|
||||||
node_bucket_stats = false
|
# node_bucket_stats = false
|
||||||
|
|
||||||
|
## List of additional stats to collect, choose from:
|
||||||
|
## * autofailover
|
||||||
|
# additional_stats = []
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,11 @@
|
||||||
|
{
|
||||||
|
"enabled": true,
|
||||||
|
"timeout": 72,
|
||||||
|
"count": 1,
|
||||||
|
"failoverOnDataDiskIssues": {
|
||||||
|
"enabled": true,
|
||||||
|
"timePeriod": 89
|
||||||
|
},
|
||||||
|
"maxCount": 2,
|
||||||
|
"canAbortRebalance": true
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue