feat(inputs.couchbase): Add failover metrics (#13825)
This commit is contained in:
parent
1d24efe55c
commit
fb45a1d98a
|
|
@ -43,10 +43,14 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
|||
## Whether to collect cluster-wide bucket statistics
|
||||
## It is recommended to disable this in favor of node_bucket_stats
|
||||
## to get a better view of the cluster.
|
||||
cluster_bucket_stats = true
|
||||
# cluster_bucket_stats = true
|
||||
|
||||
## Whether to collect bucket stats for each individual node
|
||||
node_bucket_stats = false
|
||||
# node_bucket_stats = false
|
||||
|
||||
## List of additional stats to collect, choose from:
|
||||
## * autofailover
|
||||
# additional_stats = []
|
||||
```
|
||||
|
||||
## Metrics
|
||||
|
|
@ -65,6 +69,21 @@ Fields:
|
|||
- memory_free (unit: bytes, example: 23181365248.0)
|
||||
- memory_total (unit: bytes, example: 64424656896.0)
|
||||
|
||||
### couchbase_autofailover
|
||||
|
||||
Tags:
|
||||
|
||||
- cluster: sanitized string from `servers` configuration field
|
||||
e.g.: `http://user:password@couchbase-0.example.com:8091/endpoint` becomes
|
||||
`http://couchbase-0.example.com:8091/endpoint`
|
||||
|
||||
Fields:
|
||||
|
||||
- count (unit: int, example: 1)
|
||||
- enabled (unit: bool, example: true)
|
||||
- max_count (unit: int, example: 2)
|
||||
- timeout (unit: seconds, example: 72)
|
||||
|
||||
### couchbase_bucket and couchbase_node_bucket
|
||||
|
||||
Tags:
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ package couchbase
|
|||
import (
|
||||
_ "embed"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"sync"
|
||||
|
|
@ -13,6 +14,7 @@ import (
|
|||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/filter"
|
||||
"github.com/influxdata/telegraf/internal/choice"
|
||||
"github.com/influxdata/telegraf/plugins/common/tls"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
)
|
||||
|
|
@ -21,12 +23,11 @@ import (
|
|||
var sampleConfig string
|
||||
|
||||
type Couchbase struct {
|
||||
Servers []string
|
||||
|
||||
Servers []string `toml:"servers"`
|
||||
BucketStatsIncluded []string `toml:"bucket_stats_included"`
|
||||
|
||||
ClusterBucketStats bool `toml:"cluster_bucket_stats"`
|
||||
NodeBucketStats bool `toml:"node_bucket_stats"`
|
||||
ClusterBucketStats bool `toml:"cluster_bucket_stats"`
|
||||
NodeBucketStats bool `toml:"node_bucket_stats"`
|
||||
AdditionalStats []string `toml:"additional_stats"`
|
||||
|
||||
bucketInclude filter.Filter
|
||||
client *http.Client
|
||||
|
|
@ -34,6 +35,13 @@ type Couchbase struct {
|
|||
tls.ClientConfig
|
||||
}
|
||||
|
||||
// autoFailover is the decoding target for the JSON body returned by the
// "/settings/autoFailover" endpoint (see gatherAutoFailoverStats). Only the
// four keys below are read; any other keys in the response are ignored by
// the decoder.
type autoFailover struct {
|
||||
// Count is reported as the "count" field.
Count int `json:"count"`
|
||||
// Enabled is reported as the "enabled" field.
Enabled bool `json:"enabled"`
|
||||
// MaxCount is read from the camel-case "maxCount" key and reported as the
// "max_count" field.
MaxCount int `json:"maxCount"`
|
||||
// Timeout is reported as the "timeout" field.
// NOTE(review): presumably expressed in seconds -- confirm against the
// Couchbase REST API documentation.
Timeout int `json:"timeout"`
|
||||
}
|
||||
|
||||
// regexpURI matches an optional scheme prefix (capture group 1) followed by a
// "user:password@" credentials section (capture group 2). gatherServer
// replaces each match with "${1}", which keeps the scheme and drops the
// credentials before the address is used as the "cluster" tag.
var regexpURI = regexp.MustCompile(`(\S+://)?(\S+\:\S+@)`)
|
||||
|
||||
func (*Couchbase) SampleConfig() string {
|
||||
|
|
@ -87,9 +95,8 @@ func (cb *Couchbase) gatherServer(acc telegraf.Accumulator, addr string) error {
|
|||
acc.AddFields("couchbase_node", fields, tags)
|
||||
}
|
||||
|
||||
cluster := regexpURI.ReplaceAllString(addr, "${1}")
|
||||
for name, bucket := range pool.BucketMap {
|
||||
cluster := regexpURI.ReplaceAllString(addr, "${1}")
|
||||
|
||||
if cb.ClusterBucketStats {
|
||||
fields := cb.basicBucketStats(bucket.BasicStats)
|
||||
tags := map[string]string{"cluster": cluster, "bucket": name}
|
||||
|
|
@ -117,9 +124,49 @@ func (cb *Couchbase) gatherServer(acc telegraf.Accumulator, addr string) error {
|
|||
}
|
||||
}
|
||||
|
||||
if choice.Contains("autofailover", cb.AdditionalStats) {
|
||||
tags := map[string]string{"cluster": cluster}
|
||||
fields, err := cb.gatherAutoFailoverStats(addr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to collect autofailover settings: %w", err)
|
||||
}
|
||||
|
||||
acc.AddFields("couchbase_autofailover", fields, tags)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cb *Couchbase) gatherAutoFailoverStats(server string) (map[string]any, error) {
|
||||
var fields map[string]any
|
||||
|
||||
url := server + "/settings/autoFailover"
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return fields, err
|
||||
}
|
||||
|
||||
r, err := cb.client.Do(req)
|
||||
if err != nil {
|
||||
return fields, err
|
||||
}
|
||||
defer r.Body.Close()
|
||||
|
||||
var stats autoFailover
|
||||
if err := json.NewDecoder(r.Body).Decode(&stats); err != nil {
|
||||
return fields, err
|
||||
}
|
||||
|
||||
fields = map[string]any{
|
||||
"count": stats.Count,
|
||||
"enabled": stats.Enabled,
|
||||
"max_count": stats.MaxCount,
|
||||
"timeout": stats.Timeout,
|
||||
}
|
||||
|
||||
return fields, nil
|
||||
}
|
||||
|
||||
// basicBucketStats gets the basic bucket statistics
|
||||
func (cb *Couchbase) basicBucketStats(basicStats map[string]interface{}) map[string]interface{} {
|
||||
fields := make(map[string]interface{})
|
||||
|
|
|
|||
|
|
@ -171,6 +171,50 @@ func TestGatherNodeOnly(t *testing.T) {
|
|||
acc.AssertDoesNotContainMeasurement(t, "couchbase_bucket")
|
||||
}
|
||||
|
||||
func TestGatherFailover(t *testing.T) {
|
||||
faker := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.URL.Path {
|
||||
case "/pools":
|
||||
_, _ = w.Write(readJSON(t, "testdata/pools_response.json"))
|
||||
case "/pools/default":
|
||||
_, _ = w.Write(readJSON(t, "testdata/pools_default_response.json"))
|
||||
case "/pools/default/buckets":
|
||||
_, _ = w.Write(readJSON(t, "testdata/bucket_response.json"))
|
||||
case "/settings/autoFailover":
|
||||
_, _ = w.Write(readJSON(t, "testdata/settings_autofailover.json"))
|
||||
default:
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
}
|
||||
}))
|
||||
|
||||
cb := Couchbase{
|
||||
Servers: []string{faker.URL},
|
||||
ClusterBucketStats: false,
|
||||
NodeBucketStats: false,
|
||||
AdditionalStats: []string{"autofailover"},
|
||||
}
|
||||
require.NoError(t, cb.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, cb.gatherServer(&acc, faker.URL))
|
||||
require.Equal(t, 0, len(acc.Errors))
|
||||
require.Equal(t, 8, len(acc.Metrics))
|
||||
|
||||
var metric *testutil.Metric
|
||||
for _, m := range acc.Metrics {
|
||||
if m.Measurement == "couchbase_autofailover" {
|
||||
metric = m
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
require.NotNil(t, metric)
|
||||
require.Equal(t, 1, metric.Fields["count"])
|
||||
require.Equal(t, true, metric.Fields["enabled"])
|
||||
require.Equal(t, 2, metric.Fields["max_count"])
|
||||
require.Equal(t, 72, metric.Fields["timeout"])
|
||||
}
|
||||
|
||||
func readJSON(t *testing.T, jsonFilePath string) []byte {
|
||||
data, err := os.ReadFile(jsonFilePath)
|
||||
require.NoErrorf(t, err, "could not read from data file %s", jsonFilePath)
|
||||
|
|
|
|||
|
|
@ -25,7 +25,11 @@
|
|||
## Whether to collect cluster-wide bucket statistics
|
||||
## It is recommended to disable this in favor of node_bucket_stats
|
||||
## to get a better view of the cluster.
|
||||
cluster_bucket_stats = true
|
||||
# cluster_bucket_stats = true
|
||||
|
||||
## Whether to collect bucket stats for each individual node
|
||||
node_bucket_stats = false
|
||||
# node_bucket_stats = false
|
||||
|
||||
## List of additional stats to collect, choose from:
|
||||
## * autofailover
|
||||
# additional_stats = []
|
||||
|
|
|
|||
|
|
@ -0,0 +1,11 @@
|
|||
{
|
||||
"enabled": true,
|
||||
"timeout": 72,
|
||||
"count": 1,
|
||||
"failoverOnDataDiskIssues": {
|
||||
"enabled": true,
|
||||
"timePeriod": 89
|
||||
},
|
||||
"maxCount": 2,
|
||||
"canAbortRebalance": true
|
||||
}
|
||||
Loading…
Reference in New Issue