feat: added metrics for member and replica-set avg health of MongoDB (#11516)

This commit is contained in:
Sven Burkard 2022-07-20 21:02:38 +02:00 committed by GitHub
parent ba36cfe676
commit 46f059ebfd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 31 additions and 0 deletions

View File

@ -159,6 +159,8 @@ by running Telegraf with the `--debug` argument.
- repl_updates (integer)
- repl_oplog_window_sec (integer)
- repl_state (integer)
- repl_member_health (integer)
- repl_health_avg (float)
- resident_megabytes (integer)
- state (string)
- storage_freelist_search_bucket_exhausted (integer)

View File

@ -149,6 +149,8 @@ var defaultReplStats = map[string]string{
"member_status": "NodeType",
"state": "NodeState",
"repl_state": "NodeStateInt",
"repl_member_health": "NodeHealthInt",
"repl_health_avg": "ReplHealthAvg",
"repl_lag": "ReplLag",
"repl_network_bytes": "ReplNetworkBytes",
"repl_network_getmores_num": "ReplNetworkGetmoresNum",

View File

@ -447,6 +447,8 @@ func TestStateTag(t *testing.T) {
"repl_updates": int64(0),
"repl_updates_per_sec": int64(0),
"repl_state": int64(0),
"repl_member_health": int64(0),
"repl_health_avg": float64(0),
"resident_megabytes": int64(0),
"state": "PRIMARY",
"storage_freelist_search_bucket_exhausted": int64(0),

View File

@ -139,6 +139,7 @@ type OplogStats struct {
// ReplSetMember stores information related to a replica set member
type ReplSetMember struct {
Name string `bson:"name"`
Health int64 `bson:"health"`
State int64 `bson:"state"`
StateStr string `bson:"stateStr"`
OptimeDate time.Time `bson:"optimeDate"`
@ -783,9 +784,11 @@ type StatLine struct {
NetOut, NetOutCnt int64
NumConnections int64
ReplSetName string
ReplHealthAvg float64
NodeType string
NodeState string
NodeStateInt int64
NodeHealthInt int64
// Replicated Metrics fields
ReplNetworkBytes int64
@ -1332,6 +1335,8 @@ func NewStatLine(oldMongo, newMongo MongoStatus, key string, all bool, sampleSec
returnVal.NodeState = member.StateStr
// Store my state integer
returnVal.NodeStateInt = member.State
// Store my health integer
returnVal.NodeHealthInt = member.Health
if member.State == 1 {
// I'm the master
@ -1356,6 +1361,26 @@ func NewStatLine(oldMongo, newMongo MongoStatus, key string, all bool, sampleSec
returnVal.ReplLag = lag
}
}
// Preparations for the average health state of the replica-set
replMemberCount := len(newReplStat.Members)
replMemberHealthyCount := 0
// A second for-loop is needed because of the break construct above
for _, member := range newReplStat.Members {
// Count only healthy members for the average health state of the replica-set
if member.Health == 1 {
replMemberHealthyCount++
}
}
// Calculate the average health state of the replica-set (for precise monitoring alerts).
// This makes it possible to detect whether a member is unhealthy from the perspective of another member, and how bad the replica-set health is.
if replMemberCount > 0 {
returnVal.ReplHealthAvg = float64(replMemberHealthyCount) / float64(replMemberCount)
} else {
returnVal.ReplHealthAvg = 0.00
}
}
}