feat: added metrics for member and replica-set avg health of MongoDB (#11516)
This commit is contained in:
parent
ba36cfe676
commit
46f059ebfd
|
|
@ -159,6 +159,8 @@ by running Telegraf with the `--debug` argument.
|
|||
- repl_updates (integer)
|
||||
- repl_oplog_window_sec (integer)
|
||||
- repl_state (integer)
|
||||
- repl_member_health (integer)
|
||||
- repl_health_avg (float)
|
||||
- resident_megabytes (integer)
|
||||
- state (string)
|
||||
- storage_freelist_search_bucket_exhausted (integer)
|
||||
|
|
|
|||
|
|
@ -149,6 +149,8 @@ var defaultReplStats = map[string]string{
|
|||
"member_status": "NodeType",
|
||||
"state": "NodeState",
|
||||
"repl_state": "NodeStateInt",
|
||||
"repl_member_health": "NodeHealthInt",
|
||||
"repl_health_avg": "ReplHealthAvg",
|
||||
"repl_lag": "ReplLag",
|
||||
"repl_network_bytes": "ReplNetworkBytes",
|
||||
"repl_network_getmores_num": "ReplNetworkGetmoresNum",
|
||||
|
|
|
|||
|
|
@ -447,6 +447,8 @@ func TestStateTag(t *testing.T) {
|
|||
"repl_updates": int64(0),
|
||||
"repl_updates_per_sec": int64(0),
|
||||
"repl_state": int64(0),
|
||||
"repl_member_health": int64(0),
|
||||
"repl_health_avg": float64(0),
|
||||
"resident_megabytes": int64(0),
|
||||
"state": "PRIMARY",
|
||||
"storage_freelist_search_bucket_exhausted": int64(0),
|
||||
|
|
|
|||
|
|
@ -139,6 +139,7 @@ type OplogStats struct {
|
|||
// ReplSetMember stores information related to a replica set member
|
||||
type ReplSetMember struct {
|
||||
Name string `bson:"name"`
|
||||
Health int64 `bson:"health"`
|
||||
State int64 `bson:"state"`
|
||||
StateStr string `bson:"stateStr"`
|
||||
OptimeDate time.Time `bson:"optimeDate"`
|
||||
|
|
@ -783,9 +784,11 @@ type StatLine struct {
|
|||
NetOut, NetOutCnt int64
|
||||
NumConnections int64
|
||||
ReplSetName string
|
||||
ReplHealthAvg float64
|
||||
NodeType string
|
||||
NodeState string
|
||||
NodeStateInt int64
|
||||
NodeHealthInt int64
|
||||
|
||||
// Replicated Metrics fields
|
||||
ReplNetworkBytes int64
|
||||
|
|
@ -1332,6 +1335,8 @@ func NewStatLine(oldMongo, newMongo MongoStatus, key string, all bool, sampleSec
|
|||
returnVal.NodeState = member.StateStr
|
||||
// Store my state integer
|
||||
returnVal.NodeStateInt = member.State
|
||||
// Store my health integer
|
||||
returnVal.NodeHealthInt = member.Health
|
||||
|
||||
if member.State == 1 {
|
||||
// I'm the master
|
||||
|
|
@ -1356,6 +1361,26 @@ func NewStatLine(oldMongo, newMongo MongoStatus, key string, all bool, sampleSec
|
|||
returnVal.ReplLag = lag
|
||||
}
|
||||
}
|
||||
|
||||
// Preparations for the average health state of the replica-set
|
||||
replMemberCount := len(newReplStat.Members)
|
||||
replMemberHealthyCount := 0
|
||||
|
||||
// Second for-loop is needed, because of break-construct above
|
||||
for _, member := range newReplStat.Members {
|
||||
// Count only healthy members for the average health state of the replica-set
|
||||
if member.Health == 1 {
|
||||
replMemberHealthyCount++
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate the average health state of the replica-set (For precise monitoring alerts)
|
||||
// This reveals when a member is unhealthy from the perspective of another member, and how degraded the replica-set health is overall
|
||||
if replMemberCount > 0 {
|
||||
returnVal.ReplHealthAvg = float64(replMemberHealthyCount) / float64(replMemberCount)
|
||||
} else {
|
||||
returnVal.ReplHealthAvg = 0.00
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue