feat(inputs.nvidia_smi): Add additional fields (#13783)
This commit is contained in:
parent
ebceed6157
commit
cb488ad0f8
|
|
@ -70,6 +70,8 @@ func TestGatherValidXML(t *testing.T) {
|
||||||
"clocks_current_sm": 300,
|
"clocks_current_sm": 300,
|
||||||
"clocks_current_video": 540,
|
"clocks_current_video": 540,
|
||||||
"cuda_version": "10.1",
|
"cuda_version": "10.1",
|
||||||
|
"display_active": "Disabled",
|
||||||
|
"display_mode": "Disabled",
|
||||||
"driver_version": "418.43",
|
"driver_version": "418.43",
|
||||||
"encoder_stats_average_fps": 0,
|
"encoder_stats_average_fps": 0,
|
||||||
"encoder_stats_average_latency": 0,
|
"encoder_stats_average_latency": 0,
|
||||||
|
|
@ -89,6 +91,7 @@ func TestGatherValidXML(t *testing.T) {
|
||||||
"utilization_memory": 1,
|
"utilization_memory": 1,
|
||||||
"utilization_encoder": 0,
|
"utilization_encoder": 0,
|
||||||
"utilization_decoder": 0,
|
"utilization_decoder": 0,
|
||||||
|
"vbios_version": "90.16.25.00.4C",
|
||||||
},
|
},
|
||||||
time.Unix(0, 0)),
|
time.Unix(0, 0)),
|
||||||
},
|
},
|
||||||
|
|
@ -112,6 +115,8 @@ func TestGatherValidXML(t *testing.T) {
|
||||||
"clocks_current_sm": 139,
|
"clocks_current_sm": 139,
|
||||||
"clocks_current_video": 544,
|
"clocks_current_video": 544,
|
||||||
"cuda_version": "10.1",
|
"cuda_version": "10.1",
|
||||||
|
"display_active": "Disabled",
|
||||||
|
"display_mode": "Disabled",
|
||||||
"driver_version": "418.43",
|
"driver_version": "418.43",
|
||||||
"encoder_stats_average_fps": 0,
|
"encoder_stats_average_fps": 0,
|
||||||
"encoder_stats_average_latency": 0,
|
"encoder_stats_average_latency": 0,
|
||||||
|
|
@ -125,11 +130,13 @@ func TestGatherValidXML(t *testing.T) {
|
||||||
"memory_used": 0,
|
"memory_used": 0,
|
||||||
"pcie_link_gen_current": 1,
|
"pcie_link_gen_current": 1,
|
||||||
"pcie_link_width_current": 16,
|
"pcie_link_width_current": 16,
|
||||||
|
"serial": "0424418054852",
|
||||||
"temperature_gpu": 33,
|
"temperature_gpu": 33,
|
||||||
"utilization_gpu": 0,
|
"utilization_gpu": 0,
|
||||||
"utilization_memory": 3,
|
"utilization_memory": 3,
|
||||||
"utilization_encoder": 0,
|
"utilization_encoder": 0,
|
||||||
"utilization_decoder": 0,
|
"utilization_decoder": 0,
|
||||||
|
"vbios_version": "86.07.3B.00.4A",
|
||||||
},
|
},
|
||||||
time.Unix(0, 0)),
|
time.Unix(0, 0)),
|
||||||
},
|
},
|
||||||
|
|
@ -153,6 +160,9 @@ func TestGatherValidXML(t *testing.T) {
|
||||||
"clocks_current_sm": 585,
|
"clocks_current_sm": 585,
|
||||||
"clocks_current_video": 810,
|
"clocks_current_video": 810,
|
||||||
"cuda_version": "11.7",
|
"cuda_version": "11.7",
|
||||||
|
"current_ecc": "Enabled",
|
||||||
|
"display_active": "Disabled",
|
||||||
|
"display_mode": "Disabled",
|
||||||
"driver_version": "515.105.01",
|
"driver_version": "515.105.01",
|
||||||
"encoder_stats_average_fps": 0,
|
"encoder_stats_average_fps": 0,
|
||||||
"encoder_stats_average_latency": 0,
|
"encoder_stats_average_latency": 0,
|
||||||
|
|
@ -171,11 +181,13 @@ func TestGatherValidXML(t *testing.T) {
|
||||||
"retired_pages_pending": "No",
|
"retired_pages_pending": "No",
|
||||||
"pcie_link_gen_current": 3,
|
"pcie_link_gen_current": 3,
|
||||||
"pcie_link_width_current": 8,
|
"pcie_link_width_current": 8,
|
||||||
|
"serial": "0000000000000",
|
||||||
"temperature_gpu": 40,
|
"temperature_gpu": 40,
|
||||||
"utilization_gpu": 0,
|
"utilization_gpu": 0,
|
||||||
"utilization_memory": 0,
|
"utilization_memory": 0,
|
||||||
"utilization_encoder": 0,
|
"utilization_encoder": 0,
|
||||||
"utilization_decoder": 0,
|
"utilization_decoder": 0,
|
||||||
|
"vbios_version": "90.04.84.00.06",
|
||||||
},
|
},
|
||||||
time.Unix(0, 0)),
|
time.Unix(0, 0)),
|
||||||
},
|
},
|
||||||
|
|
@ -199,6 +211,9 @@ func TestGatherValidXML(t *testing.T) {
|
||||||
"clocks_current_sm": 210,
|
"clocks_current_sm": 210,
|
||||||
"clocks_current_video": 555,
|
"clocks_current_video": 555,
|
||||||
"cuda_version": "11.7",
|
"cuda_version": "11.7",
|
||||||
|
"current_ecc": "Enabled",
|
||||||
|
"display_active": "Disabled",
|
||||||
|
"display_mode": "Disabled",
|
||||||
"driver_version": "515.105.01",
|
"driver_version": "515.105.01",
|
||||||
"encoder_stats_average_fps": 0,
|
"encoder_stats_average_fps": 0,
|
||||||
"encoder_stats_average_latency": 0,
|
"encoder_stats_average_latency": 0,
|
||||||
|
|
@ -218,11 +233,13 @@ func TestGatherValidXML(t *testing.T) {
|
||||||
"remapped_rows_failure": "No",
|
"remapped_rows_failure": "No",
|
||||||
"pcie_link_gen_current": 1,
|
"pcie_link_gen_current": 1,
|
||||||
"pcie_link_width_current": 8,
|
"pcie_link_width_current": 8,
|
||||||
|
"serial": "0000000000000",
|
||||||
"temperature_gpu": 17,
|
"temperature_gpu": 17,
|
||||||
"utilization_gpu": 0,
|
"utilization_gpu": 0,
|
||||||
"utilization_memory": 0,
|
"utilization_memory": 0,
|
||||||
"utilization_encoder": 0,
|
"utilization_encoder": 0,
|
||||||
"utilization_decoder": 0,
|
"utilization_decoder": 0,
|
||||||
|
"vbios_version": "94.02.75.00.01",
|
||||||
},
|
},
|
||||||
time.Unix(0, 0)),
|
time.Unix(0, 0)),
|
||||||
},
|
},
|
||||||
|
|
@ -247,6 +264,8 @@ func TestGatherValidXML(t *testing.T) {
|
||||||
"clocks_current_sm": 210,
|
"clocks_current_sm": 210,
|
||||||
"clocks_current_video": 555,
|
"clocks_current_video": 555,
|
||||||
"cuda_version": "12.2",
|
"cuda_version": "12.2",
|
||||||
|
"display_active": "Enabled",
|
||||||
|
"display_mode": "Enabled",
|
||||||
"driver_version": "536.40",
|
"driver_version": "536.40",
|
||||||
"encoder_stats_average_fps": 0,
|
"encoder_stats_average_fps": 0,
|
||||||
"encoder_stats_average_latency": 0,
|
"encoder_stats_average_latency": 0,
|
||||||
|
|
@ -264,9 +283,12 @@ func TestGatherValidXML(t *testing.T) {
|
||||||
"pcie_link_width_current": 16,
|
"pcie_link_width_current": 16,
|
||||||
"temperature_gpu": 31,
|
"temperature_gpu": 31,
|
||||||
"utilization_gpu": 0,
|
"utilization_gpu": 0,
|
||||||
|
"utilization_jpeg": 0,
|
||||||
"utilization_memory": 37,
|
"utilization_memory": 37,
|
||||||
"utilization_encoder": 0,
|
"utilization_encoder": 0,
|
||||||
"utilization_decoder": 0,
|
"utilization_decoder": 0,
|
||||||
|
"utilization_ofa": 0,
|
||||||
|
"vbios_version": "94.02.71.40.72",
|
||||||
},
|
},
|
||||||
time.Unix(1689872450, 0)),
|
time.Unix(1689872450, 0)),
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,11 @@ func Parse(acc telegraf.Accumulator, buf []byte) error {
|
||||||
|
|
||||||
common.SetIfUsed("str", fields, "driver_version", s.DriverVersion)
|
common.SetIfUsed("str", fields, "driver_version", s.DriverVersion)
|
||||||
common.SetIfUsed("str", fields, "cuda_version", s.CUDAVersion)
|
common.SetIfUsed("str", fields, "cuda_version", s.CUDAVersion)
|
||||||
|
common.SetIfUsed("str", fields, "serial", gpu.Serial)
|
||||||
|
common.SetIfUsed("str", fields, "vbios_version", gpu.VbiosVersion)
|
||||||
|
common.SetIfUsed("str", fields, "display_active", gpu.DisplayActive)
|
||||||
|
common.SetIfUsed("str", fields, "display_mode", gpu.DisplayMode)
|
||||||
|
common.SetIfUsed("str", fields, "current_ecc", gpu.EccMode.CurrentEcc)
|
||||||
common.SetIfUsed("int", fields, "fan_speed", gpu.FanSpeed)
|
common.SetIfUsed("int", fields, "fan_speed", gpu.FanSpeed)
|
||||||
common.SetIfUsed("int", fields, "memory_total", gpu.Memory.Total)
|
common.SetIfUsed("int", fields, "memory_total", gpu.Memory.Total)
|
||||||
common.SetIfUsed("int", fields, "memory_used", gpu.Memory.Used)
|
common.SetIfUsed("int", fields, "memory_used", gpu.Memory.Used)
|
||||||
|
|
|
||||||
|
|
@ -9,21 +9,32 @@ type smi struct {
|
||||||
|
|
||||||
// GPU defines the structure of the GPU portion of the smi output.
|
// GPU defines the structure of the GPU portion of the smi output.
|
||||||
type GPU struct {
|
type GPU struct {
|
||||||
FanSpeed string `xml:"fan_speed"` // int
|
Clocks ClockStats `xml:"clocks"`
|
||||||
Memory MemoryStats `xml:"fb_memory_usage"`
|
ComputeMode string `xml:"compute_mode"`
|
||||||
RetiredPages MemoryRetiredPages `xml:"retired_pages"`
|
DisplayActive string `xml:"display_active"`
|
||||||
RemappedRows MemoryRemappedRows `xml:"remapped_rows"`
|
DisplayMode string `xml:"display_mode"`
|
||||||
PState string `xml:"performance_state"`
|
EccMode ECCMode `xml:"ecc_mode"`
|
||||||
Temp TempStats `xml:"temperature"`
|
Encoder EncoderStats `xml:"encoder_stats"`
|
||||||
ProdName string `xml:"product_name"`
|
FanSpeed string `xml:"fan_speed"` // int
|
||||||
UUID string `xml:"uuid"`
|
FBC FBCStats `xml:"fbc_stats"`
|
||||||
ComputeMode string `xml:"compute_mode"`
|
Memory MemoryStats `xml:"fb_memory_usage"`
|
||||||
Utilization UtilizationStats `xml:"utilization"`
|
PCI PCI `xml:"pci"`
|
||||||
Power PowerReadings `xml:"power_readings"`
|
Power PowerReadings `xml:"power_readings"`
|
||||||
PCI PCI `xml:"pci"`
|
ProdName string `xml:"product_name"`
|
||||||
Encoder EncoderStats `xml:"encoder_stats"`
|
PState string `xml:"performance_state"`
|
||||||
FBC FBCStats `xml:"fbc_stats"`
|
RemappedRows MemoryRemappedRows `xml:"remapped_rows"`
|
||||||
Clocks ClockStats `xml:"clocks"`
|
RetiredPages MemoryRetiredPages `xml:"retired_pages"`
|
||||||
|
Serial string `xml:"serial"`
|
||||||
|
Temp TempStats `xml:"temperature"`
|
||||||
|
Utilization UtilizationStats `xml:"utilization"`
|
||||||
|
UUID string `xml:"uuid"`
|
||||||
|
VbiosVersion string `xml:"vbios_version"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ECCMode defines the structure of the ecc portions in the smi output.
|
||||||
|
type ECCMode struct {
|
||||||
|
CurrentEcc string `xml:"current_ecc"` // Enabled, Disabled, N/A
|
||||||
|
PendingEcc string `xml:"pending_ecc"` // Enabled, Disabled, N/A
|
||||||
}
|
}
|
||||||
|
|
||||||
// MemoryStats defines the structure of the memory portions in the smi output.
|
// MemoryStats defines the structure of the memory portions in the smi output.
|
||||||
|
|
|
||||||
|
|
@ -36,6 +36,11 @@ func Parse(acc telegraf.Accumulator, buf []byte) error {
|
||||||
|
|
||||||
common.SetIfUsed("str", fields, "driver_version", s.DriverVersion)
|
common.SetIfUsed("str", fields, "driver_version", s.DriverVersion)
|
||||||
common.SetIfUsed("str", fields, "cuda_version", s.CudaVersion)
|
common.SetIfUsed("str", fields, "cuda_version", s.CudaVersion)
|
||||||
|
common.SetIfUsed("str", fields, "serial", gpu.Serial)
|
||||||
|
common.SetIfUsed("str", fields, "vbios_version", gpu.VbiosVersion)
|
||||||
|
common.SetIfUsed("str", fields, "display_active", gpu.DisplayActive)
|
||||||
|
common.SetIfUsed("str", fields, "display_mode", gpu.DisplayMode)
|
||||||
|
common.SetIfUsed("str", fields, "current_ecc", gpu.EccMode.CurrentEcc)
|
||||||
common.SetIfUsed("int", fields, "fan_speed", gpu.FanSpeed)
|
common.SetIfUsed("int", fields, "fan_speed", gpu.FanSpeed)
|
||||||
common.SetIfUsed("int", fields, "memory_total", gpu.FbMemoryUsage.Total)
|
common.SetIfUsed("int", fields, "memory_total", gpu.FbMemoryUsage.Total)
|
||||||
common.SetIfUsed("int", fields, "memory_used", gpu.FbMemoryUsage.Used)
|
common.SetIfUsed("int", fields, "memory_used", gpu.FbMemoryUsage.Used)
|
||||||
|
|
@ -54,6 +59,8 @@ func Parse(acc telegraf.Accumulator, buf []byte) error {
|
||||||
common.SetIfUsed("int", fields, "utilization_memory", gpu.Utilization.MemoryUtil)
|
common.SetIfUsed("int", fields, "utilization_memory", gpu.Utilization.MemoryUtil)
|
||||||
common.SetIfUsed("int", fields, "utilization_encoder", gpu.Utilization.EncoderUtil)
|
common.SetIfUsed("int", fields, "utilization_encoder", gpu.Utilization.EncoderUtil)
|
||||||
common.SetIfUsed("int", fields, "utilization_decoder", gpu.Utilization.DecoderUtil)
|
common.SetIfUsed("int", fields, "utilization_decoder", gpu.Utilization.DecoderUtil)
|
||||||
|
common.SetIfUsed("int", fields, "utilization_jpeg", gpu.Utilization.JpegUtil)
|
||||||
|
common.SetIfUsed("int", fields, "utilization_ofa", gpu.Utilization.OfaUtil)
|
||||||
common.SetIfUsed("int", fields, "pcie_link_gen_current", gpu.Pci.PciGpuLinkInfo.PcieGen.CurrentLinkGen)
|
common.SetIfUsed("int", fields, "pcie_link_gen_current", gpu.Pci.PciGpuLinkInfo.PcieGen.CurrentLinkGen)
|
||||||
common.SetIfUsed("int", fields, "pcie_link_width_current", gpu.Pci.PciGpuLinkInfo.LinkWidths.CurrentLinkWidth)
|
common.SetIfUsed("int", fields, "pcie_link_width_current", gpu.Pci.PciGpuLinkInfo.LinkWidths.CurrentLinkWidth)
|
||||||
common.SetIfUsed("int", fields, "encoder_stats_session_count", gpu.EncoderStats.SessionCount)
|
common.SetIfUsed("int", fields, "encoder_stats_session_count", gpu.EncoderStats.SessionCount)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue