diff --git a/plugins/inputs/nvidia_smi/nvidia_smi_test.go b/plugins/inputs/nvidia_smi/nvidia_smi_test.go index c7b4cb025..1524a892d 100644 --- a/plugins/inputs/nvidia_smi/nvidia_smi_test.go +++ b/plugins/inputs/nvidia_smi/nvidia_smi_test.go @@ -70,6 +70,8 @@ func TestGatherValidXML(t *testing.T) { "clocks_current_sm": 300, "clocks_current_video": 540, "cuda_version": "10.1", + "display_active": "Disabled", + "display_mode": "Disabled", "driver_version": "418.43", "encoder_stats_average_fps": 0, "encoder_stats_average_latency": 0, @@ -89,6 +91,7 @@ func TestGatherValidXML(t *testing.T) { "utilization_memory": 1, "utilization_encoder": 0, "utilization_decoder": 0, + "vbios_version": "90.16.25.00.4C", }, time.Unix(0, 0)), }, @@ -112,6 +115,8 @@ func TestGatherValidXML(t *testing.T) { "clocks_current_sm": 139, "clocks_current_video": 544, "cuda_version": "10.1", + "display_active": "Disabled", + "display_mode": "Disabled", "driver_version": "418.43", "encoder_stats_average_fps": 0, "encoder_stats_average_latency": 0, @@ -125,11 +130,13 @@ func TestGatherValidXML(t *testing.T) { "memory_used": 0, "pcie_link_gen_current": 1, "pcie_link_width_current": 16, + "serial": "0424418054852", "temperature_gpu": 33, "utilization_gpu": 0, "utilization_memory": 3, "utilization_encoder": 0, "utilization_decoder": 0, + "vbios_version": "86.07.3B.00.4A", }, time.Unix(0, 0)), }, @@ -153,6 +160,9 @@ func TestGatherValidXML(t *testing.T) { "clocks_current_sm": 585, "clocks_current_video": 810, "cuda_version": "11.7", + "current_ecc": "Enabled", + "display_active": "Disabled", + "display_mode": "Disabled", "driver_version": "515.105.01", "encoder_stats_average_fps": 0, "encoder_stats_average_latency": 0, @@ -171,11 +181,13 @@ func TestGatherValidXML(t *testing.T) { "retired_pages_pending": "No", "pcie_link_gen_current": 3, "pcie_link_width_current": 8, + "serial": "0000000000000", "temperature_gpu": 40, "utilization_gpu": 0, "utilization_memory": 0, "utilization_encoder": 0, "utilization_decoder": 0, + "vbios_version": "90.04.84.00.06", }, time.Unix(0, 0)), }, @@ -199,6 +211,9 @@ func TestGatherValidXML(t *testing.T) { "clocks_current_sm": 210, "clocks_current_video": 555, "cuda_version": "11.7", + "current_ecc": "Enabled", + "display_active": "Disabled", + "display_mode": "Disabled", "driver_version": "515.105.01", "encoder_stats_average_fps": 0, "encoder_stats_average_latency": 0, @@ -218,11 +233,13 @@ func TestGatherValidXML(t *testing.T) { "remapped_rows_failure": "No", "pcie_link_gen_current": 1, "pcie_link_width_current": 8, + "serial": "0000000000000", "temperature_gpu": 17, "utilization_gpu": 0, "utilization_memory": 0, "utilization_encoder": 0, "utilization_decoder": 0, + "vbios_version": "94.02.75.00.01", }, time.Unix(0, 0)), }, @@ -247,6 +264,8 @@ func TestGatherValidXML(t *testing.T) { "clocks_current_sm": 210, "clocks_current_video": 555, "cuda_version": "12.2", + "display_active": "Enabled", + "display_mode": "Enabled", "driver_version": "536.40", "encoder_stats_average_fps": 0, "encoder_stats_average_latency": 0, @@ -264,9 +283,12 @@ func TestGatherValidXML(t *testing.T) { "pcie_link_width_current": 16, "temperature_gpu": 31, "utilization_gpu": 0, + "utilization_jpeg": 0, "utilization_memory": 37, "utilization_encoder": 0, "utilization_decoder": 0, + "utilization_ofa": 0, + "vbios_version": "94.02.71.40.72", }, time.Unix(1689872450, 0)), }, diff --git a/plugins/inputs/nvidia_smi/schema_v11/parser.go b/plugins/inputs/nvidia_smi/schema_v11/parser.go index 1e16bd660..f708e8988 100644 --- a/plugins/inputs/nvidia_smi/schema_v11/parser.go +++ b/plugins/inputs/nvidia_smi/schema_v11/parser.go @@ -27,6 +27,11 @@ func Parse(acc telegraf.Accumulator, buf []byte) error { common.SetIfUsed("str", fields, "driver_version", s.DriverVersion) common.SetIfUsed("str", fields, "cuda_version", s.CUDAVersion) + common.SetIfUsed("str", fields, "serial", gpu.Serial) + common.SetIfUsed("str", fields, "vbios_version", gpu.VbiosVersion) + common.SetIfUsed("str", fields, "display_active", gpu.DisplayActive) + common.SetIfUsed("str", fields, "display_mode", gpu.DisplayMode) + common.SetIfUsed("str", fields, "current_ecc", gpu.EccMode.CurrentEcc) common.SetIfUsed("int", fields, "fan_speed", gpu.FanSpeed) common.SetIfUsed("int", fields, "memory_total", gpu.Memory.Total) common.SetIfUsed("int", fields, "memory_used", gpu.Memory.Used) diff --git a/plugins/inputs/nvidia_smi/schema_v11/types.go b/plugins/inputs/nvidia_smi/schema_v11/types.go index 9e022c13f..c930f090e 100644 --- a/plugins/inputs/nvidia_smi/schema_v11/types.go +++ b/plugins/inputs/nvidia_smi/schema_v11/types.go @@ -9,21 +9,32 @@ type smi struct { // GPU defines the structure of the GPU portion of the smi output. type GPU struct { - FanSpeed string `xml:"fan_speed"` // int - Memory MemoryStats `xml:"fb_memory_usage"` - RetiredPages MemoryRetiredPages `xml:"retired_pages"` - RemappedRows MemoryRemappedRows `xml:"remapped_rows"` - PState string `xml:"performance_state"` - Temp TempStats `xml:"temperature"` - ProdName string `xml:"product_name"` - UUID string `xml:"uuid"` - ComputeMode string `xml:"compute_mode"` - Utilization UtilizationStats `xml:"utilization"` - Power PowerReadings `xml:"power_readings"` - PCI PCI `xml:"pci"` - Encoder EncoderStats `xml:"encoder_stats"` - FBC FBCStats `xml:"fbc_stats"` - Clocks ClockStats `xml:"clocks"` + Clocks ClockStats `xml:"clocks"` + ComputeMode string `xml:"compute_mode"` + DisplayActive string `xml:"display_active"` + DisplayMode string `xml:"display_mode"` + EccMode ECCMode `xml:"ecc_mode"` + Encoder EncoderStats `xml:"encoder_stats"` + FanSpeed string `xml:"fan_speed"` // int + FBC FBCStats `xml:"fbc_stats"` + Memory MemoryStats `xml:"fb_memory_usage"` + PCI PCI `xml:"pci"` + Power PowerReadings `xml:"power_readings"` + ProdName string `xml:"product_name"` + PState string `xml:"performance_state"` + RemappedRows MemoryRemappedRows `xml:"remapped_rows"` + RetiredPages MemoryRetiredPages `xml:"retired_pages"` + Serial string `xml:"serial"` + Temp TempStats `xml:"temperature"` + Utilization UtilizationStats `xml:"utilization"` + UUID string `xml:"uuid"` + VbiosVersion string `xml:"vbios_version"` +} + +// ECCMode defines the structure of the ecc portions in the smi output. +type ECCMode struct { + CurrentEcc string `xml:"current_ecc"` // Enabled, Disabled, N/A + PendingEcc string `xml:"pending_ecc"` // Enabled, Disabled, N/A } // MemoryStats defines the structure of the memory portions in the smi output. diff --git a/plugins/inputs/nvidia_smi/schema_v12/parser.go b/plugins/inputs/nvidia_smi/schema_v12/parser.go index 1175e5d47..efab31577 100644 --- a/plugins/inputs/nvidia_smi/schema_v12/parser.go +++ b/plugins/inputs/nvidia_smi/schema_v12/parser.go @@ -36,6 +36,11 @@ func Parse(acc telegraf.Accumulator, buf []byte) error { common.SetIfUsed("str", fields, "driver_version", s.DriverVersion) common.SetIfUsed("str", fields, "cuda_version", s.CudaVersion) + common.SetIfUsed("str", fields, "serial", gpu.Serial) + common.SetIfUsed("str", fields, "vbios_version", gpu.VbiosVersion) + common.SetIfUsed("str", fields, "display_active", gpu.DisplayActive) + common.SetIfUsed("str", fields, "display_mode", gpu.DisplayMode) + common.SetIfUsed("str", fields, "current_ecc", gpu.EccMode.CurrentEcc) common.SetIfUsed("int", fields, "fan_speed", gpu.FanSpeed) common.SetIfUsed("int", fields, "memory_total", gpu.FbMemoryUsage.Total) common.SetIfUsed("int", fields, "memory_used", gpu.FbMemoryUsage.Used) @@ -54,6 +59,8 @@ func Parse(acc telegraf.Accumulator, buf []byte) error { common.SetIfUsed("int", fields, "utilization_memory", gpu.Utilization.MemoryUtil) common.SetIfUsed("int", fields, "utilization_encoder", gpu.Utilization.EncoderUtil) common.SetIfUsed("int", fields, "utilization_decoder", gpu.Utilization.DecoderUtil) + common.SetIfUsed("int", fields, "utilization_jpeg", gpu.Utilization.JpegUtil) + common.SetIfUsed("int", fields, "utilization_ofa", gpu.Utilization.OfaUtil) common.SetIfUsed("int", fields, "pcie_link_gen_current", gpu.Pci.PciGpuLinkInfo.PcieGen.CurrentLinkGen) common.SetIfUsed("int", fields, "pcie_link_width_current", gpu.Pci.PciGpuLinkInfo.LinkWidths.CurrentLinkWidth) common.SetIfUsed("int", fields, "encoder_stats_session_count", gpu.EncoderStats.SessionCount)