feat(inputs.nvidia_smi): Implement probing (#16305)

This commit is contained in:
Landon Clipp 2025-01-27 22:47:49 -06:00 committed by GitHub
parent 0670c7f471
commit c2c9a09803
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 66 additions and 5 deletions

View File

@ -31,8 +31,9 @@ type NvidiaSMI struct {
Timeout config.Duration `toml:"timeout"` Timeout config.Duration `toml:"timeout"`
Log telegraf.Logger `toml:"-"` Log telegraf.Logger `toml:"-"`
ignorePlugin bool nvidiaSMIArgs []string
once sync.Once ignorePlugin bool
once sync.Once
} }
func (*NvidiaSMI) SampleConfig() string { func (*NvidiaSMI) SampleConfig() string {
@ -53,6 +54,15 @@ func (smi *NvidiaSMI) Start(telegraf.Accumulator) error {
func (*NvidiaSMI) Stop() {} func (*NvidiaSMI) Stop() {}
// Probe runs the configured binary (BinPath) once with the plugin's
// nvidiaSMIArgs, passing Timeout as the time limit, and reports whether the
// invocation succeeded. The command output is discarded; only the error from
// the call matters. A failure is returned wrapped with the binary path.
func (smi *NvidiaSMI) Probe() error {
	// Same invocation Gather performs, but the output is intentionally ignored.
	cmd := exec.Command(smi.BinPath, smi.nvidiaSMIArgs...)
	if _, err := internal.CombinedOutputTimeout(cmd, time.Duration(smi.Timeout)); err != nil {
		return fmt.Errorf("calling %q failed: %w", smi.BinPath, err)
	}
	return nil
}
// Gather implements the telegraf interface // Gather implements the telegraf interface
func (smi *NvidiaSMI) Gather(acc telegraf.Accumulator) error { func (smi *NvidiaSMI) Gather(acc telegraf.Accumulator) error {
if smi.ignorePlugin { if smi.ignorePlugin {
@ -60,7 +70,7 @@ func (smi *NvidiaSMI) Gather(acc telegraf.Accumulator) error {
} }
// Construct and execute metrics query // Construct and execute metrics query
data, err := internal.CombinedOutputTimeout(exec.Command(smi.BinPath, "-q", "-x"), time.Duration(smi.Timeout)) data, err := internal.CombinedOutputTimeout(exec.Command(smi.BinPath, smi.nvidiaSMIArgs...), time.Duration(smi.Timeout))
if err != nil { if err != nil {
return fmt.Errorf("calling %q failed: %w", smi.BinPath, err) return fmt.Errorf("calling %q failed: %w", smi.BinPath, err)
} }
@ -119,8 +129,9 @@ func (smi *NvidiaSMI) parse(acc telegraf.Accumulator, data []byte) error {
func init() { func init() {
inputs.Add("nvidia_smi", func() telegraf.Input { inputs.Add("nvidia_smi", func() telegraf.Input {
return &NvidiaSMI{ return &NvidiaSMI{
BinPath: "/usr/bin/nvidia-smi", BinPath: "/usr/bin/nvidia-smi",
Timeout: config.Duration(5 * time.Second), Timeout: config.Duration(5 * time.Second),
nvidiaSMIArgs: []string{"-q", "-x"},
} }
}) })
} }

View File

@ -4,16 +4,66 @@ import (
"errors" "errors"
"os" "os"
"path/filepath" "path/filepath"
"runtime"
"testing" "testing"
"time" "time"
"github.com/influxdata/telegraf" "github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/config"
"github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/models" "github.com/influxdata/telegraf/models"
"github.com/influxdata/telegraf/testutil" "github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
// TestProbe checks that probing through a running-input model succeeds when
// the configured command exits with status 0 and fails when it exits with
// status 1. A system shell stands in for the nvidia-smi binary so the test
// does not need real NVIDIA tooling.
func TestProbe(t *testing.T) {
	// Default to bash; switch to PowerShell when running on Windows.
	shellPath := "/bin/bash"
	shellFlag := "-c"
	if runtime.GOOS == "windows" {
		shellPath = `C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe`
		shellFlag = "-Command"
	}

	cases := []struct {
		name    string
		command string
		wantErr bool
	}{
		{name: "probe success", command: "exit 0", wantErr: false},
		{name: "probe error", command: "exit 1", wantErr: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// The shell flag followed by the script replaces the usual
			// nvidia-smi arguments, so the exit status is fully controlled.
			plugin := &NvidiaSMI{
				BinPath:       shellPath,
				nvidiaSMIArgs: []string{shellFlag, tc.command},
				Log:           &testutil.Logger{},
				Timeout:       config.Duration(5 * time.Second),
			}

			// NOTE(review): presumably model.Probe() delegates to the
			// plugin's Probe when the startup error behavior is "probe";
			// confirm against models.RunningInput.
			cfg := &models.InputConfig{
				Name:                 "nvidia_smi",
				StartupErrorBehavior: "probe",
			}
			model := models.NewRunningInput(plugin, cfg)

			err := model.Probe()
			if !tc.wantErr {
				require.NoError(t, err)
				return
			}
			require.Error(t, err)
		})
	}
}
func TestErrorBehaviorDefault(t *testing.T) { func TestErrorBehaviorDefault(t *testing.T) {
// make sure we can't find nvidia-smi in $PATH somewhere // make sure we can't find nvidia-smi in $PATH somewhere
os.Unsetenv("PATH") os.Unsetenv("PATH")