fix: patched intel rdt to allow sudo (#9527)
Co-authored-by: Joe Guo <joe.guo@canonical.com>
This commit is contained in:
parent
7c16822030
commit
4321f8ae67
|
|
@ -24,6 +24,29 @@ Note: pqos tool needs root privileges to work properly.
|
|||
|
||||
Metrics will be constantly reported from the following `pqos` commands within the given interval:
|
||||
|
||||
#### If telegraf does not run as the root user
|
||||
|
||||
The `pqos` binary needs to run as root. If telegraf is running as a non-root user, you may enable sudo
|
||||
to allow `pqos` to run correctly.
|
||||
The `pqos` command requires root level access to run. There are two options to
|
||||
overcome this if you run telegraf as a non-root user.
|
||||
|
||||
It is possible to update the pqos binary with setuid using `chmod u+s
|
||||
/path/to/pqos`. This approach is simple and requires no modification to the
|
||||
Telegraf configuration; however, pqos is not a read-only tool and there are
|
||||
security implications for making such a command setuid root.
|
||||
|
||||
Alternatively, you may enable sudo to allow `pqos` to run correctly, as follows:
|
||||
|
||||
Add the following to your sudoers file (assumes telegraf runs as a user named `telegraf`):
|
||||
|
||||
```
|
||||
telegraf ALL=(ALL) NOPASSWD:/usr/sbin/pqos -r --iface-os --mon-file-type=csv --mon-interval=*
|
||||
```
|
||||
|
||||
If you wish to use sudo, you must also add `use_sudo = true` to the Telegraf
|
||||
configuration (see below).
|
||||
|
||||
#### In case of cores monitoring:
|
||||
```
|
||||
pqos -r --iface-os --mon-file-type=csv --mon-interval=INTERVAL --mon-core=all:[CORES]\;mbt:[CORES]
|
||||
|
|
@ -76,6 +99,10 @@ More about Intel RDT: https://www.intel.com/content/www/us/en/architecture-and-t
|
|||
## Mandatory if cores aren't set and forbidden if cores are specified.
|
||||
## e.g. ["qemu", "pmd"]
|
||||
# processes = ["process"]
|
||||
|
||||
## Specify if the pqos process should be called with sudo.
|
||||
## Mandatory if the telegraf process does not run as root.
|
||||
# use_sudo = false
|
||||
```
|
||||
|
||||
### Exposed metrics
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ import (
|
|||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
|
|
@ -46,6 +47,7 @@ type IntelRDT struct {
|
|||
Processes []string `toml:"processes"`
|
||||
SamplingInterval int32 `toml:"sampling_interval"`
|
||||
ShortenedMetrics bool `toml:"shortened_metrics"`
|
||||
UseSudo bool `toml:"use_sudo"`
|
||||
|
||||
Log telegraf.Logger `toml:"-"`
|
||||
Publisher Publisher `toml:"-"`
|
||||
|
|
@ -97,6 +99,10 @@ func (r *IntelRDT) SampleConfig() string {
|
|||
## Mandatory if cores aren't set and forbidden if cores are specified.
|
||||
## e.g. ["qemu", "pmd"]
|
||||
# processes = ["process"]
|
||||
|
||||
## Specify if the pqos process should be called with sudo.
|
||||
## Mandatory if the telegraf process does not run as root.
|
||||
# use_sudo = false
|
||||
`
|
||||
}
|
||||
|
||||
|
|
@ -254,6 +260,12 @@ func (r *IntelRDT) readData(ctx context.Context, args []string, processesPIDsAss
|
|||
|
||||
cmd := exec.Command(r.PqosPath, append(args)...)
|
||||
|
||||
if r.UseSudo {
|
||||
// run pqos with `/bin/sh -c "sudo /path/to/pqos ..."`
|
||||
args = []string{"-c", fmt.Sprintf("sudo %s %s", r.PqosPath, strings.Replace(strings.Join(args, " "), ";", "\\;", -1))}
|
||||
cmd = exec.Command("/bin/sh", args...)
|
||||
}
|
||||
|
||||
cmdReader, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
r.errorChan <- err
|
||||
|
|
@ -334,14 +346,30 @@ func (r *IntelRDT) processOutput(cmdReader io.ReadCloser, processesPIDsAssociati
|
|||
}
|
||||
|
||||
func shutDownPqos(pqos *exec.Cmd) error {
|
||||
timeout := time.Second * 2
|
||||
|
||||
if pqos.Process != nil {
|
||||
err := pqos.Process.Signal(os.Interrupt)
|
||||
if err != nil {
|
||||
err = pqos.Process.Kill()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to shut down pqos: %v", err)
|
||||
// try to send interrupt signal, ignore err for now
|
||||
_ = pqos.Process.Signal(os.Interrupt)
|
||||
|
||||
// wait and constantly check if pqos is still running
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
defer cancel()
|
||||
for {
|
||||
if err := pqos.Process.Signal(syscall.Signal(0)); err == os.ErrProcessDone {
|
||||
return nil
|
||||
} else if ctx.Err() != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// if pqos is still running after some period, try to kill it
|
||||
// this will send SIGKILL to pqos, and leave garbage in `/sys/fs/resctrl/mon_groups`
|
||||
// fixed in https://github.com/intel/intel-cmt-cat/issues/197
|
||||
err := pqos.Process.Kill()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to shut down pqos: %v", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue