fix: patched intel rdt to allow sudo (#9527)

Co-authored-by: Joe Guo <joe.guo@canonical.com>
This commit is contained in:
xavpaice 2021-10-12 14:31:33 +13:00 committed by GitHub
parent 7c16822030
commit 4321f8ae67
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 60 additions and 5 deletions

View File

@ -24,6 +24,29 @@ Note: pqos tool needs root privileges to work properly.
Metrics will be constantly reported from the following `pqos` commands within the given interval: Metrics will be constantly reported from the following `pqos` commands within the given interval:
#### If telegraf does not run as the root user
The `pqos` binary needs to run as root. If telegraf is running as a non-root user, you may enable sudo
to allow `pqos` to run correctly.
The `pqos` command requires root level access to run. There are two options to
overcome this if you run telegraf as a non-root user.
One option is to set the setuid bit on the pqos binary using `chmod u+s
/path/to/pqos`. This approach is simple and requires no modification to the
Telegraf configuration; however, `pqos` is not a read-only tool, and making
such a command setuid root has security implications.
Alternatively, you may enable sudo to allow `pqos` to run correctly, as follows:
Add the following to your sudoers file (assumes telegraf runs as a user named `telegraf`):
```
telegraf ALL=(ALL) NOPASSWD:/usr/sbin/pqos -r --iface-os --mon-file-type=csv --mon-interval=*
```
If you wish to use sudo, you must also add `use_sudo = true` to the Telegraf
configuration (see below).
#### In case of cores monitoring: #### In case of cores monitoring:
``` ```
pqos -r --iface-os --mon-file-type=csv --mon-interval=INTERVAL --mon-core=all:[CORES]\;mbt:[CORES] pqos -r --iface-os --mon-file-type=csv --mon-interval=INTERVAL --mon-core=all:[CORES]\;mbt:[CORES]
@ -76,6 +99,10 @@ More about Intel RDT: https://www.intel.com/content/www/us/en/architecture-and-t
## Mandatory if cores aren't set and forbidden if cores are specified. ## Mandatory if cores aren't set and forbidden if cores are specified.
## e.g. ["qemu", "pmd"] ## e.g. ["qemu", "pmd"]
# processes = ["process"] # processes = ["process"]
## Specify if the pqos process should be called with sudo.
## Mandatory if the telegraf process does not run as root.
# use_sudo = false
``` ```
### Exposed metrics ### Exposed metrics

View File

@ -14,6 +14,7 @@ import (
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
"syscall"
"time" "time"
"github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp"
@ -46,6 +47,7 @@ type IntelRDT struct {
Processes []string `toml:"processes"` Processes []string `toml:"processes"`
SamplingInterval int32 `toml:"sampling_interval"` SamplingInterval int32 `toml:"sampling_interval"`
ShortenedMetrics bool `toml:"shortened_metrics"` ShortenedMetrics bool `toml:"shortened_metrics"`
UseSudo bool `toml:"use_sudo"`
Log telegraf.Logger `toml:"-"` Log telegraf.Logger `toml:"-"`
Publisher Publisher `toml:"-"` Publisher Publisher `toml:"-"`
@ -97,6 +99,10 @@ func (r *IntelRDT) SampleConfig() string {
## Mandatory if cores aren't set and forbidden if cores are specified. ## Mandatory if cores aren't set and forbidden if cores are specified.
## e.g. ["qemu", "pmd"] ## e.g. ["qemu", "pmd"]
# processes = ["process"] # processes = ["process"]
## Specify if the pqos process should be called with sudo.
## Mandatory if the telegraf process does not run as root.
# use_sudo = false
` `
} }
@ -254,6 +260,12 @@ func (r *IntelRDT) readData(ctx context.Context, args []string, processesPIDsAss
cmd := exec.Command(r.PqosPath, append(args)...) cmd := exec.Command(r.PqosPath, append(args)...)
if r.UseSudo {
// run pqos with `/bin/sh -c "sudo /path/to/pqos ..."`
args = []string{"-c", fmt.Sprintf("sudo %s %s", r.PqosPath, strings.Replace(strings.Join(args, " "), ";", "\\;", -1))}
cmd = exec.Command("/bin/sh", args...)
}
cmdReader, err := cmd.StdoutPipe() cmdReader, err := cmd.StdoutPipe()
if err != nil { if err != nil {
r.errorChan <- err r.errorChan <- err
@ -334,14 +346,30 @@ func (r *IntelRDT) processOutput(cmdReader io.ReadCloser, processesPIDsAssociati
} }
// shutDownPqos asks a running pqos process to stop and waits briefly for it
// to go away, escalating to a hard kill if it does not.
//
// It first sends os.Interrupt so that pqos has a chance to clean up after
// itself, then polls the process with signal 0 for up to two seconds. If the
// process is reported as done within that window, nil is returned. Otherwise
// the process is killed.
//
// A nil command, or a command whose process was never started, is a no-op.
// An error is returned only when the final kill fails for a reason other than
// the process having already finished.
//
// NOTE(review): os.Process.Signal reports os.ErrProcessDone based on the
// process state known to the os package, which presumably requires some other
// goroutine to call Wait on the command — confirm against the caller.
func shutDownPqos(pqos *exec.Cmd) error {
	const (
		timeout      = 2 * time.Second
		pollInterval = 50 * time.Millisecond
	)

	if pqos == nil || pqos.Process == nil {
		return nil
	}

	// Best effort: the result is deliberately ignored, because whether the
	// process is still alive is established by the polling loop below.
	_ = pqos.Process.Signal(os.Interrupt)

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	// Poll at a fixed interval instead of spinning, so waiting for pqos does
	// not occupy a CPU core for the whole timeout.
	ticker := time.NewTicker(pollInterval)
	defer ticker.Stop()

wait:
	for {
		// Signal 0 performs no delivery; it only reports whether the process
		// can still be signalled.
		if err := pqos.Process.Signal(syscall.Signal(0)); err == os.ErrProcessDone {
			return nil
		}

		select {
		case <-ctx.Done():
			break wait
		case <-ticker.C:
		}
	}

	// pqos is still running after the grace period, so kill it. On Unix-like
	// systems Kill sends SIGKILL, which pqos cannot handle, so it may leave
	// garbage in `/sys/fs/resctrl/mon_groups`.
	// See https://github.com/intel/intel-cmt-cat/issues/197
	if err := pqos.Process.Kill(); err != nil && err != os.ErrProcessDone {
		return fmt.Errorf("failed to shut down pqos: %w", err)
	}
	return nil
}