telegraf/internal/process/process.go

192 lines
3.8 KiB
Go
Raw Normal View History

2020-06-05 07:09:22 +08:00
package process
import (
"context"
2020-06-27 04:38:07 +08:00
"errors"
2020-06-05 07:09:22 +08:00
"fmt"
"io"
"io/ioutil"
"os/exec"
"sync"
2020-07-02 23:59:29 +08:00
"sync/atomic"
2020-06-05 07:09:22 +08:00
"time"
2020-06-27 04:38:07 +08:00
"github.com/influxdata/telegraf"
2020-06-05 07:09:22 +08:00
)
// Process is a long-running process manager that will restart processes if they stop.
type Process struct {
Cmd *exec.Cmd
Stdin io.WriteCloser
Stdout io.ReadCloser
Stderr io.ReadCloser
ReadStdoutFn func(io.Reader)
ReadStderrFn func(io.Reader)
RestartDelay time.Duration
2020-06-27 04:38:07 +08:00
Log telegraf.Logger
2020-06-05 07:09:22 +08:00
2020-07-02 23:59:29 +08:00
name string
args []string
pid int32
2020-06-05 07:09:22 +08:00
cancel context.CancelFunc
mainLoopWg sync.WaitGroup
}
// New creates a new process wrapper
func New(command []string) (*Process, error) {
2020-06-27 04:38:07 +08:00
if len(command) == 0 {
return nil, errors.New("no command")
}
2020-06-05 07:09:22 +08:00
p := &Process{
RestartDelay: 5 * time.Second,
2020-07-02 23:59:29 +08:00
name: command[0],
args: []string{},
2020-06-05 07:09:22 +08:00
}
2020-07-02 23:59:29 +08:00
if len(command) > 1 {
p.args = command[1:]
2020-06-05 07:09:22 +08:00
}
return p, nil
}
2020-07-02 23:59:29 +08:00
// Start the process. A &Process can only be started once. It will restart itself
// as necessary.
2020-06-05 07:09:22 +08:00
func (p *Process) Start() error {
ctx, cancel := context.WithCancel(context.Background())
p.cancel = cancel
if err := p.cmdStart(); err != nil {
return err
}
2020-06-27 04:38:07 +08:00
p.mainLoopWg.Add(1)
2020-06-05 07:09:22 +08:00
go func() {
if err := p.cmdLoop(ctx); err != nil {
2020-06-27 04:38:07 +08:00
p.Log.Errorf("Process quit with message: %v", err)
2020-06-05 07:09:22 +08:00
}
p.mainLoopWg.Done()
}()
return nil
}
2020-07-02 23:59:29 +08:00
// Stop is called when the process isn't needed anymore
2020-06-05 07:09:22 +08:00
func (p *Process) Stop() {
if p.cancel != nil {
2020-07-02 23:59:29 +08:00
// signal our intent to shutdown and not restart the process
2020-06-05 07:09:22 +08:00
p.cancel()
}
2020-07-02 23:59:29 +08:00
// close stdin so the app can shut down gracefully.
p.Stdin.Close()
2020-06-05 07:09:22 +08:00
p.mainLoopWg.Wait()
}
func (p *Process) cmdStart() error {
2020-07-02 23:59:29 +08:00
p.Cmd = exec.Command(p.name, p.args...)
var err error
p.Stdin, err = p.Cmd.StdinPipe()
if err != nil {
return fmt.Errorf("error opening stdin pipe: %w", err)
}
p.Stdout, err = p.Cmd.StdoutPipe()
if err != nil {
return fmt.Errorf("error opening stdout pipe: %w", err)
}
p.Stderr, err = p.Cmd.StderrPipe()
if err != nil {
return fmt.Errorf("error opening stderr pipe: %w", err)
}
p.Log.Infof("Starting process: %s %s", p.name, p.args)
2020-06-05 07:09:22 +08:00
if err := p.Cmd.Start(); err != nil {
2020-06-27 04:38:07 +08:00
return fmt.Errorf("error starting process: %s", err)
2020-06-05 07:09:22 +08:00
}
2020-07-02 23:59:29 +08:00
atomic.StoreInt32(&p.pid, int32(p.Cmd.Process.Pid))
2020-06-05 07:09:22 +08:00
return nil
}
2020-07-02 23:59:29 +08:00
// Pid returns the process id recorded by the most recent successful launch,
// or 0 if no launch has recorded one yet. The value is read atomically.
func (p *Process) Pid() int {
	return int(atomic.LoadInt32(&p.pid))
}
2020-06-05 07:09:22 +08:00
// cmdLoop watches an already running process, restarting it when appropriate.
func (p *Process) cmdLoop(ctx context.Context) error {
for {
2020-07-02 23:59:29 +08:00
err := p.cmdWait(ctx)
2020-06-05 07:09:22 +08:00
if isQuitting(ctx) {
2020-06-27 04:38:07 +08:00
p.Log.Infof("Process %s shut down", p.Cmd.Path)
2020-06-05 07:09:22 +08:00
return nil
}
2020-06-27 04:38:07 +08:00
p.Log.Errorf("Process %s exited: %v", p.Cmd.Path, err)
p.Log.Infof("Restarting in %s...", time.Duration(p.RestartDelay))
2020-06-05 07:09:22 +08:00
select {
case <-ctx.Done():
return nil
case <-time.After(time.Duration(p.RestartDelay)):
// Continue the loop and restart the process
if err := p.cmdStart(); err != nil {
return err
}
}
}
}
2020-07-02 23:59:29 +08:00
// cmdWait waits for the process to finish.
func (p *Process) cmdWait(ctx context.Context) error {
2020-06-05 07:09:22 +08:00
var wg sync.WaitGroup
if p.ReadStdoutFn == nil {
p.ReadStdoutFn = defaultReadPipe
}
if p.ReadStderrFn == nil {
p.ReadStderrFn = defaultReadPipe
}
2020-07-02 23:59:29 +08:00
processCtx, processCancel := context.WithCancel(context.Background())
defer processCancel()
2020-06-05 07:09:22 +08:00
wg.Add(1)
go func() {
p.ReadStdoutFn(p.Stdout)
wg.Done()
}()
wg.Add(1)
go func() {
p.ReadStderrFn(p.Stderr)
wg.Done()
}()
2020-07-02 23:59:29 +08:00
wg.Add(1)
go func() {
select {
case <-ctx.Done():
gracefulStop(processCtx, p.Cmd, 5*time.Second)
case <-processCtx.Done():
}
wg.Done()
}()
err := p.Cmd.Wait()
processCancel()
2020-06-05 07:09:22 +08:00
wg.Wait()
2020-07-02 23:59:29 +08:00
return err
2020-06-05 07:09:22 +08:00
}
// isQuitting reports whether ctx has already been cancelled or has expired.
// It never blocks.
func isQuitting(ctx context.Context) bool {
	select {
	case <-ctx.Done():
		return true
	default:
		return false
	}
}
// defaultReadPipe drains r until io.Copy stops, throwing the data away. The
// byte count and any read error are intentionally ignored.
func defaultReadPipe(r io.Reader) {
	_, _ = io.Copy(ioutil.Discard, r)
}