chore(windows): Rework Windows service handling (#15372)
This commit is contained in:
parent
0e636b729a
commit
274fbd4b62
|
|
@ -0,0 +1,203 @@
|
|||
//go:build windows
|
||||
|
||||
// Command handling for configuration "service" command
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
"golang.org/x/sys/windows"
|
||||
)
|
||||
|
||||
func cliFlags() []cli.Flag {
|
||||
return []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "service",
|
||||
Usage: "operate on the service (windows only)",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "service-name",
|
||||
Value: "telegraf",
|
||||
Usage: "service name (windows only)",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "service-display-name",
|
||||
Value: "Telegraf Data Collector Service",
|
||||
Usage: "service display name (windows only)",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "service-restart-delay",
|
||||
Value: "5m",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "service-auto-restart",
|
||||
Usage: "auto restart service on failure (windows only)",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "console",
|
||||
Usage: "run as console application (windows only)",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func getServiceCommands(outputBuffer io.Writer) []*cli.Command {
|
||||
return []*cli.Command{
|
||||
{
|
||||
Name: "service",
|
||||
Usage: "commands for operate on the Windows service",
|
||||
Flags: nil,
|
||||
Subcommands: []*cli.Command{
|
||||
{
|
||||
Name: "install",
|
||||
Usage: "install Telegraf as a Windows service",
|
||||
Description: `
|
||||
The 'install' command with create a Windows service for automatically starting
|
||||
Telegraf with the specified configuration and service parameters. If no
|
||||
configuration(s) is specified the service will use the file in
|
||||
"C:\Program Files\Telegraf\telegraf.conf".
|
||||
|
||||
To install Telegraf as a service use
|
||||
|
||||
> telegraf service install
|
||||
|
||||
In case you are planning to start multiple Telegraf instances as a service,
|
||||
you must use distrinctive service-names for each instance. To install two
|
||||
services with different configurations use
|
||||
|
||||
> telegraf --config "C:\Program Files\Telegraf\telegraf-machine.conf" --service-name telegraf-machine service install
|
||||
> telegraf --config "C:\Program Files\Telegraf\telegraf-service.conf" --service-name telegraf-service service install
|
||||
`,
|
||||
Flags: []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "display-name",
|
||||
Value: "Telegraf Data Collector Service",
|
||||
Usage: "service name as displayed in the service manager",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "restart-delay",
|
||||
Value: "5m",
|
||||
Usage: "duration for delaying the service restart on failure",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "auto-restart",
|
||||
Usage: "enable automatic service restart on failure",
|
||||
},
|
||||
},
|
||||
Action: func(cCtx *cli.Context) error {
|
||||
cfg := &serviceConfig{
|
||||
displayName: cCtx.String("display-name"),
|
||||
restartDelay: cCtx.String("restart-delay"),
|
||||
autoRestart: cCtx.Bool("auto-restart"),
|
||||
|
||||
configs: cCtx.StringSlice("config"),
|
||||
configDirs: cCtx.StringSlice("config-directory"),
|
||||
}
|
||||
name := cCtx.String("service-name")
|
||||
if err := installService(name, cfg); err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Fprintf(outputBuffer, "Successfully installed service %q\n", name)
|
||||
return nil
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "uninstall",
|
||||
Usage: "remove the Telegraf Windows service",
|
||||
Description: `
|
||||
The 'uninstall' command removes the Telegraf service with the given name. To
|
||||
remove a service use
|
||||
|
||||
> telegraf service uninstall
|
||||
|
||||
In case you specified a custom service-name during install use
|
||||
|
||||
> telegraf --service-name telegraf-machine service uninstall
|
||||
`,
|
||||
Action: func(cCtx *cli.Context) error {
|
||||
name := cCtx.String("service-name")
|
||||
if err := uninstallService(name); err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Fprintf(outputBuffer, "Successfully uninstalled service %q\n", name)
|
||||
return nil
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "start",
|
||||
Usage: "start the Telegraf Windows service",
|
||||
Description: `
|
||||
The 'start' command triggers the start of the Windows service with the given
|
||||
name. To start the service either use the Windows service manager or run
|
||||
|
||||
> telegraf service start
|
||||
|
||||
In case you specified a custom service-name during install use
|
||||
|
||||
> telegraf --service-name telegraf-machine service start
|
||||
`,
|
||||
Action: func(cCtx *cli.Context) error {
|
||||
name := cCtx.String("service-name")
|
||||
if err := startService(name); err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Fprintf(outputBuffer, "Successfully started service %q\n", name)
|
||||
return nil
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "stop",
|
||||
Usage: "stop the Telegraf Windows service",
|
||||
Description: `
|
||||
The 'stop' command triggers the stop of the Windows service with the given
|
||||
name and will wait until the service is actually stopped. To stop the service
|
||||
either use the Windows service manager or run
|
||||
|
||||
> telegraf service stop
|
||||
|
||||
In case you specified a custom service-name during install use
|
||||
|
||||
> telegraf --service-name telegraf-machine service stop
|
||||
`,
|
||||
Action: func(cCtx *cli.Context) error {
|
||||
name := cCtx.String("service-name")
|
||||
if err := stopService(name); err != nil {
|
||||
if errors.Is(err, windows.ERROR_SERVICE_NOT_ACTIVE) {
|
||||
fmt.Fprintf(outputBuffer, "Service %q not started\n", name)
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
fmt.Fprintf(outputBuffer, "Successfully stopped service %q\n", name)
|
||||
return nil
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "status",
|
||||
Usage: "query the Telegraf Windows service status",
|
||||
Description: `
|
||||
The 'status' command queries the current state of the Windows service with the
|
||||
given name. To query the service either check the Windows service manager or run
|
||||
|
||||
> telegraf service status
|
||||
|
||||
In case you specified a custom service-name during install use
|
||||
|
||||
> telegraf --service-name telegraf-machine service status
|
||||
`,
|
||||
Action: func(cCtx *cli.Context) error {
|
||||
name := cCtx.String("service-name")
|
||||
status, err := queryService(name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Fprintf(outputBuffer, "Service %q is in %q state\n", name, status)
|
||||
return nil
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
//go:build !windows
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
func cliFlags() []cli.Flag {
|
||||
return []cli.Flag{}
|
||||
}
|
||||
|
||||
func getServiceCommands(io.Writer) []*cli.Command {
|
||||
return nil
|
||||
}
|
||||
|
|
@ -256,6 +256,7 @@ func runApp(args []string, outputBuffer io.Writer, pprof Server, c TelegrafConfi
|
|||
getSecretStoreCommands(m)...,
|
||||
)
|
||||
commands = append(commands, getPluginCommands(outputBuffer)...)
|
||||
commands = append(commands, getServiceCommands(outputBuffer)...)
|
||||
|
||||
app := &cli.App{
|
||||
Name: "Telegraf",
|
||||
|
|
|
|||
|
|
@ -77,6 +77,8 @@ type Telegraf struct {
|
|||
configFiles []string
|
||||
secretstoreFilters []string
|
||||
|
||||
cfg *config.Config
|
||||
|
||||
GlobalFlags
|
||||
WindowFlags
|
||||
}
|
||||
|
|
@ -134,11 +136,6 @@ func (t *Telegraf) GetSecretStore(id string) (telegraf.SecretStore, error) {
|
|||
|
||||
func (t *Telegraf) reloadLoop() error {
|
||||
reloadConfig := false
|
||||
cfg, err := t.loadConfiguration()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
reload := make(chan bool, 1)
|
||||
reload <- true
|
||||
for <-reload {
|
||||
|
|
@ -189,7 +186,7 @@ func (t *Telegraf) reloadLoop() error {
|
|||
}
|
||||
}()
|
||||
|
||||
err := t.runAgent(ctx, cfg, reloadConfig)
|
||||
err := t.runAgent(ctx, reloadConfig)
|
||||
if err != nil && !errors.Is(err, context.Canceled) {
|
||||
return fmt.Errorf("[telegraf] Error running agent: %w", err)
|
||||
}
|
||||
|
|
@ -314,7 +311,8 @@ func (t *Telegraf) loadConfiguration() (*config.Config, error) {
|
|||
return c, nil
|
||||
}
|
||||
|
||||
func (t *Telegraf) runAgent(ctx context.Context, c *config.Config, reloadConfig bool) error {
|
||||
func (t *Telegraf) runAgent(ctx context.Context, reloadConfig bool) error {
|
||||
c := t.cfg
|
||||
var err error
|
||||
if reloadConfig {
|
||||
if c, err = t.loadConfiguration(); err != nil {
|
||||
|
|
|
|||
|
|
@ -5,17 +5,18 @@ package main
|
|||
import (
|
||||
"log"
|
||||
"syscall"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
func (t *Telegraf) Run() error {
|
||||
stop = make(chan struct{})
|
||||
return t.reloadLoop()
|
||||
}
|
||||
defer close(stop)
|
||||
|
||||
func cliFlags() []cli.Flag {
|
||||
return []cli.Flag{}
|
||||
cfg, err := t.loadConfiguration()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
t.cfg = cfg
|
||||
return t.reloadLoop()
|
||||
}
|
||||
|
||||
func getLockedMemoryLimit() uint64 {
|
||||
|
|
|
|||
|
|
@ -6,46 +6,20 @@ package main
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/kardianos/service"
|
||||
"github.com/urfave/cli/v2"
|
||||
"golang.org/x/sys/windows"
|
||||
"golang.org/x/sys/windows/svc"
|
||||
"golang.org/x/sys/windows/svc/eventlog"
|
||||
"golang.org/x/sys/windows/svc/mgr"
|
||||
|
||||
"github.com/influxdata/telegraf/logger"
|
||||
)
|
||||
|
||||
func cliFlags() []cli.Flag {
|
||||
return []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "service",
|
||||
Usage: "operate on the service (windows only)",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "service-name",
|
||||
Value: "telegraf",
|
||||
Usage: "service name (windows only)",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "service-display-name",
|
||||
Value: "Telegraf Data Collector Service",
|
||||
Usage: "service display name (windows only)",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "service-restart-delay",
|
||||
Value: "5m",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "service-auto-restart",
|
||||
Usage: "auto restart service on failure (windows only)",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "console",
|
||||
Usage: "run as console application (windows only)",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func getLockedMemoryLimit() uint64 {
|
||||
handle := windows.CurrentProcess()
|
||||
|
||||
|
|
@ -58,114 +32,384 @@ func getLockedMemoryLimit() uint64 {
|
|||
|
||||
func (t *Telegraf) Run() error {
|
||||
// Register the eventlog logging target for windows.
|
||||
err := logger.RegisterEventLogger(t.serviceName)
|
||||
if err != nil {
|
||||
if err := logger.RegisterEventLogger(t.serviceName); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !t.windowsRunAsService() {
|
||||
stop = make(chan struct{})
|
||||
return t.reloadLoop()
|
||||
// Process the service commands
|
||||
if t.service != "" {
|
||||
fmt.Println("The use of --service is deprecated, please use the 'service' command instead!")
|
||||
switch t.service {
|
||||
case "install":
|
||||
cfg := &serviceConfig{
|
||||
displayName: t.serviceDisplayName,
|
||||
restartDelay: t.serviceRestartDelay,
|
||||
autoRestart: t.serviceAutoRestart,
|
||||
configs: t.config,
|
||||
configDirs: t.configDir,
|
||||
watchConfig: t.watchConfig,
|
||||
}
|
||||
if err := installService(t.serviceName, cfg); err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Printf("Successfully installed service %q\n", t.serviceName)
|
||||
case "uninstall":
|
||||
if err := uninstallService(t.serviceName); err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Printf("Successfully uninstalled service %q\n", t.serviceName)
|
||||
case "start":
|
||||
if err := startService(t.serviceName); err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Printf("Successfully started service %q\n", t.serviceName)
|
||||
case "stop":
|
||||
if err := stopService(t.serviceName); err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Printf("Successfully stopped service %q\n", t.serviceName)
|
||||
case "status":
|
||||
status, err := queryService(t.serviceName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Printf("Service %q is in %q state\n", t.serviceName, status)
|
||||
default:
|
||||
return fmt.Errorf("invalid service command %q", t.service)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
return t.runAsWindowsService()
|
||||
// Determine if Telegraf is started as a Windows service.
|
||||
isWinService, err := svc.IsWindowsService()
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot determine if run as Windows service: %w", err)
|
||||
}
|
||||
if !t.console && isWinService {
|
||||
return svc.Run(t.serviceName, t)
|
||||
}
|
||||
|
||||
// Load the configuration file(s)
|
||||
cfg, err := t.loadConfiguration()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
t.cfg = cfg
|
||||
|
||||
stop = make(chan struct{})
|
||||
defer close(stop)
|
||||
return t.reloadLoop()
|
||||
}
|
||||
|
||||
type program struct {
|
||||
*Telegraf
|
||||
}
|
||||
|
||||
func (p *program) Start(_ service.Service) error {
|
||||
go func() {
|
||||
stop = make(chan struct{})
|
||||
err := p.reloadLoop()
|
||||
if err != nil {
|
||||
fmt.Printf("E! %v\n", err)
|
||||
}
|
||||
close(stop)
|
||||
// Handler for the Windows service framework
|
||||
func (t *Telegraf) Execute(_ []string, r <-chan svc.ChangeRequest, changes chan<- svc.Status) (bool, uint32) {
|
||||
// Mark the status as startup pending until we are fully started
|
||||
const accepted = svc.AcceptStop | svc.AcceptShutdown
|
||||
changes <- svc.Status{State: svc.StartPending}
|
||||
defer func() {
|
||||
changes <- svc.Status{State: svc.Stopped}
|
||||
}()
|
||||
return nil
|
||||
|
||||
// Create a eventlog logger for all service related things
|
||||
svclog, err := eventlog.Open(t.serviceName)
|
||||
if err != nil {
|
||||
log.Printf("E! Initializing the service logger failed: %s", err)
|
||||
return true, 1
|
||||
}
|
||||
defer svclog.Close()
|
||||
|
||||
// Load the configuration file(s)
|
||||
cfg, err := t.loadConfiguration()
|
||||
if err != nil {
|
||||
if lerr := svclog.Error(100, err.Error()); lerr != nil {
|
||||
log.Printf("E! Logging error %q failed: %s", err, lerr)
|
||||
}
|
||||
return true, 2
|
||||
}
|
||||
t.cfg = cfg
|
||||
|
||||
// Actually start the processing loop in the background to be able to
|
||||
// react to service change requests
|
||||
loopErr := make(chan error)
|
||||
stop = make(chan struct{})
|
||||
defer close(loopErr)
|
||||
defer close(stop)
|
||||
go func() {
|
||||
loopErr <- t.reloadLoop()
|
||||
}()
|
||||
changes <- svc.Status{State: svc.Running, Accepts: accepted}
|
||||
|
||||
for {
|
||||
select {
|
||||
case err := <-loopErr:
|
||||
if err != nil {
|
||||
if lerr := svclog.Error(100, err.Error()); lerr != nil {
|
||||
log.Printf("E! Logging error %q failed: %s", err, lerr)
|
||||
}
|
||||
return true, 3
|
||||
}
|
||||
return false, 0
|
||||
case c := <-r:
|
||||
switch c.Cmd {
|
||||
case svc.Interrogate:
|
||||
changes <- c.CurrentStatus
|
||||
// Testing deadlock from https://code.google.com/p/winsvc/issues/detail?id=4
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
changes <- c.CurrentStatus
|
||||
case svc.Stop, svc.Shutdown:
|
||||
changes <- svc.Status{State: svc.StopPending}
|
||||
var empty struct{}
|
||||
stop <- empty // signal reloadLoop to finish (context cancel)
|
||||
default:
|
||||
msg := fmt.Sprintf("Unexpected control request #%d", c)
|
||||
if lerr := svclog.Error(100, msg); lerr != nil {
|
||||
log.Printf("E! Logging error %q failed: %s", msg, lerr)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (p *program) Stop(_ service.Service) error {
|
||||
var empty struct{}
|
||||
stop <- empty // signal reloadLoop to finish (context cancel)
|
||||
<-stop // wait for reloadLoop to finish and close channel
|
||||
return nil
|
||||
// serviceConfig bundles the settings used when installing Telegraf as a
// Windows service.
type serviceConfig struct {
	// Settings of the service registration itself
	displayName  string // name shown for the service
	restartDelay string // delay before a restart on failure, as duration string
	autoRestart  bool   // whether a restart-on-failure action is configured

	// Telegraf parameters
	configs     []string // configuration files
	configDirs  []string // configuration directories
	watchConfig string   // configuration watch setting; empty means unset
}
|
||||
|
||||
func (t *Telegraf) runAsWindowsService() error {
|
||||
func installService(name string, cfg *serviceConfig) error {
|
||||
// Determine the executable to use in the service
|
||||
executable, err := os.Executable()
|
||||
if err != nil {
|
||||
return fmt.Errorf("determining executable failed: %w", err)
|
||||
}
|
||||
|
||||
// Determine the program files directory name
|
||||
programFiles := os.Getenv("ProgramFiles")
|
||||
if programFiles == "" { // Should never happen
|
||||
programFiles = "C:\\Program Files"
|
||||
}
|
||||
svcConfig := &service.Config{
|
||||
Name: t.serviceName,
|
||||
DisplayName: t.serviceDisplayName,
|
||||
Description: "Collects data using a series of plugins and publishes it to " +
|
||||
"another series of plugins.",
|
||||
Arguments: []string{"--config", programFiles + "\\Telegraf\\telegraf.conf"},
|
||||
|
||||
// Collect the command line arguments
|
||||
args := make([]string, 0, 2*(len(cfg.configs)+len(cfg.configDirs))+2)
|
||||
for _, fn := range cfg.configs {
|
||||
args = append(args, "--config", fn)
|
||||
}
|
||||
for _, dn := range cfg.configDirs {
|
||||
args = append(args, "--config-directory", dn)
|
||||
}
|
||||
if len(args) == 0 {
|
||||
args = append(args, "--config", filepath.Join(programFiles, "Telegraf", "telegraf.conf"))
|
||||
}
|
||||
if cfg.watchConfig != "" {
|
||||
args = append(args, "--watch-config", cfg.watchConfig)
|
||||
}
|
||||
// Pass the service name to the command line, to have a custom name when relaunching as a service
|
||||
args = append(args, "--service-name", name)
|
||||
|
||||
// Create a configuration for the service
|
||||
svccfg := mgr.Config{
|
||||
DisplayName: cfg.displayName,
|
||||
Description: "Collects, processes and publishes data using a series of plugins.",
|
||||
StartType: mgr.StartAutomatic,
|
||||
ServiceType: windows.SERVICE_WIN32_OWN_PROCESS,
|
||||
}
|
||||
|
||||
prg := &program{
|
||||
Telegraf: t,
|
||||
}
|
||||
s, err := service.New(prg, svcConfig)
|
||||
// Connect to the service manager and try to install the service if it
|
||||
// doesn't exist. Fail on existing service and stop installation.
|
||||
svcmgr, err := mgr.Connect()
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("connecting to service manager failed: %w", err)
|
||||
}
|
||||
// Handle the --service flag here to prevent any issues with tooling that
|
||||
// may not have an interactive session, e.g. installing from Ansible.
|
||||
if t.service != "" {
|
||||
if len(t.config) > 0 {
|
||||
svcConfig.Arguments = []string{}
|
||||
}
|
||||
for _, fConfig := range t.config {
|
||||
svcConfig.Arguments = append(svcConfig.Arguments, "--config", fConfig)
|
||||
}
|
||||
defer svcmgr.Disconnect()
|
||||
|
||||
for _, fConfigDirectory := range t.configDir {
|
||||
svcConfig.Arguments = append(svcConfig.Arguments, "--config-directory", fConfigDirectory)
|
||||
}
|
||||
if service, err := svcmgr.OpenService(name); err == nil {
|
||||
service.Close()
|
||||
return fmt.Errorf("service %q is already installed", name)
|
||||
}
|
||||
|
||||
if t.watchConfig != "" {
|
||||
svcConfig.Arguments = append(svcConfig.Arguments, "--watch-config", t.watchConfig)
|
||||
}
|
||||
service, err := svcmgr.CreateService(name, executable, svccfg, args...)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating service failed: %w", err)
|
||||
}
|
||||
defer service.Close()
|
||||
|
||||
//set servicename to service cmd line, to have a custom name after relaunch as a service
|
||||
svcConfig.Arguments = append(svcConfig.Arguments, "--service-name", t.serviceName)
|
||||
|
||||
if t.serviceAutoRestart {
|
||||
svcConfig.Option = service.KeyValue{"OnFailure": "restart", "OnFailureDelayDuration": t.serviceRestartDelay}
|
||||
}
|
||||
|
||||
err := service.Control(s, t.service)
|
||||
// Set the recovery strategy to restart with a fixed period of 10 seconds
|
||||
// and the user specified delay if requested
|
||||
if cfg.autoRestart {
|
||||
delay, err := time.ParseDuration(cfg.restartDelay)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("cannot parse restart delay %q: %w", cfg.restartDelay, err)
|
||||
}
|
||||
} else {
|
||||
err = logger.SetupLogging(logger.Config{LogTarget: "eventlog"})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = s.Run()
|
||||
if err != nil {
|
||||
recovery := []mgr.RecoveryAction{{Type: mgr.ServiceRestart, Delay: delay}}
|
||||
if err := service.SetRecoveryActions(recovery, 10); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Register the event as a source of eventlog events
|
||||
events := uint32(eventlog.Error | eventlog.Warning | eventlog.Info)
|
||||
if err := eventlog.InstallAsEventCreate(name, events); err != nil {
|
||||
//nolint:errcheck // Try to remove the service on best effort basis as we cannot handle any error here
|
||||
service.Delete()
|
||||
return fmt.Errorf("setting up eventlog source failed: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Return true if Telegraf should create a Windows service.
|
||||
func (t *Telegraf) windowsRunAsService() bool {
|
||||
if t.service != "" {
|
||||
return true
|
||||
func uninstallService(name string) error {
|
||||
// Connect to the service manager and try to open the service. In case the
|
||||
// service is not installed, return with the corresponding error.
|
||||
svcmgr, err := mgr.Connect()
|
||||
if err != nil {
|
||||
return fmt.Errorf("connecting to service manager failed: %w", err)
|
||||
}
|
||||
defer svcmgr.Disconnect()
|
||||
|
||||
service, err := svcmgr.OpenService(name)
|
||||
if err != nil {
|
||||
return fmt.Errorf("opening service failed: %w", err)
|
||||
}
|
||||
defer service.Close()
|
||||
|
||||
// Uninstall the service and remove the eventlog source
|
||||
if err := service.Delete(); err != nil {
|
||||
return fmt.Errorf("uninstalling service failed: %w", err)
|
||||
}
|
||||
|
||||
if t.console {
|
||||
return false
|
||||
if err := eventlog.Remove(name); err != nil {
|
||||
return fmt.Errorf("removing eventlog source failed: %w", err)
|
||||
}
|
||||
|
||||
return !service.Interactive()
|
||||
return nil
|
||||
}
|
||||
|
||||
func startService(name string) error {
|
||||
nameUTF16, err := syscall.UTF16PtrFromString(name)
|
||||
if err != nil {
|
||||
return fmt.Errorf("conversion of service name %q to UTF16 failed: %w", name, err)
|
||||
}
|
||||
|
||||
// Open the service manager and service with the least privileges required to start the service
|
||||
mgrhandle, err := windows.OpenSCManager(nil, nil, windows.SC_MANAGER_CONNECT|windows.SC_MANAGER_ENUMERATE_SERVICE)
|
||||
if err != nil {
|
||||
return fmt.Errorf("opening service manager failed: %w", err)
|
||||
}
|
||||
defer windows.CloseServiceHandle(mgrhandle)
|
||||
|
||||
svchandle, err := windows.OpenService(mgrhandle, nameUTF16, windows.SERVICE_QUERY_STATUS|windows.SERVICE_START)
|
||||
if err != nil {
|
||||
return fmt.Errorf("opening service failed: %w", err)
|
||||
}
|
||||
service := &mgr.Service{Handle: svchandle, Name: name}
|
||||
defer service.Close()
|
||||
|
||||
// Check if the service is actually stopped
|
||||
status, err := service.Query()
|
||||
if err != nil {
|
||||
return fmt.Errorf("querying service state failed: %w", err)
|
||||
}
|
||||
if status.State != svc.Stopped {
|
||||
return fmt.Errorf("service is not stopped but in state %q", stateDescription(status.State))
|
||||
}
|
||||
|
||||
return service.Start()
|
||||
}
|
||||
|
||||
func stopService(name string) error {
|
||||
nameUTF16, err := syscall.UTF16PtrFromString(name)
|
||||
if err != nil {
|
||||
return fmt.Errorf("conversion of service name %q to UTF16 failed: %w", name, err)
|
||||
}
|
||||
|
||||
// Open the service manager and service with the least privileges required to start the service
|
||||
mgrhandle, err := windows.OpenSCManager(nil, nil, windows.SC_MANAGER_CONNECT|windows.SC_MANAGER_ENUMERATE_SERVICE)
|
||||
if err != nil {
|
||||
return fmt.Errorf("opening service manager failed: %w", err)
|
||||
}
|
||||
defer windows.CloseServiceHandle(mgrhandle)
|
||||
|
||||
svchandle, err := windows.OpenService(mgrhandle, nameUTF16, windows.SERVICE_QUERY_STATUS|windows.SERVICE_STOP)
|
||||
if err != nil {
|
||||
return fmt.Errorf("opening service failed: %w", err)
|
||||
}
|
||||
service := &mgr.Service{Handle: svchandle, Name: name}
|
||||
defer service.Close()
|
||||
|
||||
// Stop the service and wait for it to finish
|
||||
status, err := service.Control(svc.Stop)
|
||||
if err != nil {
|
||||
return fmt.Errorf("stopping service failed: %w", err)
|
||||
}
|
||||
for status.State != svc.Stopped {
|
||||
// Wait for the hinted time, but clip it to prevent stalling operation
|
||||
wait := time.Duration(status.WaitHint) * time.Millisecond
|
||||
if wait < 100*time.Millisecond {
|
||||
wait = 100 * time.Millisecond
|
||||
} else if wait > 10*time.Second {
|
||||
wait = 10 * time.Second
|
||||
}
|
||||
time.Sleep(wait)
|
||||
|
||||
status, err = service.Query()
|
||||
if err != nil {
|
||||
return fmt.Errorf("querying service state failed: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func queryService(name string) (string, error) {
|
||||
nameUTF16, err := syscall.UTF16PtrFromString(name)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("conversion of service name %q to UTF16 failed: %w", name, err)
|
||||
}
|
||||
|
||||
// Open the service manager and service with the least privileges required to start the service
|
||||
mgrhandle, err := windows.OpenSCManager(nil, nil, windows.SC_MANAGER_CONNECT|windows.SC_MANAGER_ENUMERATE_SERVICE)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("opening service manager failed: %w", err)
|
||||
}
|
||||
defer windows.CloseServiceHandle(mgrhandle)
|
||||
|
||||
svchandle, err := windows.OpenService(mgrhandle, nameUTF16, windows.SERVICE_QUERY_STATUS)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("opening service failed: %w", err)
|
||||
}
|
||||
service := &mgr.Service{Handle: svchandle, Name: name}
|
||||
defer service.Close()
|
||||
|
||||
// Query the service state and report it to the user
|
||||
status, err := service.Query()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("querying service state failed: %w", err)
|
||||
}
|
||||
|
||||
return stateDescription(status.State), nil
|
||||
}
|
||||
|
||||
func stateDescription(state svc.State) string {
|
||||
switch state {
|
||||
case svc.Stopped:
|
||||
return "stopped"
|
||||
case svc.StartPending:
|
||||
return "start pending"
|
||||
case svc.StopPending:
|
||||
return "stop pending"
|
||||
case svc.Running:
|
||||
return "running"
|
||||
case svc.ContinuePending:
|
||||
return "continue pending"
|
||||
case svc.PausePending:
|
||||
return "pause pending"
|
||||
case svc.Paused:
|
||||
return "paused"
|
||||
}
|
||||
return fmt.Sprintf("unknown %v", state)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -234,7 +234,6 @@ following works:
|
|||
- github.com/josharian/native [MIT License](https://github.com/josharian/native/blob/main/license)
|
||||
- github.com/jpillora/backoff [MIT License](https://github.com/jpillora/backoff/blob/master/LICENSE)
|
||||
- github.com/json-iterator/go [MIT License](https://github.com/json-iterator/go/blob/master/LICENSE)
|
||||
- github.com/kardianos/service [zlib License](https://github.com/kardianos/service/blob/master/LICENSE)
|
||||
- github.com/karrick/godirwalk [BSD 2-Clause "Simplified" License](https://github.com/karrick/godirwalk/blob/master/LICENSE)
|
||||
- github.com/kballard/go-shellquote [MIT License](https://github.com/kballard/go-shellquote/blob/master/LICENSE)
|
||||
- github.com/klauspost/compress [BSD 3-Clause Clear License](https://github.com/klauspost/compress/blob/master/LICENSE)
|
||||
|
|
|
|||
|
|
@ -4,20 +4,25 @@ Telegraf natively supports running as a Windows Service. Outlined below is are
|
|||
the general steps to set it up.
|
||||
|
||||
1. Obtain the telegraf windows distribution
|
||||
2. Create the directory `C:\Program Files\Telegraf` (if you install in a different
|
||||
location simply specify the `--config` parameter with the desired location)
|
||||
3. Place the telegraf.exe and the telegraf.conf config file into `C:\Program Files\Telegraf`
|
||||
4. To install the service into the Windows Service Manager, run the following in PowerShell as an administrator (If necessary, you can wrap any spaces in the file paths in double quotes ""):
|
||||
2. Create the directory `C:\Program Files\Telegraf` or use a custom directory
|
||||
if desired
|
||||
3. Place the telegraf.exe and the telegraf.conf config file into the directory,
|
||||
either `C:\Program Files\Telegraf` or the custom directory of your choice.
|
||||
If you install in a different location simply specify the `--config`
|
||||
parameter with the desired location.
|
||||
4. To install the service into the Windows Service Manager, run the command
|
||||
as administrator. Make sure to wrap parameters containing spaces in double
|
||||
quotes:
|
||||
|
||||
```shell
|
||||
> C:\"Program Files"\Telegraf\telegraf.exe --service install
|
||||
> "C:\Program Files\Telegraf\telegraf.exe" service install
|
||||
```
|
||||
|
||||
5. Edit the configuration file to meet your needs
|
||||
6. To check that it works, run:
|
||||
|
||||
```shell
|
||||
> C:\"Program Files"\Telegraf\telegraf.exe --config C:\"Program Files"\Telegraf\telegraf.conf --test
|
||||
> "C:\Program Files\Telegraf\telegraf.exe" --config "C:\Program Files\Telegraf\telegraf.conf" --test
|
||||
```
|
||||
|
||||
7. To start collecting data, run:
|
||||
|
|
@ -26,6 +31,17 @@ the general steps to set it up.
|
|||
> net start telegraf
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```shell
|
||||
> "C:\Program Files\Telegraf\telegraf.exe" service start
|
||||
```
|
||||
|
||||
or use the Windows service manager to start the service
|
||||
|
||||
Please also check the Windows event log or your configured log-file for errors
|
||||
during startup.
|
||||
|
||||
## Config Directory
|
||||
|
||||
You can also specify a `--config-directory` for the service to use:
|
||||
|
|
@ -34,44 +50,80 @@ You can also specify a `--config-directory` for the service to use:
|
|||
2. Include the `--config-directory` option when registering the service:
|
||||
|
||||
```shell
|
||||
> C:\"Program Files"\Telegraf\telegraf.exe --service install --config C:\"Program Files"\Telegraf\telegraf.conf --config-directory C:\"Program Files"\Telegraf\telegraf.d
|
||||
> "C:\Program Files\Telegraf\telegraf.exe" --config "C:\Program Files\Telegraf\telegraf.conf" --config-directory "C:\Program Files\Telegraf\telegraf.d" service install
|
||||
```
|
||||
|
||||
## Other supported operations
|
||||
|
||||
Telegraf can manage its own service through the `service` command:
|
||||
|
||||
| Command | Effect |
|
||||
|------------------------------------|-------------------------------|
|
||||
| `telegraf.exe --service install` | Install telegraf as a service |
|
||||
| `telegraf.exe --service uninstall` | Remove the telegraf service |
|
||||
| `telegraf.exe --service start` | Start the telegraf service |
|
||||
| `telegraf.exe --service stop` | Stop the telegraf service |
|
||||
| Command | Effect |
|
||||
|----------------------------------|------------------------------------------|
|
||||
| `telegraf.exe service install` | Install telegraf as a service |
|
||||
| `telegraf.exe service uninstall` | Remove the telegraf service |
|
||||
| `telegraf.exe service start` | Start the telegraf service |
|
||||
| `telegraf.exe service stop` | Stop the telegraf service |
|
||||
| `telegraf.exe service status` | Query the status of the telegraf service |
|
||||
|
||||
## Install multiple services
|
||||
|
||||
Running multiple instances of Telegraf is seldom needed, as you can run
|
||||
multiple instances of each plugin and route metric flow using the metric
|
||||
filtering options. However, if you do need to run multiple telegraf instances
|
||||
filtering options. However, if you do need to run multiple telegraf instances
|
||||
on a single system, you can install the service with the `--service-name` and
|
||||
`--service-display-name` flags to give the services unique names:
|
||||
`--display-name` flags to give the services unique names:
|
||||
|
||||
```shell
|
||||
> C:\"Program Files"\Telegraf\telegraf.exe --service install --service-name telegraf-1 --service-display-name "Telegraf 1"
|
||||
> C:\"Program Files"\Telegraf\telegraf.exe --service install --service-name telegraf-2 --service-display-name "Telegraf 2"
|
||||
> "C:\Program Files\Telegraf\telegraf.exe" --service-name telegraf-1 service install --display-name "Telegraf 1"
|
||||
> "C:\Program Files\Telegraf\telegraf.exe" --service-name telegraf-2 service install --display-name "Telegraf 2"
|
||||
```
|
||||
|
||||
## Auto restart and restart delay
|
||||
|
||||
By default the service will not automatically restart on failure. Providing the `--service-auto-restart` flag during installation will always restart the service with a default delay of 5 minutes. To modify this to for example 3 minutes, provide the additional flag `--service-restart-delay 3m`. The delay can be any valid `time.Duration` string.
|
||||
By default the service will not automatically restart on failure. Providing the
|
||||
`--auto-restart` flag during installation will always restart the service with
|
||||
a default delay of 5 minutes. To modify this to, for example, 3 minutes,
|
||||
additionally provide the `--restart-delay 3m` flag. The delay can be any valid
|
||||
`time.Duration` string.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
When Telegraf runs as a Windows service, Telegraf logs messages to Windows events log before configuration file with logging settings is loaded.
|
||||
Check event log for an error reported by `telegraf` service in case of Telegraf service reports failure on its start: Event Viewer->Windows Logs->Application
|
||||
When Telegraf runs as a Windows service, Telegraf logs all messages concerning
|
||||
the service startup to the Windows event log. All messages and errors occurring
|
||||
during runtime will be logged to the log-target you configured.
|
||||
Check the event log for errors reported by the `telegraf` service (or the
|
||||
service-name you configured) during service startup:
|
||||
`Event Viewer -> Windows Logs -> Application`
|
||||
|
||||
### common error #1067
|
||||
### Common error #1067
|
||||
|
||||
When installing as service in Windows, always double check to specify full path of the config file, otherwise windows service will fail to start
|
||||
When installing as a service in Windows, always double check to specify the full path
|
||||
of the config file, otherwise the Windows service will fail to start. Use
|
||||
|
||||
--config "C:\Program Files\Telegraf\telegraf.conf"
|
||||
```shell
|
||||
> "C:\Program Files\Telegraf\telegraf.exe" --config "C:\MyConfigs\telegraf.conf" service install
|
||||
```
|
||||
|
||||
instead of
|
||||
|
||||
```shell
|
||||
> "C:\Program Files\Telegraf\telegraf.exe" --config "telegraf.conf" service install
|
||||
```
|
||||
|
||||
### Service is killed during shutdown
|
||||
|
||||
When shutting down Windows the Telegraf service tries to cleanly stop when
|
||||
receiving the corresponding notification from the Windows service manager. The
|
||||
exit process involves stopping all inputs, processors and aggregators and
|
||||
finally flushing all remaining metrics to the output(s). In case many metrics
|
||||
are not yet flushed this final step might take some time. However, Windows will
|
||||
kill the service and the corresponding process after a predefined timeout
|
||||
(usually 5 seconds).
|
||||
|
||||
You can change that timeout in the registry under
|
||||
|
||||
```text
|
||||
HKLM\SYSTEM\CurrentControlSet\Control\WaitToKillServiceTimeout
|
||||
```
|
||||
|
||||
**NOTE:** The value is in milliseconds and applies to **all** services!
|
||||
|
|
|
|||
1
go.mod
1
go.mod
|
|
@ -123,7 +123,6 @@ require (
|
|||
github.com/jeremywohl/flatten/v2 v2.0.0-20211013061545-07e4a09fb8e4
|
||||
github.com/jhump/protoreflect v1.16.0
|
||||
github.com/jmespath/go-jmespath v0.4.0
|
||||
github.com/kardianos/service v1.2.2
|
||||
github.com/karrick/godirwalk v1.16.2
|
||||
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
|
||||
github.com/klauspost/compress v1.17.7
|
||||
|
|
|
|||
3
go.sum
3
go.sum
|
|
@ -1669,8 +1669,6 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V
|
|||
github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=
|
||||
github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
|
||||
github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
|
||||
github.com/kardianos/service v1.2.2 h1:ZvePhAHfvo0A7Mftk/tEzqEZ7Q4lgnR8sGz4xu1YX60=
|
||||
github.com/kardianos/service v1.2.2/go.mod h1:CIMRFEJVL+0DS1a3Nx06NaMn4Dz63Ng6O7dl0qH0zVM=
|
||||
github.com/karrick/godirwalk v1.16.2 h1:eY2INUWoB2ZfpF/kXasyjWJ3Ncuof6qZuNWYZFN3kAI=
|
||||
github.com/karrick/godirwalk v1.16.2/go.mod h1:j4mkqPuvaLI8mp1DroR3P6ad7cyYd4c1qeJ3RV7ULlk=
|
||||
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs=
|
||||
|
|
@ -2679,7 +2677,6 @@ golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7w
|
|||
golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201009025420-dfb3f7c4e634/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201015000850-e3ed0017c211/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201118182958-a01c418693c7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
|
|
|
|||
Loading…
Reference in New Issue