chore(windows): Rework Windows service handling (#15372)

This commit is contained in:
Sven Rebhan 2024-06-03 17:05:39 -04:00 committed by GitHub
parent 0e636b729a
commit 274fbd4b62
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 660 additions and 149 deletions

View File

@ -0,0 +1,203 @@
//go:build windows
// Command handling for configuration "service" command
package main
import (
"errors"
"fmt"
"io"
"github.com/urfave/cli/v2"
"golang.org/x/sys/windows"
)
// cliFlags returns the command-line flags that are only available in the
// Windows build. They cover the service operation to perform, the service
// name and display name, the restart-on-failure settings and the switch for
// running as a console application.
func cliFlags() []cli.Flag {
	return []cli.Flag{
		&cli.StringFlag{
			Name:  "service",
			Usage: "operate on the service (windows only)",
		},
		&cli.StringFlag{
			Name:  "service-name",
			Value: "telegraf",
			Usage: "service name (windows only)",
		},
		&cli.StringFlag{
			Name:  "service-display-name",
			Value: "Telegraf Data Collector Service",
			Usage: "service display name (windows only)",
		},
		&cli.StringFlag{
			Name:  "service-restart-delay",
			Value: "5m",
			// Every other flag carries a help text; without one this flag
			// shows up without any explanation in the generated help.
			Usage: "delay before restarting the service on failure (windows only)",
		},
		&cli.BoolFlag{
			Name:  "service-auto-restart",
			Usage: "auto restart service on failure (windows only)",
		},
		&cli.BoolFlag{
			Name:  "console",
			Usage: "run as console application (windows only)",
		},
	}
}
// getServiceCommands returns the "service" command together with its
// sub-commands (install, uninstall, start, stop and status) for managing
// Telegraf as a Windows service. Each sub-command reads the service name from
// the "service-name" flag and writes its status message to outputBuffer.
// Errors of the underlying service operation are returned to the caller
// unchanged, except for stopping a service that is not active, which is
// reported as a message instead of an error.
func getServiceCommands(outputBuffer io.Writer) []*cli.Command {
	return []*cli.Command{
		{
			Name:  "service",
			Usage: "commands for operating on the Windows service",
			Flags: nil,
			Subcommands: []*cli.Command{
				{
					Name:  "install",
					Usage: "install Telegraf as a Windows service",
					Description: `
The 'install' command will create a Windows service for automatically starting
Telegraf with the specified configuration and service parameters. If no
configuration(s) is specified the service will use the file in
"C:\Program Files\Telegraf\telegraf.conf".
To install Telegraf as a service use
> telegraf service install
In case you are planning to start multiple Telegraf instances as a service,
you must use distinctive service-names for each instance. To install two
services with different configurations use
> telegraf --config "C:\Program Files\Telegraf\telegraf-machine.conf" --service-name telegraf-machine service install
> telegraf --config "C:\Program Files\Telegraf\telegraf-service.conf" --service-name telegraf-service service install
`,
					Flags: []cli.Flag{
						&cli.StringFlag{
							Name:  "display-name",
							Value: "Telegraf Data Collector Service",
							Usage: "service name as displayed in the service manager",
						},
						&cli.StringFlag{
							Name:  "restart-delay",
							Value: "5m",
							Usage: "duration for delaying the service restart on failure",
						},
						&cli.BoolFlag{
							Name:  "auto-restart",
							Usage: "enable automatic service restart on failure",
						},
					},
					Action: func(cCtx *cli.Context) error {
						cfg := &serviceConfig{
							displayName:  cCtx.String("display-name"),
							restartDelay: cCtx.String("restart-delay"),
							autoRestart:  cCtx.Bool("auto-restart"),

							configs:    cCtx.StringSlice("config"),
							configDirs: cCtx.StringSlice("config-directory"),
							// Forward the "--watch-config" setting as well;
							// installService adds it to the command line of
							// the service when it is not empty.
							watchConfig: cCtx.String("watch-config"),
						}
						name := cCtx.String("service-name")
						if err := installService(name, cfg); err != nil {
							return err
						}
						fmt.Fprintf(outputBuffer, "Successfully installed service %q\n", name)
						return nil
					},
				},
				{
					Name:  "uninstall",
					Usage: "remove the Telegraf Windows service",
					Description: `
The 'uninstall' command removes the Telegraf service with the given name. To
remove a service use
> telegraf service uninstall
In case you specified a custom service-name during install use
> telegraf --service-name telegraf-machine service uninstall
`,
					Action: func(cCtx *cli.Context) error {
						name := cCtx.String("service-name")
						if err := uninstallService(name); err != nil {
							return err
						}
						fmt.Fprintf(outputBuffer, "Successfully uninstalled service %q\n", name)
						return nil
					},
				},
				{
					Name:  "start",
					Usage: "start the Telegraf Windows service",
					Description: `
The 'start' command triggers the start of the Windows service with the given
name. To start the service either use the Windows service manager or run
> telegraf service start
In case you specified a custom service-name during install use
> telegraf --service-name telegraf-machine service start
`,
					Action: func(cCtx *cli.Context) error {
						name := cCtx.String("service-name")
						if err := startService(name); err != nil {
							return err
						}
						fmt.Fprintf(outputBuffer, "Successfully started service %q\n", name)
						return nil
					},
				},
				{
					Name:  "stop",
					Usage: "stop the Telegraf Windows service",
					Description: `
The 'stop' command triggers the stop of the Windows service with the given
name and will wait until the service is actually stopped. To stop the service
either use the Windows service manager or run
> telegraf service stop
In case you specified a custom service-name during install use
> telegraf --service-name telegraf-machine service stop
`,
					Action: func(cCtx *cli.Context) error {
						name := cCtx.String("service-name")
						if err := stopService(name); err != nil {
							// Stopping a service that is not running is
							// reported to the user but not treated as failure
							if errors.Is(err, windows.ERROR_SERVICE_NOT_ACTIVE) {
								fmt.Fprintf(outputBuffer, "Service %q not started\n", name)
								return nil
							}
							return err
						}
						fmt.Fprintf(outputBuffer, "Successfully stopped service %q\n", name)
						return nil
					},
				},
				{
					Name:  "status",
					Usage: "query the Telegraf Windows service status",
					Description: `
The 'status' command queries the current state of the Windows service with the
given name. To query the service either check the Windows service manager or run
> telegraf service status
In case you specified a custom service-name during install use
> telegraf --service-name telegraf-machine service status
`,
					Action: func(cCtx *cli.Context) error {
						name := cCtx.String("service-name")
						status, err := queryService(name)
						if err != nil {
							return err
						}
						fmt.Fprintf(outputBuffer, "Service %q is in %q state\n", name, status)
						return nil
					},
				},
			},
		},
	}
}

View File

@ -0,0 +1,17 @@
//go:build !windows
package main
import (
"io"
"github.com/urfave/cli/v2"
)
// cliFlags returns the platform-specific command-line flags. Builds for
// platforms other than Windows have none, so the result is an empty but
// non-nil slice.
func cliFlags() []cli.Flag {
	return make([]cli.Flag, 0)
}
// getServiceCommands returns the commands for managing the service. There are
// none on platforms other than Windows, so the writer is ignored and a nil
// slice is returned.
func getServiceCommands(_ io.Writer) []*cli.Command {
	var none []*cli.Command
	return none
}

View File

@ -256,6 +256,7 @@ func runApp(args []string, outputBuffer io.Writer, pprof Server, c TelegrafConfi
getSecretStoreCommands(m)...,
)
commands = append(commands, getPluginCommands(outputBuffer)...)
commands = append(commands, getServiceCommands(outputBuffer)...)
app := &cli.App{
Name: "Telegraf",

View File

@ -77,6 +77,8 @@ type Telegraf struct {
configFiles []string
secretstoreFilters []string
cfg *config.Config
GlobalFlags
WindowFlags
}
@ -134,11 +136,6 @@ func (t *Telegraf) GetSecretStore(id string) (telegraf.SecretStore, error) {
func (t *Telegraf) reloadLoop() error {
reloadConfig := false
cfg, err := t.loadConfiguration()
if err != nil {
return err
}
reload := make(chan bool, 1)
reload <- true
for <-reload {
@ -189,7 +186,7 @@ func (t *Telegraf) reloadLoop() error {
}
}()
err := t.runAgent(ctx, cfg, reloadConfig)
err := t.runAgent(ctx, reloadConfig)
if err != nil && !errors.Is(err, context.Canceled) {
return fmt.Errorf("[telegraf] Error running agent: %w", err)
}
@ -314,7 +311,8 @@ func (t *Telegraf) loadConfiguration() (*config.Config, error) {
return c, nil
}
func (t *Telegraf) runAgent(ctx context.Context, c *config.Config, reloadConfig bool) error {
func (t *Telegraf) runAgent(ctx context.Context, reloadConfig bool) error {
c := t.cfg
var err error
if reloadConfig {
if c, err = t.loadConfiguration(); err != nil {

View File

@ -5,17 +5,18 @@ package main
import (
"log"
"syscall"
"github.com/urfave/cli/v2"
)
func (t *Telegraf) Run() error {
stop = make(chan struct{})
return t.reloadLoop()
}
defer close(stop)
func cliFlags() []cli.Flag {
return []cli.Flag{}
cfg, err := t.loadConfiguration()
if err != nil {
return err
}
t.cfg = cfg
return t.reloadLoop()
}
func getLockedMemoryLimit() uint64 {

View File

@ -6,46 +6,20 @@ package main
import (
"fmt"
"log"
"os"
"path/filepath"
"syscall"
"time"
"github.com/kardianos/service"
"github.com/urfave/cli/v2"
"golang.org/x/sys/windows"
"golang.org/x/sys/windows/svc"
"golang.org/x/sys/windows/svc/eventlog"
"golang.org/x/sys/windows/svc/mgr"
"github.com/influxdata/telegraf/logger"
)
// cliFlags returns the command-line flags that are only available in the
// Windows build. They cover the service operation to perform, the service
// name and display name, the restart-on-failure settings and the switch for
// running as a console application.
func cliFlags() []cli.Flag {
	return []cli.Flag{
		&cli.StringFlag{
			Name:  "service",
			Usage: "operate on the service (windows only)",
		},
		&cli.StringFlag{
			Name:  "service-name",
			Value: "telegraf",
			Usage: "service name (windows only)",
		},
		&cli.StringFlag{
			Name:  "service-display-name",
			Value: "Telegraf Data Collector Service",
			Usage: "service display name (windows only)",
		},
		&cli.StringFlag{
			Name:  "service-restart-delay",
			Value: "5m",
			// Every other flag carries a help text; without one this flag
			// shows up without any explanation in the generated help.
			Usage: "delay before restarting the service on failure (windows only)",
		},
		&cli.BoolFlag{
			Name:  "service-auto-restart",
			Usage: "auto restart service on failure (windows only)",
		},
		&cli.BoolFlag{
			Name:  "console",
			Usage: "run as console application (windows only)",
		},
	}
}
func getLockedMemoryLimit() uint64 {
handle := windows.CurrentProcess()
@ -58,114 +32,384 @@ func getLockedMemoryLimit() uint64 {
func (t *Telegraf) Run() error {
// Register the eventlog logging target for windows.
err := logger.RegisterEventLogger(t.serviceName)
if err != nil {
if err := logger.RegisterEventLogger(t.serviceName); err != nil {
return err
}
if !t.windowsRunAsService() {
stop = make(chan struct{})
return t.reloadLoop()
// Process the service commands
if t.service != "" {
fmt.Println("The use of --service is deprecated, please use the 'service' command instead!")
switch t.service {
case "install":
cfg := &serviceConfig{
displayName: t.serviceDisplayName,
restartDelay: t.serviceRestartDelay,
autoRestart: t.serviceAutoRestart,
configs: t.config,
configDirs: t.configDir,
watchConfig: t.watchConfig,
}
if err := installService(t.serviceName, cfg); err != nil {
return err
}
fmt.Printf("Successfully installed service %q\n", t.serviceName)
case "uninstall":
if err := uninstallService(t.serviceName); err != nil {
return err
}
fmt.Printf("Successfully uninstalled service %q\n", t.serviceName)
case "start":
if err := startService(t.serviceName); err != nil {
return err
}
fmt.Printf("Successfully started service %q\n", t.serviceName)
case "stop":
if err := stopService(t.serviceName); err != nil {
return err
}
fmt.Printf("Successfully stopped service %q\n", t.serviceName)
case "status":
status, err := queryService(t.serviceName)
if err != nil {
return err
}
fmt.Printf("Service %q is in %q state\n", t.serviceName, status)
default:
return fmt.Errorf("invalid service command %q", t.service)
}
return nil
}
return t.runAsWindowsService()
// Determine if Telegraf is started as a Windows service.
isWinService, err := svc.IsWindowsService()
if err != nil {
return fmt.Errorf("cannot determine if run as Windows service: %w", err)
}
if !t.console && isWinService {
return svc.Run(t.serviceName, t)
}
// Load the configuration file(s)
cfg, err := t.loadConfiguration()
if err != nil {
return err
}
t.cfg = cfg
stop = make(chan struct{})
defer close(stop)
return t.reloadLoop()
}
type program struct {
*Telegraf
}
func (p *program) Start(_ service.Service) error {
go func() {
stop = make(chan struct{})
err := p.reloadLoop()
if err != nil {
fmt.Printf("E! %v\n", err)
}
close(stop)
// Handler for the Windows service framework
func (t *Telegraf) Execute(_ []string, r <-chan svc.ChangeRequest, changes chan<- svc.Status) (bool, uint32) {
// Mark the status as startup pending until we are fully started
const accepted = svc.AcceptStop | svc.AcceptShutdown
changes <- svc.Status{State: svc.StartPending}
defer func() {
changes <- svc.Status{State: svc.Stopped}
}()
return nil
// Create a eventlog logger for all service related things
svclog, err := eventlog.Open(t.serviceName)
if err != nil {
log.Printf("E! Initializing the service logger failed: %s", err)
return true, 1
}
defer svclog.Close()
// Load the configuration file(s)
cfg, err := t.loadConfiguration()
if err != nil {
if lerr := svclog.Error(100, err.Error()); lerr != nil {
log.Printf("E! Logging error %q failed: %s", err, lerr)
}
return true, 2
}
t.cfg = cfg
// Actually start the processing loop in the background to be able to
// react to service change requests
loopErr := make(chan error)
stop = make(chan struct{})
defer close(loopErr)
defer close(stop)
go func() {
loopErr <- t.reloadLoop()
}()
changes <- svc.Status{State: svc.Running, Accepts: accepted}
for {
select {
case err := <-loopErr:
if err != nil {
if lerr := svclog.Error(100, err.Error()); lerr != nil {
log.Printf("E! Logging error %q failed: %s", err, lerr)
}
return true, 3
}
return false, 0
case c := <-r:
switch c.Cmd {
case svc.Interrogate:
changes <- c.CurrentStatus
// Testing deadlock from https://code.google.com/p/winsvc/issues/detail?id=4
time.Sleep(100 * time.Millisecond)
changes <- c.CurrentStatus
case svc.Stop, svc.Shutdown:
changes <- svc.Status{State: svc.StopPending}
var empty struct{}
stop <- empty // signal reloadLoop to finish (context cancel)
default:
msg := fmt.Sprintf("Unexpected control request #%d", c)
if lerr := svclog.Error(100, msg); lerr != nil {
log.Printf("E! Logging error %q failed: %s", msg, lerr)
}
}
}
}
}
func (p *program) Stop(_ service.Service) error {
var empty struct{}
stop <- empty // signal reloadLoop to finish (context cancel)
<-stop // wait for reloadLoop to finish and close channel
return nil
// serviceConfig bundles the settings handed to installService when
// registering Telegraf as a Windows service.
type serviceConfig struct {
	// Service settings: the name shown for the service, the restart delay as
	// a duration string and whether to restart the service on failure
	displayName, restartDelay string
	autoRestart               bool

	// Telegraf parameters put onto the command line of the service
	configs, configDirs []string
	watchConfig         string
}
func (t *Telegraf) runAsWindowsService() error {
func installService(name string, cfg *serviceConfig) error {
// Determine the executable to use in the service
executable, err := os.Executable()
if err != nil {
return fmt.Errorf("determining executable failed: %w", err)
}
// Determine the program files directory name
programFiles := os.Getenv("ProgramFiles")
if programFiles == "" { // Should never happen
programFiles = "C:\\Program Files"
}
svcConfig := &service.Config{
Name: t.serviceName,
DisplayName: t.serviceDisplayName,
Description: "Collects data using a series of plugins and publishes it to " +
"another series of plugins.",
Arguments: []string{"--config", programFiles + "\\Telegraf\\telegraf.conf"},
// Collect the command line arguments
args := make([]string, 0, 2*(len(cfg.configs)+len(cfg.configDirs))+2)
for _, fn := range cfg.configs {
args = append(args, "--config", fn)
}
for _, dn := range cfg.configDirs {
args = append(args, "--config-directory", dn)
}
if len(args) == 0 {
args = append(args, "--config", filepath.Join(programFiles, "Telegraf", "telegraf.conf"))
}
if cfg.watchConfig != "" {
args = append(args, "--watch-config", cfg.watchConfig)
}
// Pass the service name to the command line, to have a custom name when relaunching as a service
args = append(args, "--service-name", name)
// Create a configuration for the service
svccfg := mgr.Config{
DisplayName: cfg.displayName,
Description: "Collects, processes and publishes data using a series of plugins.",
StartType: mgr.StartAutomatic,
ServiceType: windows.SERVICE_WIN32_OWN_PROCESS,
}
prg := &program{
Telegraf: t,
}
s, err := service.New(prg, svcConfig)
// Connect to the service manager and try to install the service if it
// doesn't exist. Fail on existing service and stop installation.
svcmgr, err := mgr.Connect()
if err != nil {
return err
return fmt.Errorf("connecting to service manager failed: %w", err)
}
// Handle the --service flag here to prevent any issues with tooling that
// may not have an interactive session, e.g. installing from Ansible.
if t.service != "" {
if len(t.config) > 0 {
svcConfig.Arguments = []string{}
}
for _, fConfig := range t.config {
svcConfig.Arguments = append(svcConfig.Arguments, "--config", fConfig)
}
defer svcmgr.Disconnect()
for _, fConfigDirectory := range t.configDir {
svcConfig.Arguments = append(svcConfig.Arguments, "--config-directory", fConfigDirectory)
}
if service, err := svcmgr.OpenService(name); err == nil {
service.Close()
return fmt.Errorf("service %q is already installed", name)
}
if t.watchConfig != "" {
svcConfig.Arguments = append(svcConfig.Arguments, "--watch-config", t.watchConfig)
}
service, err := svcmgr.CreateService(name, executable, svccfg, args...)
if err != nil {
return fmt.Errorf("creating service failed: %w", err)
}
defer service.Close()
//set servicename to service cmd line, to have a custom name after relaunch as a service
svcConfig.Arguments = append(svcConfig.Arguments, "--service-name", t.serviceName)
if t.serviceAutoRestart {
svcConfig.Option = service.KeyValue{"OnFailure": "restart", "OnFailureDelayDuration": t.serviceRestartDelay}
}
err := service.Control(s, t.service)
// Set the recovery strategy to restart with a fixed period of 10 seconds
// and the user specified delay if requested
if cfg.autoRestart {
delay, err := time.ParseDuration(cfg.restartDelay)
if err != nil {
return err
return fmt.Errorf("cannot parse restart delay %q: %w", cfg.restartDelay, err)
}
} else {
err = logger.SetupLogging(logger.Config{LogTarget: "eventlog"})
if err != nil {
return err
}
err = s.Run()
if err != nil {
recovery := []mgr.RecoveryAction{{Type: mgr.ServiceRestart, Delay: delay}}
if err := service.SetRecoveryActions(recovery, 10); err != nil {
return err
}
}
// Register the event as a source of eventlog events
events := uint32(eventlog.Error | eventlog.Warning | eventlog.Info)
if err := eventlog.InstallAsEventCreate(name, events); err != nil {
//nolint:errcheck // Try to remove the service on best effort basis as we cannot handle any error here
service.Delete()
return fmt.Errorf("setting up eventlog source failed: %w", err)
}
return nil
}
// Return true if Telegraf should create a Windows service.
func (t *Telegraf) windowsRunAsService() bool {
if t.service != "" {
return true
func uninstallService(name string) error {
// Connect to the service manager and try to open the service. In case the
// service is not installed, return with the corresponding error.
svcmgr, err := mgr.Connect()
if err != nil {
return fmt.Errorf("connecting to service manager failed: %w", err)
}
defer svcmgr.Disconnect()
service, err := svcmgr.OpenService(name)
if err != nil {
return fmt.Errorf("opening service failed: %w", err)
}
defer service.Close()
// Uninstall the service and remove the eventlog source
if err := service.Delete(); err != nil {
return fmt.Errorf("uninstalling service failed: %w", err)
}
if t.console {
return false
if err := eventlog.Remove(name); err != nil {
return fmt.Errorf("removing eventlog source failed: %w", err)
}
return !service.Interactive()
return nil
}
// startService triggers the start of the Windows service with the given name.
// An error is returned if the service manager or the service cannot be
// opened, if the service state cannot be queried, if the service is in any
// state other than stopped, or if the start request itself fails.
func startService(name string) error {
	nameUTF16, err := syscall.UTF16PtrFromString(name)
	if err != nil {
		return fmt.Errorf("conversion of service name %q to UTF16 failed: %w", name, err)
	}

	// Open the service manager and service with the least privileges required to start the service
	mgrhandle, err := windows.OpenSCManager(nil, nil, windows.SC_MANAGER_CONNECT|windows.SC_MANAGER_ENUMERATE_SERVICE)
	if err != nil {
		return fmt.Errorf("opening service manager failed: %w", err)
	}
	defer windows.CloseServiceHandle(mgrhandle)

	svchandle, err := windows.OpenService(mgrhandle, nameUTF16, windows.SERVICE_QUERY_STATUS|windows.SERVICE_START)
	if err != nil {
		return fmt.Errorf("opening service failed: %w", err)
	}
	service := &mgr.Service{Handle: svchandle, Name: name}
	defer service.Close()

	// Check if the service is actually stopped
	status, err := service.Query()
	if err != nil {
		return fmt.Errorf("querying service state failed: %w", err)
	}
	if status.State != svc.Stopped {
		return fmt.Errorf("service is not stopped but in state %q", stateDescription(status.State))
	}

	// Wrap the error like all other failures above so the user can tell which
	// step went wrong; the cause stays reachable through errors.Is/As.
	if err := service.Start(); err != nil {
		return fmt.Errorf("starting service failed: %w", err)
	}
	return nil
}
// stopService sends a stop request to the Windows service with the given name
// and blocks until the service reports the stopped state. Between two state
// queries it sleeps for the wait hint reported by the service, limited to the
// range of 100 milliseconds to 10 seconds.
func stopService(name string) error {
	const (
		shortestWait = 100 * time.Millisecond
		longestWait  = 10 * time.Second
	)

	namePtr, err := syscall.UTF16PtrFromString(name)
	if err != nil {
		return fmt.Errorf("conversion of service name %q to UTF16 failed: %w", name, err)
	}

	// Request only the access rights needed to query and stop the service
	scm, err := windows.OpenSCManager(nil, nil, windows.SC_MANAGER_CONNECT|windows.SC_MANAGER_ENUMERATE_SERVICE)
	if err != nil {
		return fmt.Errorf("opening service manager failed: %w", err)
	}
	defer windows.CloseServiceHandle(scm)

	handle, err := windows.OpenService(scm, namePtr, windows.SERVICE_QUERY_STATUS|windows.SERVICE_STOP)
	if err != nil {
		return fmt.Errorf("opening service failed: %w", err)
	}
	target := &mgr.Service{Name: name, Handle: handle}
	defer target.Close()

	// Request the stop and poll the state until the service is down
	current, err := target.Control(svc.Stop)
	if err != nil {
		return fmt.Errorf("stopping service failed: %w", err)
	}
	for {
		if current.State == svc.Stopped {
			return nil
		}

		// Honor the hinted waiting time but keep it within sane limits to
		// neither busy-loop nor stall the operation
		pause := time.Duration(current.WaitHint) * time.Millisecond
		switch {
		case pause < shortestWait:
			pause = shortestWait
		case pause > longestWait:
			pause = longestWait
		}
		time.Sleep(pause)

		if current, err = target.Query(); err != nil {
			return fmt.Errorf("querying service state failed: %w", err)
		}
	}
}
// queryService returns a textual description of the current state of the
// Windows service with the given name. An empty string and an error are
// returned if the service cannot be opened or queried.
func queryService(name string) (string, error) {
	namePtr, err := syscall.UTF16PtrFromString(name)
	if err != nil {
		return "", fmt.Errorf("conversion of service name %q to UTF16 failed: %w", name, err)
	}

	// Request only the access rights needed to query the service status
	scm, err := windows.OpenSCManager(nil, nil, windows.SC_MANAGER_CONNECT|windows.SC_MANAGER_ENUMERATE_SERVICE)
	if err != nil {
		return "", fmt.Errorf("opening service manager failed: %w", err)
	}
	defer windows.CloseServiceHandle(scm)

	handle, err := windows.OpenService(scm, namePtr, windows.SERVICE_QUERY_STATUS)
	if err != nil {
		return "", fmt.Errorf("opening service failed: %w", err)
	}
	target := &mgr.Service{Name: name, Handle: handle}
	defer target.Close()

	// Translate the reported state into a description for the user
	current, err := target.Query()
	if err != nil {
		return "", fmt.Errorf("querying service state failed: %w", err)
	}
	description := stateDescription(current.State)
	return description, nil
}
// stateDescription converts a service state into a lower-case, human-readable
// text. States without a known description are rendered as "unknown" followed
// by the value of the state.
func stateDescription(state svc.State) string {
	descriptions := map[svc.State]string{
		svc.Stopped:         "stopped",
		svc.StartPending:    "start pending",
		svc.StopPending:     "stop pending",
		svc.Running:         "running",
		svc.ContinuePending: "continue pending",
		svc.PausePending:    "pause pending",
		svc.Paused:          "paused",
	}

	if text, known := descriptions[state]; known {
		return text
	}
	return fmt.Sprintf("unknown %v", state)
}

View File

@ -234,7 +234,6 @@ following works:
- github.com/josharian/native [MIT License](https://github.com/josharian/native/blob/main/license)
- github.com/jpillora/backoff [MIT License](https://github.com/jpillora/backoff/blob/master/LICENSE)
- github.com/json-iterator/go [MIT License](https://github.com/json-iterator/go/blob/master/LICENSE)
- github.com/kardianos/service [zlib License](https://github.com/kardianos/service/blob/master/LICENSE)
- github.com/karrick/godirwalk [BSD 2-Clause "Simplified" License](https://github.com/karrick/godirwalk/blob/master/LICENSE)
- github.com/kballard/go-shellquote [MIT License](https://github.com/kballard/go-shellquote/blob/master/LICENSE)
- github.com/klauspost/compress [BSD 3-Clause Clear License](https://github.com/klauspost/compress/blob/master/LICENSE)

View File

@ -4,20 +4,25 @@ Telegraf natively supports running as a Windows Service. Outlined below is are
the general steps to set it up.
1. Obtain the telegraf windows distribution
2. Create the directory `C:\Program Files\Telegraf` (if you install in a different
location simply specify the `--config` parameter with the desired location)
3. Place the telegraf.exe and the telegraf.conf config file into `C:\Program Files\Telegraf`
4. To install the service into the Windows Service Manager, run the following in PowerShell as an administrator (If necessary, you can wrap any spaces in the file paths in double quotes ""):
2. Create the directory `C:\Program Files\Telegraf` or use a custom directory
if desired
3. Place the telegraf.exe and the telegraf.conf config file into the directory,
either `C:\Program Files\Telegraf` or the custom directory of your choice.
If you install in a different location simply specify the `--config`
parameter with the desired location.
4. To install the service into the Windows Service Manager, run the command
as administrator. Make sure to wrap parameters containing spaces in double
quotes:
```shell
> C:\"Program Files"\Telegraf\telegraf.exe --service install
> "C:\Program Files\Telegraf\telegraf.exe" service install
```
5. Edit the configuration file to meet your needs
6. To check that it works, run:
```shell
> C:\"Program Files"\Telegraf\telegraf.exe --config C:\"Program Files"\Telegraf\telegraf.conf --test
> "C:\Program Files\Telegraf\telegraf.exe" --config "C:\Program Files\Telegraf\telegraf.conf" --test
```
7. To start collecting data, run:
@ -26,6 +31,17 @@ the general steps to set it up.
> net start telegraf
```
or
```shell
> "C:\Program Files\Telegraf\telegraf.exe" service start
```
or use the Windows service manager to start the service
Please also check the Windows event log or your configured log-file for errors
during startup.
## Config Directory
You can also specify a `--config-directory` for the service to use:
@ -34,44 +50,80 @@ You can also specify a `--config-directory` for the service to use:
2. Include the `--config-directory` option when registering the service:
```shell
> C:\"Program Files"\Telegraf\telegraf.exe --service install --config C:\"Program Files"\Telegraf\telegraf.conf --config-directory C:\"Program Files"\Telegraf\telegraf.d
> "C:\Program Files\Telegraf\telegraf.exe" --config "C:\Program Files\Telegraf\telegraf.conf" --config-directory "C:\Program Files\Telegraf\telegraf.d" service install
```
## Other supported operations
Telegraf can manage its own service through the `service` command:
| Command | Effect |
|------------------------------------|-------------------------------|
| `telegraf.exe --service install` | Install telegraf as a service |
| `telegraf.exe --service uninstall` | Remove the telegraf service |
| `telegraf.exe --service start` | Start the telegraf service |
| `telegraf.exe --service stop` | Stop the telegraf service |
| Command | Effect |
|----------------------------------|------------------------------------------|
| `telegraf.exe service install` | Install telegraf as a service |
| `telegraf.exe service uninstall` | Remove the telegraf service |
| `telegraf.exe service start` | Start the telegraf service |
| `telegraf.exe service stop` | Stop the telegraf service |
| `telegraf.exe service status` | Query the status of the telegraf service |
## Install multiple services
Running multiple instances of Telegraf is seldom needed, as you can run
multiple instances of each plugin and route metric flow using the metric
filtering options. However, if you do need to run multiple telegraf instances
filtering options. However, if you do need to run multiple telegraf instances
on a single system, you can install the service with the `--service-name` and
`--service-display-name` flags to give the services unique names:
`--display-name` flags to give the services unique names:
```shell
> C:\"Program Files"\Telegraf\telegraf.exe --service install --service-name telegraf-1 --service-display-name "Telegraf 1"
> C:\"Program Files"\Telegraf\telegraf.exe --service install --service-name telegraf-2 --service-display-name "Telegraf 2"
> "C:\Program Files\Telegraf\telegraf.exe" --service-name telegraf-1 service install --display-name "Telegraf 1"
> "C:\Program Files\Telegraf\telegraf.exe" --service-name telegraf-2 service install --display-name "Telegraf 2"
```
## Auto restart and restart delay
By default the service will not automatically restart on failure. Providing the `--service-auto-restart` flag during installation will always restart the service with a default delay of 5 minutes. To modify this to for example 3 minutes, provide the additional flag `--service-restart-delay 3m`. The delay can be any valid `time.Duration` string.
By default the service will not automatically restart on failure. Providing the
`--auto-restart` flag during installation will always restart the service with
a default delay of 5 minutes. To modify this to for example 3 minutes,
additionally provide the `--restart-delay 3m` flag. The delay can be any valid
`time.Duration` string.
## Troubleshooting
When Telegraf runs as a Windows service, Telegraf logs messages to Windows events log before configuration file with logging settings is loaded.
Check event log for an error reported by `telegraf` service in case of Telegraf service reports failure on its start: Event Viewer->Windows Logs->Application
When Telegraf runs as a Windows service, Telegraf logs all messages concerning
the service startup to the Windows event log. All messages and errors occurring
during runtime will be logged to the log-target you configured.
Check the event log for errors reported by the `telegraf` service (or the
service-name you configured) during service startup:
`Event Viewer -> Windows Logs -> Application`
### common error #1067
### Common error #1067
When installing as service in Windows, always double check to specify full path of the config file, otherwise windows service will fail to start
When installing as service in Windows, always double check to specify full path
of the config file, otherwise windows service will fail to start. Use
--config "C:\Program Files\Telegraf\telegraf.conf"
```shell
> "C:\Program Files\Telegraf\telegraf.exe" --config "C:\MyConfigs\telegraf.conf" service install
```
instead of
```shell
> "C:\Program Files\Telegraf\telegraf.exe" --config "telegraf.conf" service install
```
### Service is killed during shutdown
When shutting down Windows the Telegraf service tries to cleanly stop when
receiving the corresponding notification from the Windows service manager. The
exit process involves stopping all inputs, processors and aggregators and
finally to flush all remaining metrics to the output(s). In case many metrics
are not yet flushed this final step might take some time. However, Windows will
kill the service and the corresponding process after a predefined timeout
(usually 5 seconds).
You can change that timeout in the registry under
```text
HKLM\SYSTEM\CurrentControlSet\Control\WaitToKillServiceTimeout
```
**NOTE:** The value is in milliseconds and applies to **all** services!

1
go.mod
View File

@ -123,7 +123,6 @@ require (
github.com/jeremywohl/flatten/v2 v2.0.0-20211013061545-07e4a09fb8e4
github.com/jhump/protoreflect v1.16.0
github.com/jmespath/go-jmespath v0.4.0
github.com/kardianos/service v1.2.2
github.com/karrick/godirwalk v1.16.2
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
github.com/klauspost/compress v1.17.7

3
go.sum
View File

@ -1669,8 +1669,6 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V
github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=
github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
github.com/kardianos/service v1.2.2 h1:ZvePhAHfvo0A7Mftk/tEzqEZ7Q4lgnR8sGz4xu1YX60=
github.com/kardianos/service v1.2.2/go.mod h1:CIMRFEJVL+0DS1a3Nx06NaMn4Dz63Ng6O7dl0qH0zVM=
github.com/karrick/godirwalk v1.16.2 h1:eY2INUWoB2ZfpF/kXasyjWJ3Ncuof6qZuNWYZFN3kAI=
github.com/karrick/godirwalk v1.16.2/go.mod h1:j4mkqPuvaLI8mp1DroR3P6ad7cyYd4c1qeJ3RV7ULlk=
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs=
@ -2679,7 +2677,6 @@ golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201009025420-dfb3f7c4e634/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201015000850-e3ed0017c211/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201118182958-a01c418693c7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=