reverse dns lookup processor (#7639)

This commit is contained in:
Steven Soroka 2020-06-26 18:40:05 -04:00 committed by GitHub
parent d75ca67e47
commit 9190f2e659
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 1016 additions and 1 deletions

2
go.mod
View File

@ -131,7 +131,7 @@ require (
golang.org/x/lint v0.0.0-20200302205851-738671d3881b // indirect
golang.org/x/net v0.0.0-20200301022130-244492dfa37a
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d
golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a // indirect
golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a
golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4
golang.org/x/tools v0.0.0-20200317043434-63da46f3035e // indirect
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20200205215550-e35592f146e4

View File

@ -16,6 +16,7 @@ import (
_ "github.com/influxdata/telegraf/plugins/processors/printer"
_ "github.com/influxdata/telegraf/plugins/processors/regex"
_ "github.com/influxdata/telegraf/plugins/processors/rename"
_ "github.com/influxdata/telegraf/plugins/processors/reverse_dns"
_ "github.com/influxdata/telegraf/plugins/processors/s2geo"
_ "github.com/influxdata/telegraf/plugins/processors/starlark"
_ "github.com/influxdata/telegraf/plugins/processors/strings"

View File

@ -0,0 +1,72 @@
# Reverse DNS Processor Plugin
The `reverse_dns` processor does a reverse-dns lookup on tags (or fields) with
IPs in them.
### Configuration:
```toml
[[processors.reverse_dns]]
## For optimal performance, you may want to limit which metrics are passed to this
## processor. eg:
## namepass = ["my_metric_*"]
## cache_ttl is how long the dns entries should stay cached for.
## generally longer is better, but if you expect a large number of diverse lookups
## you'll want to consider memory use.
cache_ttl = "24h"
## lookup_timeout is how long should you wait for a single dns request to repsond.
## this is also the maximum acceptable latency for a metric travelling through
## the reverse_dns processor. After lookup_timeout is exceeded, a metric will
## be passed on unaltered.
## multiple simultaneous resolution requests for the same IP will only make a
## single rDNS request, and they will all wait for the answer for this long.
lookup_timeout = "3s"
## max_parallel_lookups is the maximum number of dns requests to be in flight
## at the same time. Requesting hitting cached values do not count against this
## total, and neither do mulptiple requests for the same IP.
## It's probably best to keep this number fairly low.
max_parallel_lookups = 10
## ordered controls whether or not the metrics need to stay in the same order
## this plugin received them in. If false, this plugin will change the order
## with requests hitting cached results moving through immediately and not
## waiting on slower lookups. This may cause issues for you if you are
## depending on the order of metrics staying the same. If so, set this to true.
## keeping the metrics ordered may be slightly slower.
ordered = false
[[processors.reverse_dns.lookup]]
## get the ip from the field "source_ip", and put the result in the field "source_name"
field = "source_ip"
dest = "source_name"
[[processors.reverse_dns.lookup]]
## get the ip from the tag "destination_ip", and put the result in the tag
## "destination_name".
tag = "destination_ip"
dest = "destination_name"
## If you would prefer destination_name to be a field instead, you can use a
## processors.converter after this one, specifying the order attribute.
```
### Example processing:
example config:
```toml
[[processors.reverse_dns]]
[[processors.reverse_dns.lookup]]
tag = "ip"
dest = "domain"
```
```diff
- ping,ip=8.8.8.8 elapsed=300i 1502489900000000000
+ ping,ip=8.8.8.8,domain=dns.google. elapsed=300i 1502489900000000000
```

View File

@ -0,0 +1,89 @@
package parallel
import (
"sync"
"github.com/influxdata/telegraf"
)
type Ordered struct {
wg sync.WaitGroup
fn func(telegraf.Metric) []telegraf.Metric
// queue of jobs coming in. Workers pick jobs off this queue for processing
workerQueue chan job
// queue of ordered metrics going out
queue chan futureMetric
}
func NewOrdered(
acc telegraf.Accumulator,
fn func(telegraf.Metric) []telegraf.Metric,
orderedQueueSize int,
workerCount int,
) *Ordered {
p := &Ordered{
fn: fn,
workerQueue: make(chan job, workerCount),
queue: make(chan futureMetric, orderedQueueSize),
}
p.startWorkers(workerCount)
p.wg.Add(1)
go func() {
p.readQueue(acc)
p.wg.Done()
}()
return p
}
func (p *Ordered) Enqueue(metric telegraf.Metric) {
future := make(futureMetric)
p.queue <- future
// write the future to the worker pool. Order doesn't matter now because the
// outgoing p.queue will enforce order regardless of the order the jobs are
// completed in
p.workerQueue <- job{
future: future,
metric: metric,
}
}
func (p *Ordered) readQueue(acc telegraf.Accumulator) {
// wait for the response from each worker in order
for mCh := range p.queue {
// allow each worker to write out multiple metrics
for metrics := range mCh {
for _, m := range metrics {
acc.AddMetric(m)
}
}
}
}
func (p *Ordered) startWorkers(count int) {
p.wg.Add(count)
for i := 0; i < count; i++ {
go func() {
for job := range p.workerQueue {
job.future <- p.fn(job.metric)
close(job.future)
}
p.wg.Done()
}()
}
}
func (p *Ordered) Stop() {
close(p.queue)
close(p.workerQueue)
p.wg.Wait()
}
type futureMetric chan []telegraf.Metric
type job struct {
future futureMetric
metric telegraf.Metric
}

View File

@ -0,0 +1,8 @@
package parallel
import "github.com/influxdata/telegraf"
type Parallel interface {
Enqueue(telegraf.Metric)
Stop()
}

View File

@ -0,0 +1,119 @@
package parallel_test
import (
"fmt"
"testing"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/plugins/processors/reverse_dns/parallel"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/require"
)
func TestOrderedJobsStayOrdered(t *testing.T) {
acc := &testutil.Accumulator{}
p := parallel.NewOrdered(acc, jobFunc, 10000, 10)
now := time.Now()
for i := 0; i < 20000; i++ {
m, err := metric.New("test",
map[string]string{},
map[string]interface{}{
"val": i,
},
now,
)
require.NoError(t, err)
now = now.Add(1)
p.Enqueue(m)
}
p.Stop()
i := 0
require.Len(t, acc.Metrics, 20000, fmt.Sprintf("expected 20k metrics but got %d", len(acc.GetTelegrafMetrics())))
for _, m := range acc.GetTelegrafMetrics() {
v, ok := m.GetField("val")
require.True(t, ok)
require.EqualValues(t, i, v)
i++
}
}
func TestUnorderedJobsDontDropAnyJobs(t *testing.T) {
acc := &testutil.Accumulator{}
p := parallel.NewUnordered(acc, jobFunc, 10)
now := time.Now()
expectedTotal := 0
for i := 0; i < 20000; i++ {
expectedTotal += i
m, err := metric.New("test",
map[string]string{},
map[string]interface{}{
"val": i,
},
now,
)
require.NoError(t, err)
now = now.Add(1)
p.Enqueue(m)
}
p.Stop()
actualTotal := int64(0)
require.Len(t, acc.Metrics, 20000, fmt.Sprintf("expected 20k metrics but got %d", len(acc.GetTelegrafMetrics())))
for _, m := range acc.GetTelegrafMetrics() {
v, ok := m.GetField("val")
require.True(t, ok)
actualTotal += v.(int64)
}
require.EqualValues(t, expectedTotal, actualTotal)
}
func BenchmarkOrdered(b *testing.B) {
acc := &testutil.Accumulator{}
p := parallel.NewOrdered(acc, jobFunc, 10000, 10)
m, _ := metric.New("test",
map[string]string{},
map[string]interface{}{
"val": 1,
},
time.Now(),
)
b.ResetTimer()
for i := 0; i < b.N; i++ {
p.Enqueue(m)
}
p.Stop()
}
func BenchmarkUnordered(b *testing.B) {
acc := &testutil.Accumulator{}
p := parallel.NewUnordered(acc, jobFunc, 10)
m, _ := metric.New("test",
map[string]string{},
map[string]interface{}{
"val": 1,
},
time.Now(),
)
b.ResetTimer()
for i := 0; i < b.N; i++ {
p.Enqueue(m)
}
p.Stop()
}
func jobFunc(m telegraf.Metric) []telegraf.Metric {
return []telegraf.Metric{m}
}

View File

@ -0,0 +1,60 @@
package parallel
import (
"sync"
"github.com/influxdata/telegraf"
)
type Unordered struct {
wg sync.WaitGroup
acc telegraf.Accumulator
fn func(telegraf.Metric) []telegraf.Metric
inQueue chan telegraf.Metric
}
func NewUnordered(
acc telegraf.Accumulator,
fn func(telegraf.Metric) []telegraf.Metric,
workerCount int,
) *Unordered {
p := &Unordered{
acc: acc,
inQueue: make(chan telegraf.Metric, workerCount),
fn: fn,
}
// start workers
p.wg.Add(1)
go func() {
p.startWorkers(workerCount)
p.wg.Done()
}()
return p
}
func (p *Unordered) startWorkers(count int) {
wg := sync.WaitGroup{}
wg.Add(count)
for i := 0; i < count; i++ {
go func() {
for metric := range p.inQueue {
for _, m := range p.fn(metric) {
p.acc.AddMetric(m)
}
}
wg.Done()
}()
}
wg.Wait()
}
func (p *Unordered) Stop() {
close(p.inQueue)
p.wg.Wait()
}
func (p *Unordered) Enqueue(m telegraf.Metric) {
p.inQueue <- m
}

View File

@ -0,0 +1,319 @@
package reverse_dns
import (
"context"
"errors"
"net"
"sync"
"sync/atomic"
"time"
"golang.org/x/sync/semaphore"
)
const defaultMaxWorkers = 10
var (
ErrTimeout = errors.New("request timed out")
)
// AnyResolver is for the net.Resolver
type AnyResolver interface {
LookupAddr(ctx context.Context, addr string) (names []string, err error)
}
// ReverseDNSCache is safe to use across multiple goroutines.
// if multiple goroutines request the same IP at the same time, one of the
// requests will trigger the lookup and the rest will wait for its response.
type ReverseDNSCache struct {
Resolver AnyResolver
stats RDNSCacheStats
// settings
ttl time.Duration
lookupTimeout time.Duration
maxWorkers int
// internal
rwLock sync.RWMutex
sem *semaphore.Weighted
cancelCleanupWorker context.CancelFunc
cache map[string]*dnslookup
// keep an ordered list of what needs to be worked on and what is due to expire.
// We can use this list for both with a job position marker, and by popping items
// off the list as they expire. This avoids iterating over the whole map to find
// things to do.
// As a bonus, we only have to read the first item to know if anything in the
// map has expired.
// must lock to get access to this.
expireList []*dnslookup
expireListLock sync.Mutex
}
type RDNSCacheStats struct {
CacheHit uint64
CacheMiss uint64
CacheExpire uint64
RequestsAbandoned uint64
RequestsFilled uint64
}
func NewReverseDNSCache(ttl, lookupTimeout time.Duration, workerPoolSize int) *ReverseDNSCache {
if workerPoolSize <= 0 {
workerPoolSize = defaultMaxWorkers
}
ctx, cancel := context.WithCancel(context.Background())
d := &ReverseDNSCache{
ttl: ttl,
lookupTimeout: lookupTimeout,
cache: map[string]*dnslookup{},
expireList: []*dnslookup{},
maxWorkers: workerPoolSize,
sem: semaphore.NewWeighted(int64(workerPoolSize)),
cancelCleanupWorker: cancel,
Resolver: net.DefaultResolver,
}
d.startCleanupWorker(ctx)
return d
}
// dnslookup represents a lookup request/response. It may or may not be answered yet.
// interested parties register themselves with existing requests or create new ones
// to get their dns query answered. Answers will be pushed out to callbacks.
type dnslookup struct {
ip string // keep a copy for the expireList.
domains []string
expiresAt time.Time
completed bool
callbacks []callbackChannelType
}
type lookupResult struct {
domains []string
err error
}
type callbackChannelType chan lookupResult
// Lookup takes a string representing a parseable ipv4 or ipv6 IP, and blocks
// until it has resolved to 0-n results, or until its lookup timeout has elapsed.
// if the lookup timeout elapses, it returns an empty slice.
func (d *ReverseDNSCache) Lookup(ip string) ([]string, error) {
if len(ip) == 0 {
return nil, nil
}
return d.lookup(ip)
}
func (d *ReverseDNSCache) lookup(ip string) ([]string, error) {
// check if the value is cached
d.rwLock.RLock()
result, found := d.lockedGetFromCache(ip)
if found && result.completed && result.expiresAt.After(time.Now()) {
defer d.rwLock.RUnlock()
atomic.AddUint64(&d.stats.CacheHit, 1)
// cache is valid
return result.domains, nil
}
d.rwLock.RUnlock()
// if it's not cached, kick off a lookup job and subscribe to the result.
lookupChan := d.subscribeTo(ip)
timer := time.NewTimer(d.lookupTimeout)
defer timer.Stop()
// timer is still necessary even if doLookup respects timeout due to worker
// pool starvation.
select {
case result := <-lookupChan:
return result.domains, result.err
case <-timer.C:
return nil, ErrTimeout
}
}
func (d *ReverseDNSCache) subscribeTo(ip string) callbackChannelType {
callback := make(callbackChannelType, 1)
d.rwLock.Lock()
defer d.rwLock.Unlock()
// confirm it's still not in the cache. This needs to be done under an active lock.
result, found := d.lockedGetFromCache(ip)
if found {
atomic.AddUint64(&d.stats.CacheHit, 1)
// has the request been answered since we last checked?
if result.completed {
// we can return the answer with the channel.
callback <- lookupResult{domains: result.domains}
return callback
}
// there's a request but it hasn't been answered yet;
// add yourself to the subscribers and return that.
result.callbacks = append(result.callbacks, callback)
d.lockedSaveToCache(result)
return callback
}
atomic.AddUint64(&d.stats.CacheMiss, 1)
// otherwise we need to register the request
l := &dnslookup{
ip: ip,
expiresAt: time.Now().Add(d.ttl),
callbacks: []callbackChannelType{callback},
}
d.lockedSaveToCache(l)
go d.doLookup(l.ip)
return callback
}
// lockedGetFromCache fetches from the correct internal ip cache.
// you MUST first do a read or write lock before calling it, and keep locks around
// the dnslookup that is returned until you clone it.
func (d *ReverseDNSCache) lockedGetFromCache(ip string) (lookup *dnslookup, found bool) {
lookup, found = d.cache[ip]
if found && lookup.expiresAt.Before(time.Now()) {
return nil, false
}
return lookup, found
}
// lockedSaveToCache stores a lookup in the correct internal ip cache.
// you MUST first do a write lock before calling it.
func (d *ReverseDNSCache) lockedSaveToCache(lookup *dnslookup) {
if lookup.expiresAt.Before(time.Now()) {
return // don't cache.
}
d.cache[lookup.ip] = lookup
}
func (d *ReverseDNSCache) startCleanupWorker(ctx context.Context) {
go func() {
cleanupTick := time.NewTicker(10 * time.Second)
for {
select {
case <-cleanupTick.C:
d.cleanup()
case <-ctx.Done():
return
}
}
}()
}
func (d *ReverseDNSCache) doLookup(ip string) {
ctx, cancel := context.WithTimeout(context.Background(), d.lookupTimeout)
defer cancel()
if err := d.sem.Acquire(ctx, 1); err != nil {
// lookup timeout
d.abandonLookup(ip, ErrTimeout)
return
}
defer d.sem.Release(1)
names, err := d.Resolver.LookupAddr(ctx, ip)
if err != nil {
d.abandonLookup(ip, err)
return
}
d.rwLock.Lock()
lookup, found := d.lockedGetFromCache(ip)
if !found {
d.rwLock.Unlock()
return
}
lookup.domains = names
lookup.completed = true
lookup.expiresAt = time.Now().Add(d.ttl) // extend the ttl now that we have a reply.
callbacks := lookup.callbacks
lookup.callbacks = nil
d.lockedSaveToCache(lookup)
d.rwLock.Unlock()
d.expireListLock.Lock()
// add it to the expireList.
d.expireList = append(d.expireList, lookup)
d.expireListLock.Unlock()
atomic.AddUint64(&d.stats.RequestsFilled, uint64(len(callbacks)))
for _, cb := range callbacks {
cb <- lookupResult{domains: names}
close(cb)
}
}
func (d *ReverseDNSCache) abandonLookup(ip string, err error) {
d.rwLock.Lock()
lookup, found := d.lockedGetFromCache(ip)
if !found {
d.rwLock.Unlock()
return
}
callbacks := lookup.callbacks
delete(d.cache, lookup.ip)
d.rwLock.Unlock()
// resolve the remaining callbacks to free the resources.
atomic.AddUint64(&d.stats.RequestsAbandoned, uint64(len(callbacks)))
for _, cb := range callbacks {
cb <- lookupResult{err: err}
close(cb)
}
}
func (d *ReverseDNSCache) cleanup() {
now := time.Now()
d.expireListLock.Lock()
if len(d.expireList) == 0 {
d.expireListLock.Unlock()
return
}
ipsToDelete := []string{}
for i := 0; i < len(d.expireList); i++ {
if d.expireList[i].expiresAt.After(now) {
break // done. Nothing after this point is expired.
}
ipsToDelete = append(ipsToDelete, d.expireList[i].ip)
}
if len(ipsToDelete) == 0 {
d.expireListLock.Unlock()
return
}
d.expireList = d.expireList[len(ipsToDelete):]
d.expireListLock.Unlock()
atomic.AddUint64(&d.stats.CacheExpire, uint64(len(ipsToDelete)))
d.rwLock.Lock()
defer d.rwLock.Unlock()
for _, ip := range ipsToDelete {
delete(d.cache, ip)
}
}
// blockAllWorkers is a test function that eats up all the worker pool space to
// make sure workers are done running and there's no room to acquire a new worker.
func (d *ReverseDNSCache) blockAllWorkers() {
d.sem.Acquire(context.Background(), int64(d.maxWorkers))
}
func (d *ReverseDNSCache) Stats() RDNSCacheStats {
stats := RDNSCacheStats{}
stats.CacheHit = atomic.LoadUint64(&d.stats.CacheHit)
stats.CacheMiss = atomic.LoadUint64(&d.stats.CacheMiss)
stats.CacheExpire = atomic.LoadUint64(&d.stats.CacheExpire)
stats.RequestsAbandoned = atomic.LoadUint64(&d.stats.RequestsAbandoned)
stats.RequestsFilled = atomic.LoadUint64(&d.stats.RequestsFilled)
return stats
}
func (d *ReverseDNSCache) Stop() {
d.cancelCleanupWorker()
}

View File

@ -0,0 +1,136 @@
package reverse_dns
import (
"context"
"errors"
"sync"
"testing"
"time"
"github.com/stretchr/testify/require"
)
func TestSimpleReverseDNSLookup(t *testing.T) {
d := NewReverseDNSCache(60*time.Second, 1*time.Second, -1)
defer d.Stop()
d.Resolver = &localResolver{}
answer, err := d.Lookup("127.0.0.1")
require.NoError(t, err)
require.Equal(t, []string{"localhost"}, answer)
d.blockAllWorkers()
// do another request with no workers available.
// it should read from cache instantly.
answer, err = d.Lookup("127.0.0.1")
require.NoError(t, err)
require.Equal(t, []string{"localhost"}, answer)
require.Len(t, d.cache, 1)
require.Len(t, d.expireList, 1)
d.cleanup()
require.Len(t, d.expireList, 1) // ttl hasn't hit yet.
stats := d.Stats()
require.EqualValues(t, 0, stats.CacheExpire)
require.EqualValues(t, 1, stats.CacheMiss)
require.EqualValues(t, 1, stats.CacheHit)
require.EqualValues(t, 1, stats.RequestsFilled)
require.EqualValues(t, 0, stats.RequestsAbandoned)
}
func TestParallelReverseDNSLookup(t *testing.T) {
d := NewReverseDNSCache(1*time.Second, 1*time.Second, -1)
defer d.Stop()
d.Resolver = &localResolver{}
var answer1 []string
var answer2 []string
wg := &sync.WaitGroup{}
wg.Add(2)
go func() {
answer, err := d.Lookup("127.0.0.1")
require.NoError(t, err)
answer1 = answer
wg.Done()
}()
go func() {
answer, err := d.Lookup("127.0.0.1")
require.NoError(t, err)
answer2 = answer
wg.Done()
}()
wg.Wait()
t.Log(answer1)
t.Log(answer2)
require.Equal(t, []string{"localhost"}, answer1)
require.Equal(t, []string{"localhost"}, answer2)
require.Len(t, d.cache, 1)
stats := d.Stats()
require.EqualValues(t, 1, stats.CacheMiss)
require.EqualValues(t, 1, stats.CacheHit)
}
func TestUnavailableDNSServerRespectsTimeout(t *testing.T) {
d := NewReverseDNSCache(0, 1, -1)
defer d.Stop()
d.Resolver = &timeoutResolver{}
result, err := d.Lookup("192.153.33.3")
require.Error(t, err)
require.Equal(t, ErrTimeout, err)
require.Nil(t, result)
}
func TestCleanupHappens(t *testing.T) {
ttl := 100 * time.Millisecond
d := NewReverseDNSCache(ttl, 1*time.Second, -1)
defer d.Stop()
d.Resolver = &localResolver{}
_, err := d.Lookup("127.0.0.1")
require.NoError(t, err)
require.Len(t, d.cache, 1)
time.Sleep(ttl) // wait for cache entry to expire.
d.cleanup()
require.Len(t, d.expireList, 0)
stats := d.Stats()
require.EqualValues(t, 1, stats.CacheExpire)
require.EqualValues(t, 1, stats.CacheMiss)
require.EqualValues(t, 0, stats.CacheHit)
}
func TestLookupTimeout(t *testing.T) {
d := NewReverseDNSCache(10*time.Second, 10*time.Second, -1)
defer d.Stop()
d.Resolver = &timeoutResolver{}
_, err := d.Lookup("127.0.0.1")
require.Error(t, err)
require.EqualValues(t, 1, d.Stats().RequestsAbandoned)
}
type timeoutResolver struct{}
func (r *timeoutResolver) LookupAddr(ctx context.Context, addr string) (names []string, err error) {
return nil, errors.New("timeout")
}
type localResolver struct{}
func (r *localResolver) LookupAddr(ctx context.Context, addr string) (names []string, err error) {
return []string{"localhost"}, nil
}

View File

@ -0,0 +1,156 @@
package reverse_dns
import (
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/config"
"github.com/influxdata/telegraf/plugins/processors"
"github.com/influxdata/telegraf/plugins/processors/reverse_dns/parallel"
)
const sampleConfig = `
## For optimal performance, you may want to limit which metrics are passed to this
## processor. eg:
## namepass = ["my_metric_*"]
## cache_ttl is how long the dns entries should stay cached for.
## generally longer is better, but if you expect a large number of diverse lookups
## you'll want to consider memory use.
cache_ttl = "24h"
## lookup_timeout is how long should you wait for a single dns request to repsond.
## this is also the maximum acceptable latency for a metric travelling through
## the reverse_dns processor. After lookup_timeout is exceeded, a metric will
## be passed on unaltered.
## multiple simultaneous resolution requests for the same IP will only make a
## single rDNS request, and they will all wait for the answer for this long.
lookup_timeout = "3s"
## max_parallel_lookups is the maximum number of dns requests to be in flight
## at the same time. Requesting hitting cached values do not count against this
## total, and neither do mulptiple requests for the same IP.
## It's probably best to keep this number fairly low.
max_parallel_lookups = 10
## ordered controls whether or not the metrics need to stay in the same order
## this plugin received them in. If false, this plugin will change the order
## with requests hitting cached results moving through immediately and not
## waiting on slower lookups. This may cause issues for you if you are
## depending on the order of metrics staying the same. If so, set this to true.
## keeping the metrics ordered may be slightly slower.
ordered = false
[[processors.reverse_dns.lookup]]
## get the ip from the field "source_ip", and put the result in the field "source_name"
field = "source_ip"
dest = "source_name"
[[processors.reverse_dns.lookup]]
## get the ip from the tag "destination_ip", and put the result in the tag
## "destination_name".
tag = "destination_ip"
dest = "destination_name"
## If you would prefer destination_name to be a field instead, you can use a
## processors.converter after this one, specifying the order attribute.
`
type lookupEntry struct {
Tag string `toml:"tag"`
Field string `toml:"field"`
Dest string `toml:"dest"`
}
type ReverseDNS struct {
reverseDNSCache *ReverseDNSCache
acc telegraf.Accumulator
parallel parallel.Parallel
Lookups []lookupEntry `toml:"lookup"`
CacheTTL config.Duration `toml:"cache_ttl"`
LookupTimeout config.Duration `toml:"lookup_timeout"`
MaxParallelLookups int `toml:"max_parallel_lookups"`
Ordered bool `toml:"ordered"`
Log telegraf.Logger `toml:"-"`
}
func (r *ReverseDNS) SampleConfig() string {
return sampleConfig
}
func (r *ReverseDNS) Description() string {
return "ReverseDNS does a reverse lookup on IP addresses to retrieve the DNS name"
}
func (r *ReverseDNS) Start(acc telegraf.Accumulator) error {
r.acc = acc
r.reverseDNSCache = NewReverseDNSCache(
time.Duration(r.CacheTTL),
time.Duration(r.LookupTimeout),
r.MaxParallelLookups, // max parallel reverse-dns lookups
)
if r.Ordered {
r.parallel = parallel.NewOrdered(acc, r.asyncAdd, 10000, r.MaxParallelLookups)
} else {
r.parallel = parallel.NewUnordered(acc, r.asyncAdd, r.MaxParallelLookups)
}
return nil
}
func (r *ReverseDNS) Stop() error {
r.parallel.Stop()
r.reverseDNSCache.Stop()
return nil
}
func (r *ReverseDNS) Add(metric telegraf.Metric, acc telegraf.Accumulator) error {
r.parallel.Enqueue(metric)
return nil
}
func (r *ReverseDNS) asyncAdd(metric telegraf.Metric) []telegraf.Metric {
for _, lookup := range r.Lookups {
if len(lookup.Field) > 0 {
if ipField, ok := metric.GetField(lookup.Field); ok {
if ip, ok := ipField.(string); ok {
result, err := r.reverseDNSCache.Lookup(ip)
if err != nil {
r.Log.Errorf("lookup error: %v", err)
continue
}
if len(result) > 0 {
metric.AddField(lookup.Dest, result[0])
}
}
}
}
if len(lookup.Tag) > 0 {
if ipTag, ok := metric.GetTag(lookup.Tag); ok {
result, err := r.reverseDNSCache.Lookup(ipTag)
if err != nil {
r.Log.Errorf("lookup error: %v", err)
continue
}
if len(result) > 0 {
metric.AddTag(lookup.Dest, result[0])
}
}
}
}
return []telegraf.Metric{metric}
}
func init() {
processors.AddStreaming("reverse_dns", func() telegraf.StreamingProcessor {
return newReverseDNS()
})
}
func newReverseDNS() *ReverseDNS {
return &ReverseDNS{
CacheTTL: config.Duration(24 * time.Hour),
LookupTimeout: config.Duration(1 * time.Minute),
MaxParallelLookups: 10,
}
}

View File

@ -0,0 +1,55 @@
package reverse_dns
import (
"testing"
"time"
"github.com/influxdata/telegraf/config"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/require"
)
func TestSimpleReverseLookup(t *testing.T) {
now := time.Now()
m, _ := metric.New("name", map[string]string{
"dest_ip": "8.8.8.8",
}, map[string]interface{}{
"source_ip": "127.0.0.1",
}, now)
dns := newReverseDNS()
dns.Lookups = []lookupEntry{
{
Field: "source_ip",
Dest: "source_name",
},
{
Tag: "dest_ip",
Dest: "dest_name",
},
}
acc := &testutil.Accumulator{}
dns.Start(acc)
dns.Add(m, acc)
dns.Stop()
// should be processed now.
require.Len(t, acc.GetTelegrafMetrics(), 1)
processedMetric := acc.GetTelegrafMetrics()[0]
f, ok := processedMetric.GetField("source_name")
require.True(t, ok)
require.EqualValues(t, "localhost", f)
tag, ok := processedMetric.GetTag("dest_name")
require.True(t, ok)
require.EqualValues(t, "dns.google.", tag)
}
func TestLoadingConfig(t *testing.T) {
c := config.NewConfig()
err := c.LoadConfigData([]byte("[[processors.reverse_dns]]\n" + sampleConfig))
require.NoError(t, err)
require.Len(t, c.Processors, 1)
}