Fix mutex locking around ifname cache (#8873)

This commit is contained in:
reimda 2021-02-19 11:31:25 -07:00 committed by GitHub
parent 660eb5b63c
commit 4d61935dec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 22 additions and 21 deletions

View File

@ -100,8 +100,8 @@ type IfName struct {
ifTable *si.Table `toml:"-"` ifTable *si.Table `toml:"-"`
ifXTable *si.Table `toml:"-"` ifXTable *si.Table `toml:"-"`
rwLock sync.RWMutex `toml:"-"` lock sync.Mutex `toml:"-"`
cache *TTLCache `toml:"-"` cache *TTLCache `toml:"-"`
parallel parallel.Parallel `toml:"-"` parallel parallel.Parallel `toml:"-"`
acc telegraf.Accumulator `toml:"-"` acc telegraf.Accumulator `toml:"-"`
@ -187,9 +187,9 @@ func (d *IfName) addTag(metric telegraf.Metric) error {
} }
func (d *IfName) invalidate(agent string) { func (d *IfName) invalidate(agent string) {
d.rwLock.RLock() d.lock.Lock()
d.cache.Delete(agent) d.cache.Delete(agent)
d.rwLock.RUnlock() d.lock.Unlock()
} }
func (d *IfName) Start(acc telegraf.Accumulator) error { func (d *IfName) Start(acc telegraf.Accumulator) error {
@ -241,31 +241,34 @@ func (d *IfName) Stop() error {
func (d *IfName) getMap(agent string) (entry nameMap, age time.Duration, err error) { func (d *IfName) getMap(agent string) (entry nameMap, age time.Duration, err error) {
var sig chan struct{} var sig chan struct{}
d.lock.Lock()
// Check cache // Check cache
d.rwLock.RLock()
m, ok, age := d.cache.Get(agent) m, ok, age := d.cache.Get(agent)
d.rwLock.RUnlock()
if ok { if ok {
d.lock.Unlock()
return m, age, nil return m, age, nil
} }
// Is this the first request for this agent? // cache miss. Is this the first request for this agent?
d.rwLock.Lock()
sig, found := d.sigs[agent] sig, found := d.sigs[agent]
if !found { if !found {
// This is the first request. Make signal for subsequent requests to wait on
s := make(chan struct{}) s := make(chan struct{})
d.sigs[agent] = s d.sigs[agent] = s
sig = s sig = s
} }
d.rwLock.Unlock()
d.lock.Unlock()
if found { if found {
// This is not the first request. Wait for first to finish. // This is not the first request. Wait for first to finish.
<-sig <-sig
// Check cache again // Check cache again
d.rwLock.RLock() d.lock.Lock()
m, ok, age := d.cache.Get(agent) m, ok, age := d.cache.Get(agent)
d.rwLock.RUnlock() d.lock.Unlock()
if ok { if ok {
return m, age, nil return m, age, nil
} }
@ -273,28 +276,26 @@ func (d *IfName) getMap(agent string) (entry nameMap, age time.Duration, err err
} }
// The cache missed and this is the first request for this // The cache missed and this is the first request for this
// agent. // agent. Make the SNMP request
// Make the SNMP request
m, err = d.getMapRemote(agent) m, err = d.getMapRemote(agent)
d.lock.Lock()
if err != nil { if err != nil {
//failure. signal without saving to cache //snmp failure. signal without saving to cache
d.rwLock.Lock()
close(sig) close(sig)
delete(d.sigs, agent) delete(d.sigs, agent)
d.rwLock.Unlock()
d.lock.Unlock()
return nil, 0, fmt.Errorf("getting remote table: %w", err) return nil, 0, fmt.Errorf("getting remote table: %w", err)
} }
// Cache it, then signal any other waiting requests for this agent // snmp success. Cache response, then signal any other waiting
// and clean up // requests for this agent and clean up
d.rwLock.Lock()
d.cache.Put(agent, m) d.cache.Put(agent, m)
close(sig) close(sig)
delete(d.sigs, agent) delete(d.sigs, agent)
d.rwLock.Unlock()
d.lock.Unlock()
return m, 0, nil return m, 0, nil
} }