modelRT/task/retry_manager.go

219 lines
5.4 KiB
Go
Raw Normal View History

// Package task provides retry strategies for failed asynchronous tasks
package task
import (
"context"
"math"
"math/rand"
"strings"
"time"
"modelRT/logger"
)
// RetryStrategy defines the interface for task retry strategies.
//
// Implementations decide, per failed attempt, whether a task should be
// retried and how long to wait before the next attempt.
type RetryStrategy interface {
	// ShouldRetry determines if the task identified by taskID should be
	// retried after its retryCount-th failed attempt (lastError is the error
	// from that attempt) and returns the delay before the next retry.
	// When it returns false, the returned duration is not meaningful
	// (implementations in this file return 0).
	ShouldRetry(ctx context.Context, taskID string, retryCount int, lastError error) (bool, time.Duration)

	// GetMaxRetries returns the maximum number of retry attempts.
	GetMaxRetries() int
}
// ExponentialBackoffRetry implements exponential backoff with jitter retry strategy.
//
// The base delay for attempt n is InitialDelay * 2^n, capped at MaxDelay;
// when RandomFactor > 0, a random jitter of up to RandomFactor*delay is
// added or subtracted (see ShouldRetry).
type ExponentialBackoffRetry struct {
	MaxRetries   int           // maximum number of retry attempts
	InitialDelay time.Duration // delay before the first retry; also the floor after jitter
	MaxDelay     time.Duration // upper bound on the computed backoff delay
	RandomFactor float64       // Jitter factor in [0, 1] to avoid thundering herd problem
}
// NewExponentialBackoffRetry creates a new exponential backoff retry strategy.
//
// Out-of-range arguments are normalized rather than rejected: a negative
// maxRetries becomes 0, non-positive delays fall back to the 1s / 5m
// defaults, and randomFactor is clamped to the [0, 1] interval.
func NewExponentialBackoffRetry(maxRetries int, initialDelay, maxDelay time.Duration, randomFactor float64) *ExponentialBackoffRetry {
	strategy := &ExponentialBackoffRetry{
		MaxRetries:   maxRetries,
		InitialDelay: initialDelay,
		MaxDelay:     maxDelay,
		RandomFactor: randomFactor,
	}
	if strategy.MaxRetries < 0 {
		strategy.MaxRetries = 0
	}
	if strategy.InitialDelay <= 0 {
		strategy.InitialDelay = 1 * time.Second
	}
	if strategy.MaxDelay <= 0 {
		strategy.MaxDelay = 5 * time.Minute
	}
	switch {
	case strategy.RandomFactor < 0:
		strategy.RandomFactor = 0
	case strategy.RandomFactor > 1:
		strategy.RandomFactor = 1
	}
	return strategy
}
// ShouldRetry implements exponential backoff with jitter.
//
// The base delay is InitialDelay * 2^retryCount, capped at MaxDelay. When
// RandomFactor > 0, a jitter of up to RandomFactor*delay is randomly added
// or subtracted to spread out concurrent retries, and the result is floored
// at InitialDelay. Returns (false, 0) once retryCount reaches MaxRetries.
func (s *ExponentialBackoffRetry) ShouldRetry(ctx context.Context, taskID string, retryCount int, lastError error) (bool, time.Duration) {
	if retryCount >= s.MaxRetries {
		logger.Info(ctx, "Task reached maximum retry count",
			"task_id", taskID,
			"retry_count", retryCount,
			"max_retries", s.MaxRetries,
			"last_error", lastError,
		)
		return false, 0
	}
	// Calculate exponential backoff in floating point: initialDelay * 2^retryCount.
	// Comparing against MaxDelay BEFORE converting back to time.Duration avoids
	// int64 overflow (and the resulting negative delay, which the previous
	// `delay > s.MaxDelay` cap could not catch) for large retry counts.
	backoff := float64(s.InitialDelay) * math.Pow(2, float64(retryCount))
	delay := s.MaxDelay
	if backoff < float64(s.MaxDelay) {
		delay = time.Duration(backoff)
	}
	// Add jitter to avoid thundering herd
	if s.RandomFactor > 0 {
		jitter := rand.Float64() * s.RandomFactor * float64(delay)
		// Randomly add or subtract jitter
		if rand.Intn(2) == 0 {
			delay += time.Duration(jitter)
		} else {
			delay -= time.Duration(jitter)
		}
		// Ensure delay doesn't go below initial delay
		if delay < s.InitialDelay {
			delay = s.InitialDelay
		}
	}
	logger.Info(ctx, "Task will be retried",
		"task_id", taskID,
		"retry_count", retryCount,
		"next_retry_in", delay,
		"max_retries", s.MaxRetries,
	)
	return true, delay
}
// GetMaxRetries returns the maximum number of retry attempts.
func (s *ExponentialBackoffRetry) GetMaxRetries() int {
	return s.MaxRetries
}
// FixedDelayRetry implements fixed delay retry strategy
type FixedDelayRetry struct {
MaxRetries int
Delay time.Duration
RandomFactor float64
}
// NewFixedDelayRetry creates a new fixed delay retry strategy
func NewFixedDelayRetry(maxRetries int, delay time.Duration, randomFactor float64) *FixedDelayRetry {
if maxRetries < 0 {
maxRetries = 0
}
if delay <= 0 {
delay = 5 * time.Second
}
return &FixedDelayRetry{
MaxRetries: maxRetries,
Delay: delay,
RandomFactor: randomFactor,
}
}
// ShouldRetry implements fixed delay with optional jitter
func (s *FixedDelayRetry) ShouldRetry(ctx context.Context, taskID string, retryCount int, lastError error) (bool, time.Duration) {
if retryCount >= s.MaxRetries {
return false, 0
}
delay := s.Delay
// Add jitter if random factor is specified
if s.RandomFactor > 0 {
jitter := rand.Float64() * s.RandomFactor * float64(delay)
if rand.Intn(2) == 0 {
delay += time.Duration(jitter)
} else {
delay -= time.Duration(jitter)
}
// Ensure positive delay
if delay <= 0 {
delay = s.Delay
}
}
return true, delay
}
// GetMaxRetries returns the maximum number of retry attempts
func (s *FixedDelayRetry) GetMaxRetries() int {
return s.MaxRetries
}
// NoRetryStrategy implements a strategy that never retries
type NoRetryStrategy struct{}
// NewNoRetryStrategy creates a new no-retry strategy
func NewNoRetryStrategy() *NoRetryStrategy {
return &NoRetryStrategy{}
}
// ShouldRetry always returns false
func (s *NoRetryStrategy) ShouldRetry(ctx context.Context, taskID string, retryCount int, lastError error) (bool, time.Duration) {
return false, 0
}
// GetMaxRetries returns 0
func (s *NoRetryStrategy) GetMaxRetries() int {
return 0
}
// DefaultRetryStrategy returns the default retry strategy (exponential backoff):
// up to 3 attempts, starting at 1s, capped at 5m, with 10% jitter.
func DefaultRetryStrategy() RetryStrategy {
	const (
		defaultMaxRetries   = 3
		defaultRandomFactor = 0.1 // 10% jitter
	)
	return NewExponentialBackoffRetry(
		defaultMaxRetries,
		time.Second,   // initial delay
		5*time.Minute, // max delay
		defaultRandomFactor,
	)
}
// IsRetryableError checks if an error is retryable based on common patterns
func IsRetryableError(err error) bool {
if err == nil {
return false
}
errorMsg := err.Error()
// Check for transient errors that are typically retryable
retryablePatterns := []string{
"timeout",
"deadline exceeded",
"temporary",
"busy",
"connection refused",
"connection reset",
"network",
"too many connections",
"resource temporarily unavailable",
"rate limit",
"throttle",
"server unavailable",
"service unavailable",
}
for _, pattern := range retryablePatterns {
if strings.Contains(strings.ToLower(errorMsg), pattern) {
return true
}
}
return false
}