// Package task provides retry queue management for failed asynchronous tasks
package task

import (
	"context"
	"time"

	"modelRT/database"
	"modelRT/logger"

	"github.com/gofrs/uuid"
	"gorm.io/gorm"
)

// RetryQueue manages scheduling and execution of task retries
type RetryQueue struct {
	db       *gorm.DB
	producer *QueueProducer
	strategy RetryStrategy
}

// NewRetryQueue creates a new RetryQueue instance
func NewRetryQueue(db *gorm.DB, producer *QueueProducer, strategy RetryStrategy) *RetryQueue {
	if strategy == nil {
		strategy = DefaultRetryStrategy()
	}
	return &RetryQueue{
		db:       db,
		producer: producer,
		strategy: strategy,
	}
}

// ScheduleRetry schedules a failed task for retry based on the retry strategy
func (q *RetryQueue) ScheduleRetry(ctx context.Context, taskID uuid.UUID, taskType TaskType, retryCount int, lastError error) error {
	// Check if the task should be retried
	shouldRetry, delay := q.strategy.ShouldRetry(ctx, taskID.String(), retryCount, lastError)
	if !shouldRetry {
		// Mark task as permanently failed
		logger.Info(ctx, "Task will not be retried, marking as failed",
			"task_id", taskID,
			"retry_count", retryCount,
			"max_retries", q.strategy.GetMaxRetries(),
			"last_error", lastError,
		)
		return database.FailAsyncTask(ctx, q.db, taskID, time.Now().Unix())
	}

	// Calculate next retry time
	nextRetryTime := time.Now().Add(delay).Unix()

	// Update task retry information in the database
	err := q.db.Transaction(func(tx *gorm.DB) error {
		if err := database.UpdateTaskRetryInfo(ctx, tx, taskID, retryCount+1, nextRetryTime); err != nil {
			return err
		}

		// Update error information
		errorMsg := ""
		if lastError != nil {
			errorMsg = lastError.Error()
		}
		if err := database.UpdateTaskErrorInfo(ctx, tx, taskID, errorMsg, ""); err != nil {
			// Log but don't fail the whole retry scheduling
			logger.Warn(ctx, "Failed to update task error info",
				"task_id", taskID,
				"error", err,
			)
		}

		// Task will be picked up by ProcessRetryQueue when next_retry_time is reached
		return nil
	})
	if err != nil {
		logger.Error(ctx, "Failed to schedule task retry",
			"task_id", taskID,
			"task_type", taskType,
			"retry_count", retryCount,
			"delay", delay,
			"error", err,
		)
		return err
	}

	logger.Info(ctx, "Task scheduled for retry",
		"task_id", taskID,
		"task_type", taskType,
		"retry_count", retryCount+1,
		"next_retry_in", delay,
		"next_retry_time", time.Unix(nextRetryTime, 0).Format(time.RFC3339),
	)
	return nil
}

// ProcessRetryQueue processes tasks that are due for retry
func (q *RetryQueue) ProcessRetryQueue(ctx context.Context, batchSize int) error {
	// Get tasks due for retry
	tasks, err := database.GetTasksForRetry(ctx, q.db, batchSize)
	if err != nil {
		logger.Error(ctx, "Failed to get tasks for retry", "error", err)
		return err
	}
	if len(tasks) == 0 {
		return nil
	}

	logger.Info(ctx, "Processing retry queue",
		"task_count", len(tasks),
		"batch_size", batchSize,
	)

	for _, task := range tasks {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
			// Publish task to queue for immediate processing
			taskType := TaskType(task.TaskType)
			if err := q.producer.PublishTask(ctx, task.TaskID, taskType, task.Priority); err != nil {
				logger.Error(ctx, "Failed to publish retry task to queue",
					"task_id", task.TaskID,
					"task_type", taskType,
					"error", err,
				)
				// Continue with other tasks
				continue
			}

			// Update task status back to submitted
			if err := database.UpdateAsyncTaskStatus(ctx, q.db, task.TaskID, "SUBMITTED"); err != nil {
				logger.Warn(ctx, "Failed to update retry task status",
					"task_id", task.TaskID,
					"error", err,
				)
			}

			// Clear next retry time since the task is being retried now
			if err := database.UpdateTaskRetryInfo(ctx, q.db, task.TaskID, task.RetryCount, 0); err != nil {
				logger.Warn(ctx, "Failed to clear next retry time",
					"task_id", task.TaskID,
					"error", err,
				)
			}

			logger.Info(ctx, "Retry task resubmitted",
				"task_id", task.TaskID,
				"task_type", taskType,
				"retry_count", task.RetryCount,
			)
		}
	}
	return nil
}

// StartRetryScheduler starts a background goroutine that periodically processes the retry queue
func (q *RetryQueue) StartRetryScheduler(ctx context.Context, interval time.Duration, batchSize int) {
	go func() {
		ticker := time.NewTicker(interval)
		defer ticker.Stop()
		for {
			select {
			case <-ctx.Done():
				logger.Info(ctx, "Retry scheduler stopping")
				return
			case <-ticker.C:
				if err := q.ProcessRetryQueue(ctx, batchSize); err != nil {
					logger.Error(ctx, "Error processing retry queue", "error", err)
				}
			}
		}
	}()
}

// GetRetryStats returns the number of tasks currently due for retry.
// Note: the count is capped at 1000 because it reuses GetTasksForRetry
// with a fixed limit rather than issuing a dedicated COUNT query.
func (q *RetryQueue) GetRetryStats(ctx context.Context) (int, error) {
	tasks, err := database.GetTasksForRetry(ctx, q.db, 1000) // large limit used as a counting cap
	if err != nil {
		return 0, err
	}
	return len(tasks), nil
}
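
// retryQueueUsageSketch is an illustrative sketch, not part of the original
// package: it shows one way a caller might wire the retry queue together at
// startup. The *gorm.DB and *QueueProducer are assumed to be constructed
// elsewhere; only identifiers already defined or imported in this file are
// used, so this compiles as-is.
func retryQueueUsageSketch(ctx context.Context, db *gorm.DB, producer *QueueProducer) (int, error) {
	// Passing a nil strategy falls back to DefaultRetryStrategy (see NewRetryQueue).
	rq := NewRetryQueue(db, producer, nil)

	// Poll for due retries every 30 seconds, resubmitting up to 100 tasks per
	// tick. The scheduler goroutine exits when ctx is cancelled.
	rq.StartRetryScheduler(ctx, 30*time.Second, 100)

	// A manual pass over the retry queue can also be triggered directly,
	// followed by a count of tasks still waiting to be retried.
	if err := rq.ProcessRetryQueue(ctx, 100); err != nil {
		return 0, err
	}
	return rq.GetRetryStats(ctx)
}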