- Add app/utils/retry.py with configurable async retry decorator
- Update DeliveryLog model to track attempt_count and latency_seconds
- Apply @http_retry to engine._exec_forward and _exec_notify methods
- Update save_logs to record retry metadata
- Add comprehensive unit tests for retry functionality
- Support configuration via environment variables (RETRY_*)

This improves reliability for downstream HTTP calls by automatically retrying transient failures with exponential backoff and jitter.
100 lines
3.6 KiB
Python
100 lines
3.6 KiB
Python
"""
|
|
Async retry decorator with exponential backoff and configurable parameters.
|
|
"""
|
|
import asyncio
|
|
import logging
|
|
from functools import wraps
|
|
from typing import Callable, Any, Optional
|
|
import time
|
|
import os
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
def async_retry(
    max_attempts: int = 3,
    initial_delay: float = 1.0,
    backoff_factor: float = 2.0,
    max_delay: float = 60.0,
    retry_on: tuple = (Exception,),
    jitter: bool = True
):
    """
    Decorator for async functions that implements exponential backoff retry logic.

    The decorated coroutine no longer returns its bare result: it returns a
    ``(result, metadata)`` tuple. When every attempt fails with an exception
    listed in ``retry_on`` the wrapper does NOT raise; it returns
    ``(None, metadata)`` with ``metadata['success']`` set to ``False``.
    Exceptions that are not instances of ``retry_on`` propagate immediately.

    Args:
        max_attempts: Maximum number of attempts (including the initial call)
        initial_delay: Delay in seconds before the first retry
        backoff_factor: Factor by which the delay grows after each failure
        max_delay: Upper bound, in seconds, on the sleep between attempts
            (enforced after jitter has been applied)
        retry_on: Tuple of exception types to retry on
        jitter: Add random jitter (up to ±25%) to each delay to prevent
            thundering herd

    Returns:
        A decorator that wraps an async callable.
    """
    # Imported once per decorator rather than on every failed attempt.
    import random

    def decorator(func: Callable) -> Callable:
        @wraps(func)
        async def wrapper(*args, **kwargs) -> tuple[Any, dict]:
            """
            Returns:
                tuple: (result, metadata_dict)
                metadata_dict contains: attempts, total_latency, last_error,
                success
            """
            last_error = None
            # Number of times func was actually invoked; stays 0 when
            # max_attempts is zero or negative and the loop never runs.
            attempts_made = 0
            # Monotonic clock: the measured latency cannot be skewed by
            # wall-clock adjustments that happen while we are retrying.
            start_time = time.monotonic()

            for attempt in range(max_attempts):
                attempts_made = attempt + 1
                try:
                    result = await func(*args, **kwargs)
                except retry_on as e:
                    last_error = str(e)

                    if attempt >= max_attempts - 1:
                        # Final attempt: report and fall through without sleeping.
                        logger.error(f"All {max_attempts} attempts failed for {func.__name__}: {e}")
                        break

                    delay = min(initial_delay * (backoff_factor ** attempt), max_delay)
                    if jitter:
                        # Add random jitter (±25% of delay), then clamp again so
                        # the jittered value never exceeds max_delay.
                        jitter_range = delay * 0.25
                        delay = min(delay + random.uniform(-jitter_range, jitter_range), max_delay)

                    logger.warning(f"Attempt {attempt + 1}/{max_attempts} failed for {func.__name__}: {e}. Retrying in {delay:.2f}s")
                    await asyncio.sleep(delay)
                else:
                    total_latency = time.monotonic() - start_time
                    return result, {
                        'attempts': attempts_made,
                        'total_latency': round(total_latency, 3),
                        'last_error': None,
                        'success': True
                    }

            total_latency = time.monotonic() - start_time
            return None, {
                'attempts': attempts_made,
                'total_latency': round(total_latency, 3),
                'last_error': last_error,
                'success': False
            }

        return wrapper
    return decorator
|
|
|
|
|
|
# Configuration from environment
|
|
def _env_number(name, default, cast):
    """Read environment variable ``name`` and convert it with ``cast``.

    ``default`` is the string used when the variable is unset. A value that
    ``cast`` rejects raises ``ValueError`` naming the offending variable, so a
    misconfiguration can be traced without guessing which setting is wrong.
    """
    raw = os.getenv(name, default)
    try:
        return cast(raw)
    except ValueError as exc:
        raise ValueError(
            f"Invalid value {raw!r} for environment variable {name}: "
            f"expected {cast.__name__}"
        ) from exc


def get_retry_config():
    """Get retry configuration from environment variables.

    Reads RETRY_MAX_ATTEMPTS (int, default 3), RETRY_INITIAL_DELAY
    (float, default 1.0), RETRY_BACKOFF_FACTOR (float, default 2.0) and
    RETRY_MAX_DELAY (float, default 30.0).

    Returns:
        dict with keys 'max_attempts', 'initial_delay', 'backoff_factor'
        and 'max_delay'.

    Raises:
        ValueError: if a variable is set to a value that cannot be parsed
            as the expected numeric type.
    """
    return {
        'max_attempts': _env_number('RETRY_MAX_ATTEMPTS', '3', int),
        'initial_delay': _env_number('RETRY_INITIAL_DELAY', '1.0', float),
        'backoff_factor': _env_number('RETRY_BACKOFF_FACTOR', '2.0', float),
        'max_delay': _env_number('RETRY_MAX_DELAY', '30.0', float),
    }
|
|
|
|
|
|
# Pre-configured decorators for common use cases
|
|
def http_retry(**kwargs):
    """Retry decorator specifically for HTTP operations.

    Starts from the environment-derived configuration returned by
    get_retry_config(), then applies any keyword overrides supplied by the
    caller. Unless the caller passes ``retry_on``, every ``Exception`` is
    treated as retryable.

    Args:
        **kwargs: Overrides forwarded to async_retry (e.g. max_attempts,
            initial_delay, backoff_factor, max_delay, retry_on, jitter).

    Returns:
        The decorator produced by async_retry.
    """
    config = get_retry_config()
    config.update(kwargs)
    # setdefault instead of a separate keyword argument: passing retry_on
    # both explicitly and through **config would raise
    # "TypeError: got multiple values for keyword argument 'retry_on'"
    # whenever a caller overrides it.
    config.setdefault('retry_on', (Exception,))  # Retry on any exception for HTTP calls
    return async_retry(**config)
|