WebhockTransfer/app/utils/retry.py
auto-bot b11c39f3bf feat: add async retry mechanism with exponential backoff
- Add app/utils/retry.py with configurable async retry decorator
- Update DeliveryLog model to track attempt_count and latency_seconds
- Apply @http_retry to engine._exec_forward and _exec_notify methods
- Update save_logs to record retry metadata
- Add comprehensive unit tests for retry functionality
- Support configuration via environment variables (RETRY_*)

This improves reliability for downstream HTTP calls by automatically
retrying transient failures with exponential backoff and jitter.
2025-12-24 11:04:41 +08:00

100 lines
3.6 KiB
Python

"""
Async retry decorator with exponential backoff and configurable parameters.
"""
import asyncio
import logging
from functools import wraps
from typing import Callable, Any, Optional
import time
import os
logger = logging.getLogger(__name__)
def async_retry(
max_attempts: int = 3,
initial_delay: float = 1.0,
backoff_factor: float = 2.0,
max_delay: float = 60.0,
retry_on: tuple = (Exception,),
jitter: bool = True
):
"""
Decorator for async functions that implements exponential backoff retry logic.
Args:
max_attempts: Maximum number of retry attempts (including initial call)
initial_delay: Initial delay in seconds before first retry
backoff_factor: Factor by which delay increases each retry
max_delay: Maximum delay between retries
retry_on: Tuple of exception types to retry on
jitter: Add random jitter to delay to prevent thundering herd
"""
def decorator(func: Callable) -> Callable:
@wraps(func)
async def wrapper(*args, **kwargs) -> tuple[Any, dict]:
"""
Returns:
tuple: (result, metadata_dict)
metadata_dict contains: attempts, total_latency, last_error
"""
last_error = None
start_time = time.time()
for attempt in range(max_attempts):
try:
result = await func(*args, **kwargs)
total_latency = time.time() - start_time
return result, {
'attempts': attempt + 1,
'total_latency': round(total_latency, 3),
'last_error': None,
'success': True
}
except retry_on as e:
last_error = str(e)
if attempt < max_attempts - 1: # Don't sleep after last attempt
delay = min(initial_delay * (backoff_factor ** attempt), max_delay)
if jitter:
# Add random jitter (±25% of delay)
import random
jitter_range = delay * 0.25
delay += random.uniform(-jitter_range, jitter_range)
logger.warning(f"Attempt {attempt + 1}/{max_attempts} failed for {func.__name__}: {e}. Retrying in {delay:.2f}s")
await asyncio.sleep(delay)
else:
logger.error(f"All {max_attempts} attempts failed for {func.__name__}: {e}")
total_latency = time.time() - start_time
return None, {
'attempts': max_attempts,
'total_latency': round(total_latency, 3),
'last_error': last_error,
'success': False
}
return wrapper
return decorator
# Configuration from environment
def get_retry_config():
"""Get retry configuration from environment variables."""
return {
'max_attempts': int(os.getenv('RETRY_MAX_ATTEMPTS', '3')),
'initial_delay': float(os.getenv('RETRY_INITIAL_DELAY', '1.0')),
'backoff_factor': float(os.getenv('RETRY_BACKOFF_FACTOR', '2.0')),
'max_delay': float(os.getenv('RETRY_MAX_DELAY', '30.0')),
}
# Pre-configured decorators for common use cases
def http_retry(**kwargs):
"""Retry decorator specifically for HTTP operations."""
config = get_retry_config()
config.update(kwargs)
return async_retry(
retry_on=(Exception,), # Retry on any exception for HTTP calls
**config
)