feat: add async retry mechanism with exponential backoff
- Add app/utils/retry.py with configurable async retry decorator
- Update DeliveryLog model to track attempt_count and latency_seconds
- Apply @http_retry to engine._exec_forward and _exec_notify methods
- Update save_logs to record retry metadata
- Add comprehensive unit tests for retry functionality
- Support configuration via environment variables (RETRY_*)

This improves reliability for downstream HTTP calls by automatically retrying transient failures with exponential backoff and jitter.
This commit is contained in:
+40
-30
@@ -1,8 +1,9 @@
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
import asyncio
|
||||
import re
|
||||
from app.db import SessionLocal, ProcessingRule, RuleAction, Target, NotificationChannel, MessageTemplate
|
||||
from app.logging import get_logger
|
||||
from app.utils.retry import http_retry
|
||||
from app.templates import safe_render
|
||||
|
||||
logger = get_logger("engine")
|
||||
@@ -83,8 +84,8 @@ class RuleEngine:
|
||||
for action in rule.actions:
|
||||
if action.action_type == 'forward' and action.target:
|
||||
t_dict = {"name": action.target.name, "url": action.target.url, "timeout_ms": action.target.timeout_ms}
|
||||
tasks.append(self._exec_forward(t_dict, payload))
|
||||
|
||||
tasks.append(self._wrap_retry_task(self._exec_forward, t_dict, payload, action=action))
|
||||
|
||||
elif action.action_type == 'notify':
|
||||
# Check if we have a valid channel
|
||||
if action.channel:
|
||||
@@ -94,18 +95,18 @@ class RuleEngine:
|
||||
template_content = action.template.template_content
|
||||
else:
|
||||
template_content = current_context.get("template_content")
|
||||
|
||||
|
||||
if template_content:
|
||||
try:
|
||||
# Flatten payload + merge current context vars
|
||||
render_context = self._flatten_payload(payload)
|
||||
render_context.update(current_context["vars"])
|
||||
|
||||
|
||||
# Use safe Jinja2 rendering (supports legacy {var} by conversion)
|
||||
msg = safe_render(template_content, render_context)
|
||||
|
||||
|
||||
c_dict = {"channel": action.channel.channel_type, "url": action.channel.webhook_url}
|
||||
tasks.append(self._exec_notify(c_dict, msg))
|
||||
tasks.append(self._wrap_retry_task(self._exec_notify, c_dict, msg, action=action))
|
||||
except Exception as e:
|
||||
logger.exception(f"Template render failed for action {action.id}: {e}")
|
||||
tasks.append(self._return_error("notify", action.channel.name, str(e)))
|
||||
@@ -199,30 +200,39 @@ class RuleEngine:
|
||||
|
||||
return out
|
||||
|
||||
async def _exec_forward(self, target: dict, payload: dict):
    """POST ``payload`` as JSON to ``target['url']`` and report the outcome.

    Args:
        target: Dict read for ``url``, ``name`` and ``timeout_ms``
            (milliseconds; 5000 is used when the key is missing or None).
        payload: Object sent as the JSON request body.

    Returns:
        ``{"type": "forward", "target": <name>, "ok": True}`` on success.
        When anything raises (including ``raise_for_status`` on an error
        response) the same dict is returned with ``"ok": False`` and an
        ``"error"`` string instead of propagating the exception.
    """
    try:
        import httpx

        # The key can be present with a None value; ``.get(key, 5000)`` would
        # then pass None to the division and fail with TypeError, so None is
        # mapped to the default explicitly. An explicit 0 is left untouched.
        timeout_ms = target.get('timeout_ms')
        if timeout_ms is None:
            timeout_ms = 5000

        async with httpx.AsyncClient() as client:
            resp = await client.post(target['url'], json=payload, timeout=timeout_ms / 1000)
            resp.raise_for_status()
            return {"type": "forward", "target": target['name'], "ok": True}
    except Exception as e:
        return {"type": "forward", "target": target['name'], "ok": False, "error": str(e)}
|
||||
@http_retry()
async def _exec_forward(self, target: dict, payload: dict) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Execute forward with retry logic. Returns (result_dict, retry_metadata).

    POSTs ``payload`` as JSON to ``target['url']``. Exceptions (including
    ``raise_for_status`` on an error response) are not caught here.
    NOTE(review): presumably the ``http_retry`` decorator retries on them and
    fills in the metadata slot, which this body always returns empty — confirm
    against app/utils/retry.py.

    Args:
        target: Dict read for ``url``, ``name`` and ``timeout_ms``
            (milliseconds; 5000 is used when the key is missing or None).
        payload: Object sent as the JSON request body.
    """
    import httpx

    # The key can be present with a None value; ``.get(key, 5000)`` would then
    # pass None to the division and raise TypeError, so None is mapped to the
    # default explicitly. An explicit 0 is left untouched.
    timeout_ms = target.get('timeout_ms')
    if timeout_ms is None:
        timeout_ms = 5000

    async with httpx.AsyncClient() as client:
        resp = await client.post(target['url'], json=payload, timeout=timeout_ms / 1000)
        resp.raise_for_status()
    return {"type": "forward", "target": target['name'], "ok": True}, {}
|
||||
|
||||
async def _exec_notify(self, channel: dict, msg: str):
    """Send ``msg`` to a notification webhook and report the outcome.

    Args:
        channel: Dict read for ``channel`` (the channel type, ``'feishu'`` or
            ``'wecom'``) and ``url`` (passed to the sender as the destination).
        msg: Message text handed to the sender.

    Returns:
        ``{"type": "notify", "channel": <type>, "ok": True}`` on success.
        On an unsupported channel type, or when the sender raises, the same
        dict is returned with ``"ok": False`` and an ``"error"`` string.
    """
    channel_type = channel.get('channel')

    # Nothing can be sent for an unknown channel type, so report a failure
    # instead of falling through to a success result.
    if channel_type not in ('feishu', 'wecom'):
        return {
            "type": "notify",
            "channel": channel_type,
            "ok": False,
            "error": f"Unsupported channel type: {channel_type!r}",
        }

    try:
        from app.services.notify import send_feishu, send_wecom

        send = send_feishu if channel_type == 'feishu' else send_wecom
        await send(channel.get('url'), msg)
        return {"type": "notify", "channel": channel_type, "ok": True}
    except Exception as e:
        logger.exception(f"Notification failed for {channel.get('channel')}: {e}")
        return {"type": "notify", "channel": channel.get('channel'), "ok": False, "error": str(e)}
|
||||
@http_retry()
async def _exec_notify(self, channel: dict, msg: str) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Execute notify with retry logic. Returns (result_dict, retry_metadata).

    Sends ``msg`` through the sender matching ``channel['channel']``
    (``'feishu'`` or ``'wecom'``) to ``channel['url']``. Exceptions from the
    sender are not caught here.
    NOTE(review): presumably the ``http_retry`` decorator retries on them and
    fills in the metadata slot, which this body always returns empty — confirm
    against app/utils/retry.py.

    Args:
        channel: Dict read for ``channel`` (channel type) and ``url``.
        msg: Message text handed to the sender.
    """
    from app.services.notify import send_feishu, send_wecom

    channel_type = channel.get('channel')
    url = channel.get('url')

    if channel_type == 'feishu':
        await send_feishu(url, msg)
    elif channel_type == 'wecom':
        await send_wecom(url, msg)
    else:
        # Nothing was sent, so this must not be reported as a success. The
        # failure is returned rather than raised because repeating the call
        # cannot change the outcome for an unknown channel type.
        return {
            "type": "notify",
            "channel": channel_type,
            "ok": False,
            "error": f"Unsupported channel type: {channel_type!r}",
        }, {}

    return {"type": "notify", "channel": channel_type, "ok": True}, {}
|
||||
|
||||
async def _wrap_retry_task(self, func, *args, **kwargs):
    """Wrap retry-enabled task to handle metadata and return standard result format.

    Awaits ``func(*args, **kwargs)``, which must return a
    ``(result_dict, metadata_dict)`` pair, and returns ``result_dict``. When
    ``metadata_dict`` is non-empty its ``attempts`` and ``total_latency``
    values are copied into the result as ``_retry_attempts`` and
    ``_retry_latency`` for logging.

    Args:
        func: Coroutine function to await.
        *args: Positional arguments for ``func``; on failure the first one, if
            it is a dict, supplies the target name / channel type.
        **kwargs: Keyword arguments for ``func``. ``action`` is consumed here
            and never forwarded; its ``action_type`` (``'forward'`` or
            ``'notify'``) selects the shape of the failure result.

    Returns:
        The result dict. If ``func`` raises, a dict with ``"ok": False`` and
        an ``"error"`` string is returned instead of propagating.
    """
    action = kwargs.pop('action', None)  # Remove action from kwargs

    try:
        outcome = await func(*args, **kwargs)
    except Exception as exc:
        # NOTE(review): the decorated _exec_* bodies no longer catch their own
        # errors; assuming http_retry re-raises once it gives up, the failure
        # is converted here so the task still resolves to a result dict, like
        # every other task this engine schedules — confirm against
        # app/utils/retry.py.
        kind = getattr(action, 'action_type', None)
        subject = args[0] if args and isinstance(args[0], dict) else {}
        failure = {"type": kind or "unknown"}
        if kind == 'forward':
            failure["target"] = subject.get('name')
        elif kind == 'notify':
            failure["channel"] = subject.get('channel')
        failure["ok"] = False
        failure["error"] = str(exc)
        return failure

    # Unpacked outside the try so a malformed return value from ``func`` still
    # surfaces as an error instead of being reported as a delivery failure.
    result, metadata = outcome

    # Add retry metadata to result dict for logging
    if metadata:
        result['_retry_attempts'] = metadata.get('attempts', 1)
        result['_retry_latency'] = metadata.get('total_latency', 0.0)

    return result
|
||||
|
||||
async def _return_error(self, type_str, name, err):
    """Build a failed-result dict as an awaitable, without doing any work.

    The subject is stored under ``"target"`` when ``type_str`` is
    ``'forward'`` and under ``"channel"`` for any other value.
    """
    if type_str == 'forward':
        subject_key = "target"
    else:
        subject_key = "channel"
    failure = {"type": type_str}
    failure[subject_key] = name
    failure["ok"] = False
    failure["error"] = err
    return failure
|
||||
|
||||
Reference in New Issue
Block a user