- Add app/health.py with HealthChecker and MetricsCollector classes
- Implement composite /health endpoint checking DB and HTTP client
- Add /metrics endpoint with Prometheus exposition format
- Add webhook request metrics (counters, histograms, gauges)
- Create app/http_client.py for shared AsyncClient management
- Update app/main.py lifespan to init/close HTTP client
- Add comprehensive tests in tests/test_health.py

This enables proper observability with health checks and metrics for
monitoring system status and webhook processing performance.
142 lines
5.2 KiB
Python
import asyncio
import time
from contextlib import asynccontextmanager

from fastapi import FastAPI, Request, BackgroundTasks, Body
from fastapi.responses import JSONResponse

from app.admin import router as admin_router
from app.db import SessionLocal, WebhookEndpoint, RequestLog, DeliveryLog, init_db
from app.health import health_checker, metrics_collector
from app.http_client import init_http_client, close_http_client
from app.logging import get_logger
from app.models import IncomingPayload
from app.services.engine import engine
|
|
|
|
# Module-level logger for this application entry point.
logger = get_logger("app")
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler.

    Startup: initialize the database schema and the shared HTTP client.
    Shutdown: close the shared HTTP client (wrapped in ``finally`` so it
    runs even if the application body raises while serving).
    """
    # Startup: Initialize DB and HTTP client
    logger.info("Initializing database...")
    init_db()
    logger.info("Initializing http client...")
    await init_http_client()
    try:
        # Control returns to FastAPI for the lifetime of the app.
        yield
    finally:
        # Shutdown: close shared http client
        logger.info("Shutting down http client...")
        await close_http_client()
|
|
|
|
# Application instance wired to the lifespan handler defined in this module;
# admin routes are mounted from app.admin.
app = FastAPI(lifespan=lifespan)

app.include_router(admin_router)
|
|
|
|
def save_logs(namespace: str, payload_dict: dict, routed: list, notified: list):
    """Persist one RequestLog plus a DeliveryLog row per relay/notify result.

    Intended to run as a FastAPI background task, so it is best-effort:
    any failure is logged and swallowed rather than propagated.

    Args:
        namespace: Webhook namespace the request arrived on.
        payload_dict: Raw incoming payload (stored as ``raw_body``).
        routed: Relay results from the engine; dicts with keys like
            ``target``, ``ok``, ``error``, ``_retry_attempts``, ``_retry_latency``.
        notified: Notification results; same shape but keyed by ``channel``.
    """
    db = None
    try:
        db = SessionLocal()

        # Create RequestLog first and commit so it has a primary key.
        req_log = RequestLog(
            namespace=namespace,
            remark=str(payload_dict.get("remark", "")),
            event_no=str(payload_dict.get("event_define_no", "")),
            raw_body=payload_dict,
            status="success",
        )
        db.add(req_log)
        db.commit()
        db.refresh(req_log)  # populate req_log.id for the delivery rows

        # One DeliveryLog per relay target...
        for r in routed:
            db.add(DeliveryLog(
                request_id=req_log.id,
                target_name=r.get("target"),
                type="relay",
                status="success" if r.get("ok") else "failed",
                response_summary=str(r.get("error") or "OK"),
                attempt_count=r.get("_retry_attempts", 1),
                latency_seconds=r.get("_retry_latency", 0.0),
            ))

        # ...and one per notification channel.
        for n in notified:
            db.add(DeliveryLog(
                request_id=req_log.id,
                target_name=n.get("channel"),
                type="notify",
                status="success" if n.get("ok") else "failed",
                response_summary=str(n.get("error") or "OK"),
                attempt_count=n.get("_retry_attempts", 1),
                latency_seconds=n.get("_retry_latency", 0.0),
            ))

        db.commit()
    except Exception as e:
        # Best-effort: never let a logging failure crash the task runner.
        logger.error(f"Failed to save logs: {e}")
    finally:
        # Fix: the original only closed the session on the success path,
        # leaking it whenever any step above raised.
        if db is not None:
            db.close()
|
|
|
|
@app.get("/health")
async def health():
    """Composite health check (DB + HTTP client) via the shared checker."""
    overall = await health_checker.check_overall()
    return overall
|
|
|
|
@app.get("/metrics")
async def metrics():
    """Expose collected metrics in Prometheus text exposition format."""
    exposition = metrics_collector.get_prometheus_format()
    return exposition
|
|
|
|
@app.post("/webhook/{namespace}")
async def webhook(namespace: str, payload: IncomingPayload = Body(...), background_tasks: BackgroundTasks = BackgroundTasks()):
    """Receive a webhook, process it through the engine, and record metrics.

    Flow: look up the (active) endpoint for ``namespace``, run the engine,
    record counters/histograms, then persist logs via a background task.

    Returns:
        JSONResponse with the routed/notified results, or a 404/403/400
        error response for unknown, inactive, or invalid requests.
    """
    # NOTE(review): a mutable default (`BackgroundTasks()`) is a Python
    # antipattern; FastAPI appears to inject this per-request from the
    # annotation, but direct callers would share one instance — confirm.
    start_time = time.time()

    # Look up the endpoint; always release the session, even if the query
    # raises (the original leaked it on any exception before db.close()).
    db = SessionLocal()
    try:
        endpoint = db.query(WebhookEndpoint).filter(WebhookEndpoint.namespace == namespace).first()

        if not endpoint:
            # Record failed request metric
            metrics_collector.increment_counter("webhook_requests_total", labels={"status": "endpoint_not_found"})
            return JSONResponse({"error": "Endpoint not found"}, status_code=404)

        if not endpoint.is_active:
            # Record failed request metric
            metrics_collector.increment_counter("webhook_requests_total", labels={"status": "endpoint_inactive"})
            return JSONResponse({"error": "Endpoint inactive"}, status_code=403)

        # Capture the id before the session (and its identity map) goes away.
        endpoint_id = endpoint.id
    finally:
        db.close()

    # payload is validated by Pydantic; convert to plain dict for engine
    try:
        body_dict = payload.model_dump()
    except Exception:
        metrics_collector.increment_counter("webhook_requests_total", labels={"status": "invalid_payload"})
        return JSONResponse({"error": "Invalid payload"}, status_code=400)

    # Use new engine
    routed, notified = await engine.process(endpoint_id, body_dict)

    # Record successful request metric and processing duration.
    metrics_collector.increment_counter("webhook_requests_total", labels={"status": "success"})
    processing_time = time.time() - start_time
    metrics_collector.observe_histogram("webhook_processing_duration_seconds", processing_time)

    # Record delivery attempts and failures across both relays and notifies.
    total_attempts = sum(r.get("_retry_attempts", 1) for r in routed) + sum(n.get("_retry_attempts", 1) for n in notified)
    total_failures = sum(1 for r in routed if not r.get("ok", False)) + sum(1 for n in notified if not n.get("ok", False))

    metrics_collector.increment_counter("delivery_attempts_total", total_attempts)
    metrics_collector.increment_counter("delivery_failures_total", total_failures)

    # Persist logs off the request path.
    background_tasks.add_task(save_logs, namespace, body_dict, routed, notified)

    result = {
        "namespace": namespace,
        "routed": routed,
        "notified": notified
    }
    logger.info({"event": "webhook_processed", "namespace": namespace, "routed": len(routed), "notified": len(notified)})
    return JSONResponse(result)
|