aibilly_backend_code/src/infrastructure/observability/metrics.py
2026-03-10 16:44:04 +05:30

176 lines
5.7 KiB
Python

from prometheus_client import Counter, Histogram, Gauge, generate_latest, REGISTRY
from prometheus_client import CollectorRegistry, multiprocess, start_http_server
from src.infrastructure.observability.logger import logger
import os
"""
Prometheus Metrics Service
Production-ready metrics collection with Prometheus format
"""
class MetricsService:
    """Application metrics facade built on ``prometheus_client``.

    Creates the HTTP, business-event, database, cache and Kafka metrics,
    offers ``record_*`` helpers to update them, and optionally starts the
    Prometheus scrape endpoint.

    Environment variables read:
        APP_NAME: stored in ``default_labels['service']``
            (default ``'test_project'``).
        ENVIRONMENT: stored in ``default_labels['environment']``
            (default ``'development'``).
        METRICS_ENABLED: the HTTP server is started only when this equals
            ``'true'`` (case-insensitive; default ``'true'``).
        PROMETHEUS_PORT: port for the HTTP server (default ``'9090'``).
    """

    def __init__(self, registry: "CollectorRegistry | None" = None):
        """Create the metrics and, if enabled, start the metrics server.

        Args:
            registry: Collector registry that every metric of this instance
                is registered in and that the HTTP server exposes. When
                omitted, the global ``prometheus_client.REGISTRY`` is used.
                Supplying a fresh registry allows more than one instance to
                exist in a process (e.g. in tests); registering the same
                metric names twice in one registry raises ``ValueError``.
        """
        self.registry = REGISTRY if registry is None else registry

        # Default labels for all metrics.
        # NOTE: kept for callers that read this attribute; nothing in this
        # class attaches these labels to the metrics defined below.
        self.default_labels = {
            'service': os.getenv('APP_NAME', 'test_project'),
            'environment': os.getenv('ENVIRONMENT', 'development'),
        }

        # Custom metrics
        self._initialize_custom_metrics()

        # Start metrics server if enabled
        if os.getenv('METRICS_ENABLED', 'true').lower() == 'true':
            self._start_metrics_server()

    def _initialize_custom_metrics(self):
        """Initialize custom application metrics in ``self.registry``."""
        # Every metric is bound explicitly to self.registry so that
        # get_metrics()/get_registry() report exactly what was created here.
        registry = self.registry

        # HTTP request metrics
        self.http_request_duration = Histogram(
            'http_request_duration_seconds',
            'Duration of HTTP requests in seconds',
            ['method', 'route', 'status_code'],
            buckets=[0.1, 0.5, 1, 2, 5, 10, 30],
            registry=registry,
        )
        self.http_request_total = Counter(
            'http_requests_total',
            'Total number of HTTP requests',
            ['method', 'route', 'status_code'],
            registry=registry,
        )

        # Business metrics
        self.business_events_total = Counter(
            'business_events_total',
            'Total number of business events',
            ['event_type', 'status'],
            registry=registry,
        )

        # Database metrics
        self.database_query_duration = Histogram(
            'database_query_duration_seconds',
            'Duration of database queries in seconds',
            ['operation', 'table'],
            buckets=[0.01, 0.05, 0.1, 0.5, 1, 2, 5],
            registry=registry,
        )

        # Cache metrics
        self.cache_hits = Counter(
            'cache_hits_total',
            'Total number of cache hits',
            ['cache_type'],
            registry=registry,
        )
        self.cache_misses = Counter(
            'cache_misses_total',
            'Total number of cache misses',
            ['cache_type'],
            registry=registry,
        )

        # Kafka metrics
        self.kafka_messages_published = Counter(
            'kafka_messages_published_total',
            'Total number of Kafka messages published',
            ['topic', 'status'],
            registry=registry,
        )
        self.kafka_messages_consumed = Counter(
            'kafka_messages_consumed_total',
            'Total number of Kafka messages consumed',
            ['topic', 'status'],
            registry=registry,
        )

    def _start_metrics_server(self):
        """Start the Prometheus metrics HTTP server (best effort).

        The port is read from ``PROMETHEUS_PORT``. Any failure, including a
        non-numeric port value or a port that cannot be bound, is logged and
        swallowed so that metrics problems do not stop the application.
        """
        try:
            port = int(os.getenv('PROMETHEUS_PORT', '9090'))
            # Serve the same registry the metrics were registered in.
            start_http_server(port, registry=self.registry)
            logger.info(f'Metrics: Prometheus metrics server started on port {port}')
        except Exception as e:
            logger.error(f'Metrics: Failed to start metrics server: {e}')

    def record_http_request(self, method: str, route: str, status_code: int, duration: float):
        """
        Record one HTTP request: observe its duration and count it.

        Args:
            method: HTTP method
            route: Route path
            status_code: HTTP status code
            duration: Duration in seconds
        """
        # NOTE(review): each distinct `route` value creates a new series;
        # presumably callers pass route templates, not raw URLs - confirm.
        self.http_request_duration.labels(
            method=method, route=route, status_code=status_code
        ).observe(duration)
        self.http_request_total.labels(
            method=method, route=route, status_code=status_code
        ).inc()

    def record_business_event(self, event_type: str, status: str = 'success'):
        """
        Record business event

        Args:
            event_type: Event type
            status: Event status ('success' or 'error')
        """
        self.business_events_total.labels(event_type=event_type, status=status).inc()

    def record_database_query(self, operation: str, table: str, duration: float):
        """
        Record database query duration

        Args:
            operation: Operation type (select, insert, update, delete)
            table: Table name
            duration: Duration in seconds
        """
        self.database_query_duration.labels(operation=operation, table=table).observe(duration)

    def record_cache_hit(self, cache_type: str = 'redis'):
        """
        Record cache hit

        Args:
            cache_type: Cache type (e.g., 'redis', 'memory')
        """
        self.cache_hits.labels(cache_type=cache_type).inc()

    def record_cache_miss(self, cache_type: str = 'redis'):
        """
        Record cache miss

        Args:
            cache_type: Cache type (e.g., 'redis', 'memory')
        """
        self.cache_misses.labels(cache_type=cache_type).inc()

    def record_kafka_published(self, topic: str, status: str = 'success'):
        """
        Record Kafka message published

        Args:
            topic: Topic name
            status: Status ('success' or 'error')
        """
        self.kafka_messages_published.labels(topic=topic, status=status).inc()

    def record_kafka_consumed(self, topic: str, status: str = 'success'):
        """
        Record Kafka message consumed

        Args:
            topic: Topic name
            status: Status ('success' or 'error')
        """
        self.kafka_messages_consumed.labels(topic=topic, status=status).inc()

    def get_metrics(self):
        """Return the contents of ``self.registry`` as produced by ``generate_latest``."""
        return generate_latest(self.registry)

    def get_registry(self):
        """Return the registry this service registers its metrics in."""
        return self.registry
# Singleton instance shared by everything that imports this module.
# Constructing it here runs MetricsService.__init__ at import time, which
# creates the metrics and, when METRICS_ENABLED is 'true' (the default),
# attempts to start the metrics HTTP server.
metrics_service = MetricsService()