"""
Prometheus Metrics Service
Production-ready metrics collection with Prometheus format
"""
import os

from prometheus_client import Counter, Histogram, Gauge, generate_latest, REGISTRY
from prometheus_client import CollectorRegistry, multiprocess, start_http_server

from src.infrastructure.observability.logger import logger


class MetricsService:
    """Define the application's Prometheus collectors and helpers to record them.

    All collectors are registered on ``self.registry``. By default that is the
    library-wide ``REGISTRY``; a different ``CollectorRegistry`` can be
    injected, which is useful when more than one instance is needed (for
    example in tests), because prometheus_client refuses to register the same
    metric name twice on one registry.

    Environment variables read at construction time:
        APP_NAME: value stored under ``default_labels['service']``
            (default ``'test_project'``).
        ENVIRONMENT: value stored under ``default_labels['environment']``
            (default ``'development'``).
        METRICS_ENABLED: the HTTP exporter is started only when this equals
            ``'true'`` case-insensitively (default ``'true'``).
        PROMETHEUS_PORT: port for the HTTP exporter (default ``'9090'``).
    """

    def __init__(self, registry: CollectorRegistry = REGISTRY) -> None:
        """
        Create the collectors and, if enabled, start the metrics HTTP server

        Args:
            registry: Registry the collectors are attached to and that
                get_metrics() serialises. Defaults to the global REGISTRY.
        """
        self.registry = registry

        # Default labels for all metrics.
        # NOTE: these are only stored on the instance; none of the collectors
        # defined in this class attaches them.
        self.default_labels = {
            'service': os.getenv('APP_NAME', 'test_project'),
            'environment': os.getenv('ENVIRONMENT', 'development')
        }

        # Custom metrics
        self._initialize_custom_metrics()

        # Start metrics server if enabled
        if os.getenv('METRICS_ENABLED', 'true').lower() == 'true':
            self._start_metrics_server()

    def _initialize_custom_metrics(self) -> None:
        """Initialize custom application metrics on ``self.registry``"""
        # HTTP request metrics
        self.http_request_duration = Histogram(
            'http_request_duration_seconds',
            'Duration of HTTP requests in seconds',
            ['method', 'route', 'status_code'],
            buckets=[0.1, 0.5, 1, 2, 5, 10, 30],
            registry=self.registry
        )

        self.http_request_total = Counter(
            'http_requests_total',
            'Total number of HTTP requests',
            ['method', 'route', 'status_code'],
            registry=self.registry
        )

        # Business metrics
        self.business_events_total = Counter(
            'business_events_total',
            'Total number of business events',
            ['event_type', 'status'],
            registry=self.registry
        )

        # Database metrics
        self.database_query_duration = Histogram(
            'database_query_duration_seconds',
            'Duration of database queries in seconds',
            ['operation', 'table'],
            buckets=[0.01, 0.05, 0.1, 0.5, 1, 2, 5],
            registry=self.registry
        )

        # Cache metrics
        self.cache_hits = Counter(
            'cache_hits_total',
            'Total number of cache hits',
            ['cache_type'],
            registry=self.registry
        )

        self.cache_misses = Counter(
            'cache_misses_total',
            'Total number of cache misses',
            ['cache_type'],
            registry=self.registry
        )

        # Kafka metrics
        self.kafka_messages_published = Counter(
            'kafka_messages_published_total',
            'Total number of Kafka messages published',
            ['topic', 'status'],
            registry=self.registry
        )

        self.kafka_messages_consumed = Counter(
            'kafka_messages_consumed_total',
            'Total number of Kafka messages consumed',
            ['topic', 'status'],
            registry=self.registry
        )

    def _start_metrics_server(self) -> None:
        """
        Start Prometheus metrics HTTP server

        Best effort: any failure (unparsable PROMETHEUS_PORT, port already
        bound, ...) is logged and swallowed so that constructing the service
        never fails because of the exporter.
        """
        try:
            port = int(os.getenv('PROMETHEUS_PORT', '9090'))
            # Serve the same registry the collectors were registered on.
            start_http_server(port, registry=self.registry)
            logger.info(f'Metrics: Prometheus metrics server started on port {port}')
        except Exception as e:
            logger.error(f'Metrics: Failed to start metrics server: {e}')

    def record_http_request(self, method: str, route: str, status_code: int, duration: float) -> None:
        """
        Record HTTP request duration and increment the request counter

        Args:
            method: HTTP method
            route: Route path
            status_code: HTTP status code
            duration: Duration in seconds
        """
        self.http_request_duration.labels(method=method, route=route, status_code=status_code).observe(duration)
        self.http_request_total.labels(method=method, route=route, status_code=status_code).inc()

    def record_business_event(self, event_type: str, status: str = 'success') -> None:
        """
        Record business event

        Args:
            event_type: Event type
            status: Event status ('success' or 'error')
        """
        self.business_events_total.labels(event_type=event_type, status=status).inc()

    def record_database_query(self, operation: str, table: str, duration: float) -> None:
        """
        Record database query duration

        Args:
            operation: Operation type (select, insert, update, delete)
            table: Table name
            duration: Duration in seconds
        """
        self.database_query_duration.labels(operation=operation, table=table).observe(duration)

    def record_cache_hit(self, cache_type: str = 'redis') -> None:
        """
        Record cache hit

        Args:
            cache_type: Cache type (e.g., 'redis', 'memory')
        """
        self.cache_hits.labels(cache_type=cache_type).inc()

    def record_cache_miss(self, cache_type: str = 'redis') -> None:
        """
        Record cache miss

        Args:
            cache_type: Cache type
        """
        self.cache_misses.labels(cache_type=cache_type).inc()

    def record_kafka_published(self, topic: str, status: str = 'success') -> None:
        """
        Record Kafka message published

        Args:
            topic: Topic name
            status: Status ('success' or 'error')
        """
        self.kafka_messages_published.labels(topic=topic, status=status).inc()

    def record_kafka_consumed(self, topic: str, status: str = 'success') -> None:
        """
        Record Kafka message consumed

        Args:
            topic: Topic name
            status: Status ('success' or 'error')
        """
        self.kafka_messages_consumed.labels(topic=topic, status=status).inc()

    def get_metrics(self) -> bytes:
        """Get metrics of ``self.registry`` in Prometheus format"""
        return generate_latest(self.registry)

    def get_registry(self) -> CollectorRegistry:
        """Get metrics registry"""
        return self.registry


# Singleton instance
metrics_service = MetricsService()