176 lines
5.7 KiB
Python
176 lines
5.7 KiB
Python
from prometheus_client import Counter, Histogram, Gauge, generate_latest, REGISTRY
|
|
from prometheus_client import CollectorRegistry, multiprocess, start_http_server
|
|
from src.infrastructure.observability.logger import logger
|
|
import os
|
|
|
|
"""
|
|
Prometheus Metrics Service
|
|
Production-ready metrics collection with Prometheus format
|
|
"""
|
|
class MetricsService:
    """Facade over the application's Prometheus collectors.

    On construction it creates the HTTP, business-event, database, cache and
    Kafka collectors and, unless disabled through the environment, starts the
    Prometheus exposition HTTP server. Each collector has a matching
    ``record_*`` helper so callers never touch label plumbing directly.
    """

    def __init__(self):
        """Create all collectors and optionally start the metrics server.

        Environment variables read:
            APP_NAME: value stored under ``default_labels['service']``
                (default ``'test_project'``).
            ENVIRONMENT: value stored under ``default_labels['environment']``
                (default ``'development'``).
            METRICS_ENABLED: the HTTP server is started only when this equals
                ``'true'`` case-insensitively (default ``'true'``).
        """
        self.registry = REGISTRY

        # Default labels for all metrics.
        # NOTE(review): nothing in this class attaches these labels to the
        # collectors; the attribute is kept because outside code may read it.
        self.default_labels = {
            'service': os.getenv('APP_NAME', 'test_project'),
            'environment': os.getenv('ENVIRONMENT', 'development'),
        }

        # Custom metrics
        self._initialize_custom_metrics()

        # Start metrics server if enabled
        if os.getenv('METRICS_ENABLED', 'true').lower() == 'true':
            self._start_metrics_server()

    def _initialize_custom_metrics(self) -> None:
        """Create every application collector on ``self.registry``.

        The registry is passed explicitly so the collectors always live in the
        same registry that ``get_metrics`` and ``get_registry`` hand out,
        instead of relying on the library's implicit default.
        """
        registry = self.registry

        # HTTP request metrics
        self.http_request_duration = Histogram(
            'http_request_duration_seconds',
            'Duration of HTTP requests in seconds',
            ['method', 'route', 'status_code'],
            buckets=[0.1, 0.5, 1, 2, 5, 10, 30],
            registry=registry,
        )
        self.http_request_total = Counter(
            'http_requests_total',
            'Total number of HTTP requests',
            ['method', 'route', 'status_code'],
            registry=registry,
        )

        # Business metrics
        self.business_events_total = Counter(
            'business_events_total',
            'Total number of business events',
            ['event_type', 'status'],
            registry=registry,
        )

        # Database metrics
        self.database_query_duration = Histogram(
            'database_query_duration_seconds',
            'Duration of database queries in seconds',
            ['operation', 'table'],
            buckets=[0.01, 0.05, 0.1, 0.5, 1, 2, 5],
            registry=registry,
        )

        # Cache metrics
        self.cache_hits = Counter(
            'cache_hits_total',
            'Total number of cache hits',
            ['cache_type'],
            registry=registry,
        )
        self.cache_misses = Counter(
            'cache_misses_total',
            'Total number of cache misses',
            ['cache_type'],
            registry=registry,
        )

        # Kafka metrics
        self.kafka_messages_published = Counter(
            'kafka_messages_published_total',
            'Total number of Kafka messages published',
            ['topic', 'status'],
            registry=registry,
        )
        self.kafka_messages_consumed = Counter(
            'kafka_messages_consumed_total',
            'Total number of Kafka messages consumed',
            ['topic', 'status'],
            registry=registry,
        )

    def _start_metrics_server(self) -> None:
        """Start the Prometheus metrics HTTP server (best effort).

        The port comes from ``PROMETHEUS_PORT`` (default ``'9090'``). Any
        failure - a non-numeric port, a port already in use, and so on - is
        logged and swallowed so that constructing the service never raises
        because of the exposition endpoint.
        """
        try:
            # NOTE(review): 9090 is also the Prometheus server's own default
            # listen port - confirm this cannot clash on shared hosts.
            port = int(os.getenv('PROMETHEUS_PORT', '9090'))
            # Serve the same registry this service exposes via get_metrics().
            start_http_server(port, registry=self.registry)
            logger.info(f'Metrics: Prometheus metrics server started on port {port}')
        except Exception as e:
            # Deliberately broad: metrics exposure must not take the app down.
            logger.error(f'Metrics: Failed to start metrics server: {e}')

    def record_http_request(self, method: str, route: str, status_code: int, duration: float) -> None:
        """
        Record one HTTP request: observe its duration and count it.

        Args:
            method: HTTP method
            route: Route path
            status_code: HTTP status code
            duration: Duration in seconds
        """
        request_labels = {
            'method': method,
            'route': route,
            'status_code': status_code,
        }
        self.http_request_duration.labels(**request_labels).observe(duration)
        self.http_request_total.labels(**request_labels).inc()

    def record_business_event(self, event_type: str, status: str = 'success') -> None:
        """
        Count one business event.

        Args:
            event_type: Event type
            status: Event status ('success' or 'error')
        """
        self.business_events_total.labels(event_type=event_type, status=status).inc()

    def record_database_query(self, operation: str, table: str, duration: float) -> None:
        """
        Observe the duration of one database query.

        Args:
            operation: Operation type (select, insert, update, delete)
            table: Table name
            duration: Duration in seconds
        """
        self.database_query_duration.labels(operation=operation, table=table).observe(duration)

    def record_cache_hit(self, cache_type: str = 'redis') -> None:
        """
        Count one cache hit.

        Args:
            cache_type: Cache type (e.g., 'redis', 'memory')
        """
        self.cache_hits.labels(cache_type=cache_type).inc()

    def record_cache_miss(self, cache_type: str = 'redis') -> None:
        """
        Count one cache miss.

        Args:
            cache_type: Cache type (e.g., 'redis', 'memory')
        """
        self.cache_misses.labels(cache_type=cache_type).inc()

    def record_kafka_published(self, topic: str, status: str = 'success') -> None:
        """
        Count one published Kafka message.

        Args:
            topic: Topic name
            status: Status ('success' or 'error')
        """
        self.kafka_messages_published.labels(topic=topic, status=status).inc()

    def record_kafka_consumed(self, topic: str, status: str = 'success') -> None:
        """
        Count one consumed Kafka message.

        Args:
            topic: Topic name
            status: Status ('success' or 'error')
        """
        self.kafka_messages_consumed.labels(topic=topic, status=status).inc()

    def get_metrics(self) -> bytes:
        """Return the current contents of ``self.registry`` in Prometheus format."""
        return generate_latest(self.registry)

    def get_registry(self) -> "CollectorRegistry":
        """Return the registry that holds this service's collectors."""
        return self.registry
|
|
|
|
# Singleton instance
|
|
# Built at import time: constructing MetricsService creates every collector
# and, unless METRICS_ENABLED is set to something other than 'true', also
# starts the metrics HTTP server. Import this object rather than creating
# further instances.
metrics_service = MetricsService()
|
|
|