codenuk_backend_mine/services/ai-analysis-service/migrate_database.py
2025-10-16 10:52:33 +05:30

204 lines
7.1 KiB
Python

#!/usr/bin/env python3
"""
Database Migration Script using psql command
Executes the complete 001-schema.sql file using PostgreSQL's psql command
"""
import os
import subprocess
import sys
from dotenv import load_dotenv
import logging
# Configure logging at import time: INFO level, "LEVEL: message" line format.
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
# Module-level logger shared by the functions and the entry point of this script.
logger = logging.getLogger(__name__)
# Tables the post-migration verification query looks for in the public schema.
_EXPECTED_TABLES = (
    'analysis_sessions',
    'code_embeddings',
    'file_analysis_history',
    'knowledge_embeddings',
    'query_embeddings',
    'repository_metadata',
)


def _psql_base_cmd(db_config):
    """Return the psql argv prefix carrying the connection options."""
    return [
        'psql',
        '-h', db_config['host'],
        '-p', str(db_config['port']),
        '-U', db_config['user'],
        '-d', db_config['database'],
    ]


def _run_psql(cmd, env, timeout):
    """Run a psql argv list, capturing stdout/stderr as text."""
    return subprocess.run(
        cmd,
        env=env,
        capture_output=True,
        text=True,
        timeout=timeout,
    )


def _verify_tables(base_cmd, env):
    """Log which of the expected core tables exist; warn about missing ones."""
    table_list = ", ".join(f"'{name}'" for name in _EXPECTED_TABLES)
    query = (
        "SELECT table_name FROM information_schema.tables "
        "WHERE table_schema = 'public' "
        f"AND table_name IN ({table_list}) "
        "ORDER BY table_name;"
    )
    # -t prints tuples only, so every non-blank output line is a table name.
    outcome = _run_psql(base_cmd + ['-t', '-c', query], env, timeout=30)
    if outcome.returncode != 0:
        logger.warning(" ⚠ Could not verify table creation")
        return
    tables = [line.strip() for line in outcome.stdout.split('\n') if line.strip()]
    logger.info(f" ✓ Found {len(tables)} core tables: {', '.join(tables)}")
    missing = [name for name in _EXPECTED_TABLES if name not in tables]
    if missing:
        logger.warning(f" ⚠ Missing expected tables: {', '.join(missing)}")


def _check_pgvector(base_cmd, env):
    """Log whether the pgvector extension is installed in the database."""
    query = "SELECT EXISTS(SELECT 1 FROM pg_extension WHERE extname = 'vector');"
    outcome = _run_psql(base_cmd + ['-t', '-c', query], env, timeout=30)
    if outcome.returncode != 0:
        logger.warning(" ⚠ Could not check for pgvector extension")
        return
    # With -t, psql prints a boolean as a bare 't' or 'f'.
    if outcome.stdout.strip() == 't':
        logger.info(" ✓ pgvector extension is available")
    else:
        logger.warning(" ⚠ pgvector extension not available - vector operations will be limited")


def run_migration():
    """Apply 001-schema.sql to the configured PostgreSQL database via psql.

    Connection settings are read from the environment after loading a .env
    file: POSTGRES_HOST, POSTGRES_PORT, POSTGRES_DB, POSTGRES_USER and
    POSTGRES_PASSWORD, each with a fallback default.  The password is handed
    to psql through the PGPASSWORD environment variable, so it is not part
    of the command line that gets logged.

    After psql succeeds, two follow-up checks run (expected tables present,
    pgvector extension installed).  Their findings are only logged; a psql
    timeout or missing executable during them is still reported as failure.

    Returns:
        bool: True when psql applied the schema file with exit status 0 and
        the follow-up checks ran without raising; False when the schema file
        is missing, psql exits non-zero, a psql call times out, psql is not
        installed, or any other exception is raised.
    """
    load_dotenv()

    # NOTE(review): these fallbacks include a password committed to source.
    # Deployments should set POSTGRES_PASSWORD explicitly rather than rely
    # on the default below.
    db_config = {
        'host': os.getenv('POSTGRES_HOST', 'localhost'),
        # os.getenv yields strings, so the default is a string as well.
        'port': os.getenv('POSTGRES_PORT', '5432'),
        'database': os.getenv('POSTGRES_DB', 'dev_pipeline'),
        'user': os.getenv('POSTGRES_USER', 'pipeline_admin'),
        'password': os.getenv('POSTGRES_PASSWORD', 'secure_pipeline_2024')
    }

    # The schema file is expected to sit next to this script.
    schema_file = os.path.join(os.path.dirname(__file__), '001-schema.sql')
    if not os.path.exists(schema_file):
        logger.error(f"❌ Schema file not found: {schema_file}")
        return False

    try:
        logger.info("🔧 Starting database migration with psql...")
        logger.info(f" • Database: {db_config['database']}@{db_config['host']}:{db_config['port']}")
        logger.info(f" • User: {db_config['user']}")
        logger.info(f" • Schema file: {schema_file}")

        # psql picks the password up from PGPASSWORD; pass it via a copy of
        # the environment so this process's own environment is not modified.
        env = os.environ.copy()
        env['PGPASSWORD'] = db_config['password']

        base_cmd = _psql_base_cmd(db_config)
        psql_cmd = base_cmd + [
            '-f', schema_file,
            '-v', 'ON_ERROR_STOP=1',  # Stop on first error
            '--echo-errors',          # Show errors
            '--echo-queries',         # Show queries being executed
        ]
        logger.info(" • Executing migration...")
        logger.info(f" • Command: {' '.join(psql_cmd)}")

        # A missing psql binary raises FileNotFoundError here (no shell is
        # involved), which is handled below; there is no exit code to test.
        result = _run_psql(psql_cmd, env, timeout=300)  # 5 minute timeout

        if result.returncode != 0:
            logger.error(f"❌ Migration failed with return code: {result.returncode}")
            if result.stderr:
                logger.error("STDERR:")
                logger.error(result.stderr)
            if result.stdout:
                logger.error("STDOUT:")
                logger.error(result.stdout)
            return False

        logger.info("✅ Migration completed successfully!")
        if result.stdout:
            logger.info("Migration output:")
            # Filter out common psql output noise
            for line in result.stdout.split('\n'):
                if line.strip() and not line.startswith(('SET', 'NOTICE')):
                    logger.info(f" {line}")

        logger.info(" • Verifying migration...")
        _verify_tables(base_cmd, env)
        _check_pgvector(base_cmd, env)

        logger.info("🚀 Database migration completed successfully!")
        logger.info("📊 Production-level database ready for AI repository analysis")
        return True
    except subprocess.TimeoutExpired as exc:
        # Report the timeout that actually fired: the migration uses 300 s,
        # the follow-up checks 30 s.
        logger.error(f"❌ psql timed out after {exc.timeout} seconds")
        return False
    except FileNotFoundError:
        logger.error("❌ psql command not found. Please install PostgreSQL client tools.")
        logger.error(" On Ubuntu/Debian: sudo apt-get install postgresql-client")
        logger.error(" On CentOS/RHEL: sudo yum install postgresql")
        return False
    except Exception as e:
        logger.error(f"❌ Migration failed: {e}")
        return False
def check_psql_available():
    """Report whether the psql client can be executed.

    Runs ``psql --version``.  Returns True (and logs the reported version)
    when the command exits with status 0; returns False when it exits
    non-zero or the executable cannot be found.
    """
    try:
        probe = subprocess.run(['psql', '--version'], capture_output=True, text=True)
    except FileNotFoundError:
        # No psql executable could be located.
        return False

    if probe.returncode != 0:
        return False

    logger.info(f"✓ Found psql: {probe.stdout.strip()}")
    return True
if __name__ == "__main__":
    # Banner
    logger.info("🔧 AI Repository Analysis Database Migration")
    logger.info("=" * 50)

    # Without the psql client nothing below can work: print install hints
    # and exit with a failure status.
    if not check_psql_available():
        install_hints = (
            "❌ psql command not found!",
            "Please install PostgreSQL client tools:",
            " Ubuntu/Debian: sudo apt-get install postgresql-client",
            " CentOS/RHEL: sudo yum install postgresql",
            " macOS: brew install postgresql",
        )
        for hint in install_hints:
            logger.error(hint)
        sys.exit(1)

    # Exit status mirrors the migration outcome: 0 on success, 1 on failure.
    if run_migration():
        sys.exit(0)
    sys.exit(1)