#!/usr/bin/env python3
"""
Database Migration Script using psql command
Executes the complete 001-schema.sql file using PostgreSQL's psql command
"""

import logging
import os
import subprocess
import sys

from dotenv import load_dotenv

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
logger = logging.getLogger(__name__)

# Tables the post-migration verification query looks for in the public schema.
_CORE_TABLES = (
    'code_embeddings',
    'query_embeddings',
    'knowledge_embeddings',
    'repository_metadata',
    'analysis_sessions',
    'file_analysis_history',
)

# Timeouts (seconds) passed to subprocess.run for each kind of psql call.
_MIGRATION_TIMEOUT_SECONDS = 300
_VERIFY_TIMEOUT_SECONDS = 30


def _psql_base_cmd(db_config):
    """Return the psql argv prefix carrying the connection options."""
    return [
        'psql',
        '-h', db_config['host'],
        '-p', str(db_config['port']),
        '-U', db_config['user'],
        '-d', db_config['database'],
    ]


def _run_psql(cmd, env, timeout):
    """Run one psql command, capturing stdout/stderr as text."""
    return subprocess.run(
        cmd,
        env=env,
        capture_output=True,
        text=True,
        timeout=timeout,
    )


def run_migration():
    """Run the database migration using psql command.

    Connection settings are read from the environment after ``load_dotenv()``
    (``POSTGRES_HOST``, ``POSTGRES_PORT``, ``POSTGRES_DB``, ``POSTGRES_USER``,
    ``POSTGRES_PASSWORD``), each with a built-in default. The file
    ``001-schema.sql`` located next to this script is executed with
    ``ON_ERROR_STOP=1``. Afterwards two follow-up queries are issued: one
    listing which of the expected core tables exist and one checking for the
    ``vector`` extension. A non-zero exit from either follow-up query only
    produces a warning.

    Returns:
        bool: True when psql exited with status 0 for the schema file and the
        follow-up queries ran without raising. False when the schema file is
        missing, psql exits non-zero for the schema file, psql cannot be
        found, any psql call times out, or any other exception is raised.
    """
    load_dotenv()

    # Database connection parameters
    # NOTE(review): the fallback password is embedded in source; it is kept
    # for backward compatibility, but consider requiring POSTGRES_PASSWORD.
    db_config = {
        'host': os.getenv('POSTGRES_HOST', 'localhost'),
        'port': os.getenv('POSTGRES_PORT', '5432'),
        'database': os.getenv('POSTGRES_DB', 'dev_pipeline'),
        'user': os.getenv('POSTGRES_USER', 'pipeline_admin'),
        'password': os.getenv('POSTGRES_PASSWORD', 'secure_pipeline_2024'),
    }

    # Schema file path
    schema_file = os.path.join(os.path.dirname(__file__), '001-schema.sql')

    if not os.path.exists(schema_file):
        logger.error(f"❌ Schema file not found: {schema_file}")
        return False

    # Name of the psql step in flight, so a timeout is attributed correctly.
    step = "Migration"

    try:
        logger.info("🔧 Starting database migration with psql...")
        logger.info(f" • Database: {db_config['database']}@{db_config['host']}:{db_config['port']}")
        logger.info(f" • User: {db_config['user']}")
        logger.info(f" • Schema file: {schema_file}")

        # Pass the password through PGPASSWORD so it never appears in argv
        # (and therefore not in the command line logged below).
        env = os.environ.copy()
        env['PGPASSWORD'] = db_config['password']

        base_cmd = _psql_base_cmd(db_config)

        # Build psql command
        psql_cmd = base_cmd + [
            '-f', schema_file,
            '-v', 'ON_ERROR_STOP=1',  # Stop on first error
            '--echo-errors',          # Show errors
            '--echo-queries',         # Show queries being executed
        ]

        logger.info(" • Executing migration...")
        logger.info(f" • Command: {' '.join(psql_cmd)}")

        # Run psql command
        result = _run_psql(psql_cmd, env, _MIGRATION_TIMEOUT_SECONDS)

        # 127 is the conventional shell "command not found" status. A missing
        # executable raises FileNotFoundError instead (handled below), so this
        # can presumably only trigger when `psql` is a wrapper script.
        if result.returncode == 127:
            logger.error("❌ psql command not found. Please install PostgreSQL client tools.")
            logger.error(" On Ubuntu/Debian: sudo apt-get install postgresql-client")
            logger.error(" On CentOS/RHEL: sudo yum install postgresql")
            return False

        # Check for errors
        if result.returncode != 0:
            logger.error(f"❌ Migration failed with return code: {result.returncode}")
            if result.stderr:
                logger.error("STDERR:")
                logger.error(result.stderr)
            if result.stdout:
                logger.error("STDOUT:")
                logger.error(result.stdout)
            return False

        # Log success
        logger.info("✅ Migration completed successfully!")
        if result.stdout:
            logger.info("Migration output:")
            # Filter out common psql output noise
            for line in result.stdout.split('\n'):
                if line.strip() and not line.startswith(('SET', 'NOTICE')):
                    logger.info(f" {line}")

        # Verify migration by checking if key tables exist
        step = "Table verification"
        logger.info(" • Verifying migration...")
        # The IN list is built from the module constant (not external input)
        # so the query and the missing-table report cannot drift apart.
        table_list = ", ".join(f"'{name}'" for name in _CORE_TABLES)
        verify_sql = (
            "SELECT table_name FROM information_schema.tables "
            "WHERE table_schema = 'public' "
            f"AND table_name IN ({table_list}) "
            "ORDER BY table_name;"
        )
        verify_cmd = base_cmd + [
            '-t',  # tuples only
            '-c', verify_sql,
        ]
        verify_result = _run_psql(verify_cmd, env, _VERIFY_TIMEOUT_SECONDS)

        if verify_result.returncode == 0:
            tables = [
                line.strip()
                for line in verify_result.stdout.split('\n')
                if line.strip()
            ]
            logger.info(f" ✓ Found {len(tables)} core tables: {', '.join(tables)}")
            # Report absent tables explicitly; the count alone hides them.
            missing = [name for name in _CORE_TABLES if name not in tables]
            if missing:
                logger.warning(f" ⚠ Missing core tables: {', '.join(missing)}")
        else:
            logger.warning(" ⚠ Could not verify table creation")

        # Check for pgvector extension
        step = "pgvector check"
        vector_cmd = base_cmd + [
            '-t',
            '-c', "SELECT EXISTS(SELECT 1 FROM pg_extension WHERE extname = 'vector');",
        ]
        vector_result = _run_psql(vector_cmd, env, _VERIFY_TIMEOUT_SECONDS)

        if vector_result.returncode == 0:
            # With -t psql prints just the boolean ('t' or 'f'), padded.
            has_vector = vector_result.stdout.strip() == 't'
            if has_vector:
                logger.info(" ✓ pgvector extension is available")
            else:
                logger.warning(" ⚠ pgvector extension not available - vector operations will be limited")
        else:
            logger.warning(" ⚠ Could not check for pgvector extension")

        logger.info("🚀 Database migration completed successfully!")
        logger.info("📊 Production-level database ready for AI repository analysis")

        return True

    except subprocess.TimeoutExpired as exc:
        # Report the step and the timeout that actually applied; the
        # verification calls use a much shorter limit than the migration.
        logger.error(f"❌ {step} timed out after {exc.timeout:g} seconds")
        return False
    except FileNotFoundError:
        logger.error("❌ psql command not found. Please install PostgreSQL client tools.")
        return False
    except Exception as e:
        # Top-level boundary: report and signal failure via the return value.
        logger.error(f"❌ Migration failed: {e}")
        return False


def check_psql_available():
    """Check if psql command is available.

    Runs ``psql --version`` and logs the reported version on success.

    Returns:
        bool: True when the command exits with status 0; False when it exits
        non-zero or the executable cannot be found.
    """
    try:
        result = subprocess.run(['psql', '--version'], capture_output=True, text=True)
    except FileNotFoundError:
        return False

    if result.returncode != 0:
        return False

    logger.info(f"✓ Found psql: {result.stdout.strip()}")
    return True


if __name__ == "__main__":
    logger.info("🔧 AI Repository Analysis Database Migration")
    logger.info("=" * 50)

    # Check if psql is available
    if not check_psql_available():
        logger.error("❌ psql command not found!")
        logger.error("Please install PostgreSQL client tools:")
        logger.error(" Ubuntu/Debian: sudo apt-get install postgresql-client")
        logger.error(" CentOS/RHEL: sudo yum install postgresql")
        logger.error(" macOS: brew install postgresql")
        sys.exit(1)

    # Run migration
    success = run_migration()
    sys.exit(0 if success else 1)