diff --git a/docker-compose.yml b/docker-compose.yml index 2ab0cc4..6ea84f8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -654,7 +654,7 @@ services: - GITHUB_CLIENT_ID=Ov23liQgF14aogXVZNCR - GITHUB_CLIENT_SECRET=8bf82a29154fdccb837bc150539a2226d00b5da5 - GITHUB_REDIRECT_URI=http://localhost:8000/api/github/auth/github/callback - - ATTACHED_REPOS_DIR=/app/git-repos + - ATTACHED_REPOS_DIR=/home/ubuntu/git/git-repo - SESSION_SECRET=git-integration-secret-key-2024 - JWT_ACCESS_SECRET=access-secret-key-2024-tech4biz-secure_pipeline_2024 - API_GATEWAY_PUBLIC_URL=http://localhost:8000 @@ -677,10 +677,10 @@ services: # Additional environment variables for git-integration service - ENABLE_BACKGROUND_DIFF_PROCESSING=true - DIFF_PROCESSING_INTERVAL_MS=30000 - - DIFF_STORAGE_PATH=/app/git-repos/diffs + - DIFF_STORAGE_PATH=/home/ubuntu/git/git-diff - MAX_DIFF_SIZE_BYTES=10485760 volumes: - - /home/tech4biz/Desktop/Projectsnew/CODENUK1/git-repos:/app/git-repos + - /home/tech4biz/Desktop/Projectsnew/CODENUK1/git-repos:/home/ubuntu/git/git-repo networks: - pipeline_network depends_on: diff --git a/services/git-integration/MIGRATION_STRATEGY.md b/services/git-integration/MIGRATION_STRATEGY.md new file mode 100644 index 0000000..2292ea5 --- /dev/null +++ b/services/git-integration/MIGRATION_STRATEGY.md @@ -0,0 +1,144 @@ +# šŸ—ļø Enterprise Database Migration Strategy + +## 🚨 Current Issues Identified + +### Critical Problems +1. **No Migration State Tracking** - Migrations run repeatedly causing conflicts +2. **Schema Duplication** - Migration 017 recreates entire schema (20KB) +3. **Inconsistent Patterns** - Mix of idempotent and non-idempotent operations +4. **Missing Versioning** - No proper version control or rollback capability +5. **Conflicting Constraints** - Same columns added with different FK behaviors + +### Impact Assessment +- **High Risk**: Production deployments may fail +- **Data Integrity**: Potential for inconsistent schema states +- **Maintenance**: Extremely difficult to debug and maintain +- **Scalability**: Cannot handle complex schema evolution + +## šŸŽÆ Recommended Solution Architecture + +### 1. Migration Tracking System +```sql +-- Core tracking table +schema_migrations ( + version, filename, checksum, applied_at, + execution_time_ms, success, error_message +) + +-- Concurrency control +migration_locks ( + locked_at, locked_by, process_id +) +``` + +### 2. Enterprise Migration Runner +- **State Tracking**: Records all migration attempts +- **Checksum Validation**: Prevents modified migrations from re-running +- **Concurrency Control**: Prevents parallel migration execution +- **Error Handling**: Distinguishes between fatal and idempotent errors +- **Rollback Support**: Tracks rollback instructions + +### 3. Migration Naming Convention +``` +XXX_descriptive_name.sql +ā”œā”€ā”€ 000_migration_tracking_system.sql # Infrastructure +ā”œā”€ā”€ 001_core_tables.sql # Core schema +ā”œā”€ā”€ 002_indexes_and_constraints.sql # Performance +ā”œā”€ā”€ 003_user_management.sql # Features +└── 999_data_cleanup.sql # Maintenance +``` + +## šŸ”§ Implementation Plan + +### Phase 1: Infrastructure Setup āœ… +- [x] Create migration tracking system (`000_migration_tracking_system.sql`) +- [x] Build enterprise migration runner (`migrate_v2.js`) +- [x] Add conflict resolution (`021_cleanup_migration_conflicts.sql`) + +### Phase 2: Migration Cleanup (Recommended) +1. **Backup Current Database** +2. **Run New Migration System** +3. **Validate Schema Consistency** +4. 
**Remove Duplicate Migrations**
+
+### Phase 3: Process Improvement
+1. **Code Review Process** for all new migrations
+2. **Testing Strategy** with migration rollback tests
+3. **Documentation Standards** for complex schema changes
+
+## šŸ“‹ Migration Best Practices
+
+### DO āœ…
+- Always use `IF NOT EXISTS` for idempotent operations
+- Include rollback instructions in comments
+- Test migrations on a copy of production data
+- Use transactions for multi-step operations
+- Document breaking changes clearly
+
+### DON'T āŒ
+- Never modify existing migration files
+- Don't create massive "complete schema" migrations
+- Avoid mixing DDL and DML in the same migration
+- Don't skip version numbers
+- Never run migrations manually in production
+
+## šŸš€ Quick Start Guide
+
+### 1. Initialize New System
+```bash
+# Run the new migration system
+node src/migrations/migrate_v2.js
+```
+
+### 2. Verify Status
+```sql
+-- Check migration history
+SELECT * FROM get_migration_history();
+
+-- Get current version
+SELECT get_current_schema_version();
+```
+
+### 3. Create New Migration
+```bash
+# Follow naming convention
+touch 022_add_new_feature.sql
+```
+
+A hypothetical skeleton that follows these conventions is shown in the example template at the end of this document.
+
+## šŸ“Š Schema Health Metrics
+
+### Current State
+- **Tables**: 41 total
+- **Migrations**: 21 files (20 + tracking)
+- **Conflicts**: Multiple (resolved in 021)
+- **Duplications**: High (migration 017)
+
+### Target State
+- **Tracking**: Full migration history
+- **Consistency**: Zero schema conflicts
+- **Performance**: Optimized indexes
+- **Maintainability**: Clear migration path
+
+## šŸ” Monitoring & Maintenance
+
+### Regular Checks
+1. **Weekly**: Review failed migrations
+2. **Monthly**: Analyze schema drift
+3. **Quarterly**: Performance optimization review
+
+### Alerts
+- Migration failures
+- Long-running migrations (>5 minutes)
+- Schema inconsistencies between environments
+
+## šŸŽÆ Success Criteria
+
+- āœ… Zero migration conflicts
+- āœ… Full state tracking
+- āœ… Rollback capability
+- āœ… Performance optimization
+- āœ… Documentation compliance
+
+---
+
+**Next Steps**: Run the new migration system and validate that all schema objects are correctly created with the proper relationships and constraints. 
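+
+## 🧩 Example Migration Template
+
+A minimal sketch of what a new migration following the rules above might look like. The table and column names (`feature_flags`, `flag_name`) are placeholders for illustration, not part of the actual schema:
+
+```sql
+-- Migration 022: Add feature flags table (hypothetical example)
+-- Rollback: DROP TABLE IF EXISTS feature_flags;
+
+-- Idempotent table creation
+CREATE TABLE IF NOT EXISTS feature_flags (
+    id SERIAL PRIMARY KEY,
+    flag_name VARCHAR(255) NOT NULL UNIQUE,
+    enabled BOOLEAN DEFAULT false,
+    created_at TIMESTAMP DEFAULT NOW()
+);
+
+-- Idempotent index creation
+CREATE INDEX IF NOT EXISTS idx_feature_flags_enabled ON feature_flags(enabled);
+```
+
+The runner derives the version from the three-digit prefix (`parseVersion` matches `/^(\d{3})_/`), records the file's checksum in `schema_migrations`, and skips it on subsequent runs. The rollback comment is informational only: the tracking table has a `rollback_sql` column, but `migrate_v2.js` neither populates nor executes it.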
diff --git a/services/git-integration/src/migrations/000_migration_tracking_system.sql b/services/git-integration/src/migrations/000_migration_tracking_system.sql new file mode 100644 index 0000000..0c22cef --- /dev/null +++ b/services/git-integration/src/migrations/000_migration_tracking_system.sql @@ -0,0 +1,154 @@ +-- Migration 000: Migration Tracking System +-- This MUST be the first migration to run +-- Creates the infrastructure for tracking migration state + +-- ============================================= +-- Migration Tracking Infrastructure +-- ============================================= + +-- Create schema_migrations table to track applied migrations +CREATE TABLE IF NOT EXISTS schema_migrations ( + id SERIAL PRIMARY KEY, + version VARCHAR(255) NOT NULL UNIQUE, + filename VARCHAR(500) NOT NULL, + checksum VARCHAR(64), -- SHA-256 of migration content + applied_at TIMESTAMP DEFAULT NOW(), + execution_time_ms INTEGER, + success BOOLEAN DEFAULT true, + error_message TEXT, + rollback_sql TEXT, -- Optional rollback instructions + created_by VARCHAR(100) DEFAULT 'system' +); + +-- Create index for fast lookups +CREATE INDEX IF NOT EXISTS idx_schema_migrations_version ON schema_migrations(version); +CREATE INDEX IF NOT EXISTS idx_schema_migrations_applied_at ON schema_migrations(applied_at); +CREATE INDEX IF NOT EXISTS idx_schema_migrations_success ON schema_migrations(success); + +-- Create migration_locks table to prevent concurrent migrations +CREATE TABLE IF NOT EXISTS migration_locks ( + id INTEGER PRIMARY KEY DEFAULT 1, + locked_at TIMESTAMP DEFAULT NOW(), + locked_by VARCHAR(100) DEFAULT 'system', + process_id VARCHAR(100), + CONSTRAINT single_lock CHECK (id = 1) +); + +-- ============================================= +-- Migration Helper Functions +-- ============================================= + +-- Function to check if migration has been applied +CREATE OR REPLACE FUNCTION migration_applied(migration_version VARCHAR(255)) +RETURNS BOOLEAN AS $$ +BEGIN + RETURN EXISTS ( + SELECT 1 FROM schema_migrations + WHERE version = migration_version AND success = true + ); +END; +$$ LANGUAGE plpgsql; + +-- Function to record migration execution +CREATE OR REPLACE FUNCTION record_migration( + migration_version VARCHAR(255), + migration_filename VARCHAR(500), + migration_checksum VARCHAR(64) DEFAULT NULL, + execution_time INTEGER DEFAULT NULL, + migration_success BOOLEAN DEFAULT true, + error_msg TEXT DEFAULT NULL +) +RETURNS VOID AS $$ +BEGIN + INSERT INTO schema_migrations ( + version, filename, checksum, execution_time_ms, success, error_message + ) VALUES ( + migration_version, migration_filename, migration_checksum, + execution_time, migration_success, error_msg + ) + ON CONFLICT (version) DO UPDATE SET + applied_at = NOW(), + execution_time_ms = EXCLUDED.execution_time_ms, + success = EXCLUDED.success, + error_message = EXCLUDED.error_message; +END; +$$ LANGUAGE plpgsql; + +-- Function to acquire migration lock +CREATE OR REPLACE FUNCTION acquire_migration_lock(process_identifier VARCHAR(100)) +RETURNS BOOLEAN AS $$ +BEGIN + -- Try to acquire lock + INSERT INTO migration_locks (locked_by, process_id) + VALUES ('system', process_identifier) + ON CONFLICT (id) DO NOTHING; + + -- Check if we got the lock + RETURN EXISTS ( + SELECT 1 FROM migration_locks + WHERE process_id = process_identifier + ); +END; +$$ LANGUAGE plpgsql; + +-- Function to release migration lock +CREATE OR REPLACE FUNCTION release_migration_lock(process_identifier VARCHAR(100)) +RETURNS VOID AS $$ +BEGIN + DELETE 
FROM migration_locks WHERE process_id = process_identifier;
+END;
+$$ LANGUAGE plpgsql;
+
+-- =============================================
+-- Database Metadata Functions
+-- =============================================
+
+-- Function to get current schema version
+CREATE OR REPLACE FUNCTION get_current_schema_version()
+RETURNS VARCHAR(255) AS $$
+BEGIN
+    RETURN (
+        SELECT version 
+        FROM schema_migrations 
+        WHERE success = true 
+        ORDER BY applied_at DESC 
+        LIMIT 1
+    );
+END;
+$$ LANGUAGE plpgsql;
+
+-- Function to get migration history
+CREATE OR REPLACE FUNCTION get_migration_history()
+RETURNS TABLE (
+    version VARCHAR(255),
+    filename VARCHAR(500),
+    applied_at TIMESTAMP,
+    execution_time_ms INTEGER,
+    success BOOLEAN
+) AS $$
+BEGIN
+    RETURN QUERY
+    SELECT 
+        sm.version,
+        sm.filename,
+        sm.applied_at,
+        sm.execution_time_ms,
+        sm.success
+    FROM schema_migrations sm
+    ORDER BY sm.applied_at DESC;
+END;
+$$ LANGUAGE plpgsql;
+
+-- =============================================
+-- Initial Migration Record
+-- =============================================
+
+-- Record this migration as applied
+SELECT record_migration('000', '000_migration_tracking_system.sql', NULL, NULL, true, NULL);
+
+-- Display current status
+DO $$
+BEGIN
+    RAISE NOTICE 'āœ… Migration tracking system initialized';
+    RAISE NOTICE 'Current schema version: %', get_current_schema_version();
+END $$;
diff --git a/services/git-integration/src/migrations/013_add_user_id_to_github_user_tokens.sql b/services/git-integration/src/migrations/013_add_user_id_to_github_user_tokens.sql
new file mode 100644
index 0000000..2c80d8d
--- /dev/null
+++ b/services/git-integration/src/migrations/013_add_user_id_to_github_user_tokens.sql
@@ -0,0 +1,21 @@
+-- Migration 013: Add user_id to github_user_tokens table
+-- This fixes the GitHub OAuth callback error: "Cannot read properties of undefined (reading 'count')"
+
+-- Add user_id column to github_user_tokens table
+ALTER TABLE github_user_tokens 
+ADD COLUMN IF NOT EXISTS user_id UUID;
+
+-- Add is_primary column to support multiple GitHub accounts per user
+ALTER TABLE github_user_tokens 
+ADD COLUMN IF NOT EXISTS is_primary BOOLEAN DEFAULT false;
+
+-- Create index for better performance
+CREATE INDEX IF NOT EXISTS idx_github_user_tokens_user_id ON github_user_tokens(user_id);
+
+-- Add partial unique index to prevent duplicate primary accounts per user
+CREATE UNIQUE INDEX IF NOT EXISTS idx_github_user_tokens_user_primary 
+ON github_user_tokens(user_id) 
+WHERE is_primary = true;
+
+-- Update existing records to set a default user_id if needed (optional)
+-- UPDATE github_user_tokens SET user_id = uuid_generate_v4() WHERE user_id IS NULL;
diff --git a/services/git-integration/src/migrations/020_add_user_id_to_all_repositories.sql b/services/git-integration/src/migrations/020_add_user_id_to_all_repositories.sql
new file mode 100644
index 0000000..abf42e3
--- /dev/null
+++ b/services/git-integration/src/migrations/020_add_user_id_to_all_repositories.sql
@@ -0,0 +1,45 @@
+-- Migration 020: Add user_id column to all_repositories table
+-- This migration ensures the user_id column exists in all_repositories table
+
+-- Check if user_id column exists, if not add it
+DO $$
+BEGIN
+    -- Check if the column exists
+    IF NOT EXISTS (
+        SELECT 1 
+        FROM information_schema.columns 
+        WHERE table_name = 'all_repositories' 
+        AND column_name = 'user_id'
+        AND table_schema = 'public'
+    ) THEN
+        -- Add the user_id column
+        ALTER TABLE all_repositories 
+        ADD COLUMN user_id UUID REFERENCES 
users(id) ON DELETE SET NULL; + + RAISE NOTICE 'Added user_id column to all_repositories table'; + ELSE + RAISE NOTICE 'user_id column already exists in all_repositories table'; + END IF; +END $$; + +-- Create index for better performance if it doesn't exist +CREATE INDEX IF NOT EXISTS idx_all_repositories_user_id ON all_repositories(user_id); + +-- Add comment to document the column +COMMENT ON COLUMN all_repositories.user_id IS 'References the user who owns/created this repository record'; + +-- Verify the column was added +DO $$ +BEGIN + IF EXISTS ( + SELECT 1 + FROM information_schema.columns + WHERE table_name = 'all_repositories' + AND column_name = 'user_id' + AND table_schema = 'public' + ) THEN + RAISE NOTICE 'SUCCESS: user_id column exists in all_repositories table'; + ELSE + RAISE EXCEPTION 'FAILED: user_id column was not added to all_repositories table'; + END IF; +END $$; diff --git a/services/git-integration/src/migrations/021_cleanup_migration_conflicts.sql b/services/git-integration/src/migrations/021_cleanup_migration_conflicts.sql new file mode 100644 index 0000000..5d267fb --- /dev/null +++ b/services/git-integration/src/migrations/021_cleanup_migration_conflicts.sql @@ -0,0 +1,202 @@ +-- Migration 021: Cleanup Migration Conflicts +-- This migration resolves conflicts and ensures schema consistency + +-- ============================================= +-- Schema Consistency Fixes +-- ============================================= + +-- Fix missing ID column in repository_directories (from migration 017) +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'repository_directories' + AND column_name = 'id' + AND table_schema = 'public' + ) THEN + ALTER TABLE repository_directories + ADD COLUMN id UUID PRIMARY KEY DEFAULT uuid_generate_v4(); + RAISE NOTICE 'Added missing id column to repository_directories'; + END IF; +END $$; + +-- Ensure user_id column exists with consistent constraints +DO $$ +BEGIN + -- Check if user_id exists in all_repositories + IF NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'all_repositories' + AND column_name = 'user_id' + AND table_schema = 'public' + ) THEN + ALTER TABLE all_repositories + ADD COLUMN user_id UUID REFERENCES users(id) ON DELETE SET NULL; + RAISE NOTICE 'Added user_id column to all_repositories'; + END IF; + + -- Ensure index exists + IF NOT EXISTS ( + SELECT 1 FROM pg_indexes + WHERE tablename = 'all_repositories' + AND indexname = 'idx_all_repositories_user_id' + ) THEN + CREATE INDEX idx_all_repositories_user_id ON all_repositories(user_id); + RAISE NOTICE 'Created index on all_repositories.user_id'; + END IF; +END $$; + +-- Fix template_id references that may not exist +DO $$ +BEGIN + -- Check if templates table exists + IF NOT EXISTS ( + SELECT 1 FROM information_schema.tables + WHERE table_name = 'templates' + AND table_schema = 'public' + ) THEN + -- Remove foreign key constraint if templates table doesn't exist + IF EXISTS ( + SELECT 1 FROM information_schema.table_constraints + WHERE table_name = 'all_repositories' + AND constraint_type = 'FOREIGN KEY' + AND constraint_name LIKE '%template_id%' + ) THEN + -- Find and drop the constraint + DECLARE + constraint_name_var TEXT; + BEGIN + SELECT constraint_name INTO constraint_name_var + FROM information_schema.table_constraints + WHERE table_name = 'all_repositories' + AND constraint_type = 'FOREIGN KEY' + AND constraint_name LIKE '%template_id%' + LIMIT 1; + + IF constraint_name_var IS NOT NULL THEN + 
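+                        -- DDL statements cannot take bound parameters, so the constraint
+                        -- name discovered above is concatenated into a dynamic statement
+                        -- and run via EXECUTE (quote_ident() on the name would harden this)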
EXECUTE 'ALTER TABLE all_repositories DROP CONSTRAINT ' || constraint_name_var; + RAISE NOTICE 'Dropped foreign key constraint % (templates table does not exist)', constraint_name_var; + END IF; + END; + END IF; + END IF; +END $$; + +-- ============================================= +-- Index Optimization +-- ============================================= + +-- Ensure all critical indexes exist +CREATE INDEX IF NOT EXISTS idx_all_repositories_provider_name ON all_repositories(provider_name); +CREATE INDEX IF NOT EXISTS idx_all_repositories_owner_name ON all_repositories(owner_name); +CREATE INDEX IF NOT EXISTS idx_all_repositories_sync_status ON all_repositories(sync_status); +CREATE INDEX IF NOT EXISTS idx_all_repositories_created_at ON all_repositories(created_at); + +-- Repository storage indexes +CREATE INDEX IF NOT EXISTS idx_repository_storage_status ON repository_storage(storage_status); +CREATE INDEX IF NOT EXISTS idx_repository_files_extension ON repository_files(file_extension); +CREATE INDEX IF NOT EXISTS idx_repository_files_is_binary ON repository_files(is_binary); + +-- Webhook indexes for performance +CREATE INDEX IF NOT EXISTS idx_github_webhooks_event_type ON github_webhooks(event_type); +CREATE INDEX IF NOT EXISTS idx_github_webhooks_created_at ON github_webhooks(created_at); + +-- ============================================= +-- Data Integrity Checks +-- ============================================= + +-- Check for orphaned records and report +DO $$ +DECLARE + orphaned_count INTEGER; +BEGIN + -- Check for repositories without valid storage references + SELECT COUNT(*) INTO orphaned_count + FROM all_repositories ar + LEFT JOIN repository_storage rs ON ar.id = rs.repository_id + WHERE rs.id IS NULL; + + IF orphaned_count > 0 THEN + RAISE NOTICE 'Found % repositories without storage records', orphaned_count; + END IF; + + -- Check for files without valid directory references + SELECT COUNT(*) INTO orphaned_count + FROM repository_files rf + LEFT JOIN repository_directories rd ON rf.directory_id = rd.id + WHERE rf.directory_id IS NOT NULL AND rd.id IS NULL; + + IF orphaned_count > 0 THEN + RAISE NOTICE 'Found % files with invalid directory references', orphaned_count; + END IF; +END $$; + +-- ============================================= +-- Performance Optimizations +-- ============================================= + +-- Update table statistics for better query planning +ANALYZE all_repositories; +ANALYZE repository_storage; +ANALYZE repository_files; +ANALYZE repository_directories; +ANALYZE github_webhooks; + +-- ============================================= +-- Migration Validation +-- ============================================= + +-- Validate critical tables exist +DO $$ +DECLARE + missing_tables TEXT[] := ARRAY[]::TEXT[]; +BEGIN + -- Check for required tables + IF NOT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'all_repositories') THEN + missing_tables := array_append(missing_tables, 'all_repositories'); + END IF; + + IF NOT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'repository_storage') THEN + missing_tables := array_append(missing_tables, 'repository_storage'); + END IF; + + IF NOT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'github_user_tokens') THEN + missing_tables := array_append(missing_tables, 'github_user_tokens'); + END IF; + + IF array_length(missing_tables, 1) > 0 THEN + RAISE EXCEPTION 'Critical tables missing: %', array_to_string(missing_tables, ', '); + ELSE + RAISE NOTICE 'āœ… All 
critical tables present';
+    END IF;
+END $$;
+
+-- Validate critical columns exist
+DO $$
+DECLARE
+    missing_columns TEXT[] := ARRAY[]::TEXT[];
+BEGIN
+    -- Check for user_id in all_repositories
+    IF NOT EXISTS (
+        SELECT 1 FROM information_schema.columns 
+        WHERE table_name = 'all_repositories' AND column_name = 'user_id'
+    ) THEN
+        missing_columns := array_append(missing_columns, 'all_repositories.user_id');
+    END IF;
+    
+    -- Check for provider_name in all_repositories
+    IF NOT EXISTS (
+        SELECT 1 FROM information_schema.columns 
+        WHERE table_name = 'all_repositories' AND column_name = 'provider_name'
+    ) THEN
+        missing_columns := array_append(missing_columns, 'all_repositories.provider_name');
+    END IF;
+    
+    IF array_length(missing_columns, 1) > 0 THEN
+        RAISE EXCEPTION 'Critical columns missing: %', array_to_string(missing_columns, ', ');
+    ELSE
+        RAISE NOTICE 'āœ… All critical columns present';
+    END IF;
+END $$;
+
+-- RAISE is only valid inside PL/pgSQL, so the completion notice must run in a DO block
+DO $$
+BEGIN
+    RAISE NOTICE 'šŸŽ‰ Migration 021 completed - Schema conflicts resolved';
+END $$;
diff --git a/services/git-integration/src/migrations/migrate_v2.js b/services/git-integration/src/migrations/migrate_v2.js
new file mode 100644
index 0000000..5701894
--- /dev/null
+++ b/services/git-integration/src/migrations/migrate_v2.js
@@ -0,0 +1,265 @@
+const fs = require('fs');
+const path = require('path');
+const crypto = require('crypto');
+const database = require('../config/database');
+
+const migrationsDir = path.join(__dirname);
+
+/**
+ * Enterprise-grade migration runner with proper state tracking
+ */
+class MigrationRunner {
+  constructor() {
+    this.processId = `migration_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
+  }
+
+  /**
+   * Calculate SHA-256 checksum of migration content
+   */
+  calculateChecksum(content) {
+    return crypto.createHash('sha256').update(content).digest('hex');
+  }
+
+  /**
+   * Parse migration version from filename
+   */
+  parseVersion(filename) {
+    const match = filename.match(/^(\d{3})_/);
+    return match ? match[1] : null;
+  }
+
+  /**
+   * Check if migration tracking system exists
+   */
+  async ensureMigrationTrackingExists() {
+    try {
+      const result = await database.query(`
+        SELECT EXISTS (
+          SELECT 1 FROM information_schema.tables 
+          WHERE table_name = 'schema_migrations'
+          AND table_schema = 'public'
+        ) as exists
+      `);
+      
+      return result.rows[0].exists;
+    } catch (error) {
+      console.error('Error checking migration tracking:', error);
+      return false;
+    }
+  }
+
+  /**
+   * Initialize migration tracking system
+   */
+  async initializeMigrationTracking() {
+    console.log('šŸ”§ Initializing migration tracking system...');
+    
+    const trackingMigrationPath = path.join(migrationsDir, '000_migration_tracking_system.sql');
+    if (!fs.existsSync(trackingMigrationPath)) {
+      throw new Error('Migration tracking system file not found: 000_migration_tracking_system.sql');
+    }
+    
+    const trackingSQL = fs.readFileSync(trackingMigrationPath, 'utf8');
+    await database.query(trackingSQL);
+    console.log('āœ… Migration tracking system initialized');
+  }
+
+  /**
+   * Acquire migration lock to prevent concurrent runs
+   */
+  async acquireLock() {
+    console.log(`šŸ”’ Acquiring migration lock (${this.processId})...`);
+    
+    const result = await database.query(
+      'SELECT acquire_migration_lock($1) as acquired',
+      [this.processId]
+    );
+    
+    if (!result.rows[0].acquired) {
+      throw new Error('Could not acquire migration lock. 
Another migration may be running.'); + } + + console.log('āœ… Migration lock acquired'); + } + + /** + * Release migration lock + */ + async releaseLock() { + try { + await database.query('SELECT release_migration_lock($1)', [this.processId]); + console.log('šŸ”“ Migration lock released'); + } catch (error) { + console.warn('āš ļø Error releasing migration lock:', error.message); + } + } + + /** + * Check if migration has already been applied + */ + async isMigrationApplied(version) { + const result = await database.query( + 'SELECT migration_applied($1) as applied', + [version] + ); + return result.rows[0].applied; + } + + /** + * Record migration execution + */ + async recordMigration(version, filename, checksum, executionTime, success, errorMessage = null) { + await database.query( + 'SELECT record_migration($1, $2, $3, $4, $5, $6)', + [version, filename, checksum, executionTime, success, errorMessage] + ); + } + + /** + * Get list of migration files to run + */ + getMigrationFiles() { + return fs.readdirSync(migrationsDir) + .filter(file => file.endsWith('.sql') && file !== '000_migration_tracking_system.sql') + .sort(); + } + + /** + * Run a single migration + */ + async runSingleMigration(migrationFile) { + const version = this.parseVersion(migrationFile); + if (!version) { + console.warn(`āš ļø Skipping file with invalid version format: ${migrationFile}`); + return; + } + + // Check if already applied + if (await this.isMigrationApplied(version)) { + console.log(`ā­ļø Skipping already applied migration: ${migrationFile}`); + return; + } + + console.log(`šŸš€ Running migration: ${migrationFile}`); + + const migrationPath = path.join(migrationsDir, migrationFile); + const migrationSQL = fs.readFileSync(migrationPath, 'utf8'); + const checksum = this.calculateChecksum(migrationSQL); + + const startTime = Date.now(); + let success = false; + let errorMessage = null; + + try { + await database.query(migrationSQL); + success = true; + console.log(`āœ… Migration ${migrationFile} completed successfully!`); + } catch (err) { + errorMessage = err.message; + console.error(`āŒ Migration ${migrationFile} failed:`, err.message); + + // Check if it's an idempotent error we can ignore + const isIdempotentError = this.isIdempotentError(err); + if (isIdempotentError) { + console.warn(`āš ļø Treating as idempotent error, marking as successful`); + success = true; + errorMessage = `Idempotent: ${err.message}`; + } else { + throw err; // Re-throw non-idempotent errors + } + } finally { + const executionTime = Date.now() - startTime; + await this.recordMigration(version, migrationFile, checksum, executionTime, success, errorMessage); + } + } + + /** + * Check if error is idempotent (safe to ignore) + */ + isIdempotentError(err) { + const message = (err && err.message) ? err.message.toLowerCase() : ''; + const code = err && err.code ? 
err.code : '';
+    
+    return message.includes('already exists') ||
+           code === '42710' /* duplicate_object */ ||
+           code === '42P07' /* duplicate_table */ ||
+           code === '42701' /* duplicate_column */ ||
+           code === '42P06' /* duplicate_schema */ ||
+           code === '42723' /* duplicate_function */;
+  }
+
+  /**
+   * Display migration status
+   */
+  async displayStatus() {
+    try {
+      const result = await database.query('SELECT * FROM get_migration_history() LIMIT 10');
+      console.log('\nšŸ“Š Recent Migration History:');
+      console.log('Version | Filename | Applied At | Success | Time (ms)');
+      console.log('--------|----------|------------|---------|----------');
+      
+      result.rows.forEach(row => {
+        const status = row.success ? 'āœ…' : 'āŒ';
+        const time = row.execution_time_ms || 'N/A';
+        console.log(`${row.version.padEnd(7)} | ${row.filename.substring(0, 30).padEnd(30)} | ${row.applied_at.toISOString().substring(0, 19)} | ${status.padEnd(7)} | ${time}`);
+      });
+      
+      const versionResult = await database.query('SELECT get_current_schema_version() as version');
+      console.log(`\nšŸ·ļø Current Schema Version: ${versionResult.rows[0].version || 'None'}`);
+    } catch (error) {
+      console.warn('āš ļø Could not display migration status:', error.message);
+    }
+  }
+
+  /**
+   * Main migration runner
+   */
+  async runMigrations() {
+    console.log('šŸš€ Starting Enterprise Database Migration System...');
+    
+    try {
+      // Connect to database
+      await database.testConnection();
+      console.log('āœ… Database connected successfully');
+      
+      // Ensure migration tracking exists
+      const trackingExists = await this.ensureMigrationTrackingExists();
+      if (!trackingExists) {
+        await this.initializeMigrationTracking();
+      }
+      
+      // Acquire lock
+      await this.acquireLock();
+      
+      // Get migration files
+      const migrationFiles = this.getMigrationFiles();
+      console.log(`šŸ“„ Found ${migrationFiles.length} migration files to process`);
+      
+      // Run migrations
+      for (const migrationFile of migrationFiles) {
+        await this.runSingleMigration(migrationFile);
+      }
+      
+      // Display status
+      await this.displayStatus();
+      
+      console.log('šŸŽ‰ All migrations completed successfully!');
+      
+    } catch (error) {
+      console.error('āŒ Migration failed:', error);
+      // process.exit() would terminate before the finally block runs and leave
+      // the migration lock held; set the exit code and let cleanup finish instead
+      process.exitCode = 1;
+    } finally {
+      await this.releaseLock();
+      await database.close();
+      console.log('šŸ”Œ Database connection closed');
+    }
+  }
+}
+
+// Run migrations if this file is executed directly
+if (require.main === module) {
+  const runner = new MigrationRunner();
+  runner.runMigrations();
+}
+
+module.exports = { MigrationRunner };
diff --git a/services/git-integration/src/routes/github-integration.routes.js b/services/git-integration/src/routes/github-integration.routes.js
index bbc38b6..4311ee3 100644
--- a/services/git-integration/src/routes/github-integration.routes.js
+++ b/services/git-integration/src/routes/github-integration.routes.js
@@ -90,8 +90,13 @@ router.post('/attach-repository', async (req, res) => {
 
     // Check if user has GitHub authentication first
     try {
-      const authStatus = await oauthService.getAuthStatus();
-      hasAuth = authStatus.connected;
+      if (userId) {
+        const userTokens = await oauthService.getUserTokens(userId);
+        hasAuth = userTokens && userTokens.length > 0;
+      } else {
+        const authStatus = await oauthService.getAuthStatus();
+        hasAuth = authStatus.connected;
+      }
       console.log(`šŸ” User authentication status: ${hasAuth ? 
'Connected' : 'Not connected'}`); } catch (authError) { console.log(`āŒ Error checking auth status: ${authError.message}`); diff --git a/services/git-integration/src/services/github-integration.service.js b/services/git-integration/src/services/github-integration.service.js index 31a57ea..b6b6f52 100644 --- a/services/git-integration/src/services/github-integration.service.js +++ b/services/git-integration/src/services/github-integration.service.js @@ -34,6 +34,9 @@ class GitHubIntegrationService { // Normalize the URL first let normalizedUrl = url.trim(); + // Remove trailing slashes and .git extensions + normalizedUrl = normalizedUrl.replace(/\/+$/, '').replace(/\.git$/, ''); + // Handle URLs without protocol if (!normalizedUrl.startsWith('http://') && !normalizedUrl.startsWith('https://') && !normalizedUrl.startsWith('git@')) { normalizedUrl = 'https://' + normalizedUrl; @@ -46,32 +49,39 @@ class GitHubIntegrationService { // Handle git+https format: git+https://github.com/owner/repo.git if (normalizedUrl.startsWith('git+https://') || normalizedUrl.startsWith('git+http://')) { - normalizedUrl = normalizedUrl.replace('git+', ''); + normalizedUrl = normalizedUrl.replace(/^git\+/, ''); } - // Validate that it's a GitHub URL before parsing - if (!normalizedUrl.includes('github.com')) { - throw new Error(`Invalid GitHub repository URL: ${url}`); + // More robust GitHub URL validation (after all transformations) + const githubDomainRegex = /^https?:\/\/(www\.)?github\.com\//i; + if (!githubDomainRegex.test(normalizedUrl)) { + throw new Error(`Invalid GitHub repository URL: ${url}. Must be a GitHub.com URL.`); } // Clean URL by removing query parameters and fragments for parsing const cleanUrl = normalizedUrl.split('?')[0].split('#')[0]; - // Use the parse-github-url library to parse the URL - const parsed = parseGitHubUrl(cleanUrl); + // Try to parse with the library first + let parsed = parseGitHubUrl(cleanUrl); + // If library parsing fails, try manual parsing as fallback if (!parsed || !parsed.owner || !parsed.name) { - throw new Error(`Invalid GitHub repository URL: ${url}`); + const manualParsed = this.manualParseGitHubUrl(cleanUrl); + if (manualParsed) { + parsed = manualParsed; + } else { + throw new Error(`Invalid GitHub repository URL format: ${url}`); + } } // Additional validation: reject URLs with invalid paths const urlWithoutQuery = normalizedUrl.split('?')[0].split('#')[0]; const pathAfterRepo = urlWithoutQuery.split(/github\.com\/[^\/]+\/[^\/]+/)[1]; if (pathAfterRepo && pathAfterRepo.length > 0) { - const validPaths = ['/tree/', '/blob/', '/commit/', '/pull/', '/issue', '/archive/', '/releases', '/actions', '/projects', '/wiki', '/settings', '/security', '/insights', '/pulse', '/graphs', '/network', '/compare']; + const validPaths = ['/tree/', '/blob/', '/commit/', '/pull/', '/issue', '/archive/', '/releases', '/actions', '/projects', '/wiki', '/settings', '/security', '/insights', '/pulse', '/graphs', '/network', '/compare', '/']; const hasValidPath = validPaths.some(path => pathAfterRepo.startsWith(path)); if (!hasValidPath) { - throw new Error(`Invalid GitHub repository URL: ${url}`); + throw new Error(`Invalid GitHub repository URL path: ${url}`); } } @@ -108,6 +118,44 @@ class GitHubIntegrationService { }; } + // Manual GitHub URL parsing as fallback when parse-github-url library fails + manualParseGitHubUrl(url) { + try { + // Extract path from URL + const urlObj = new URL(url); + const pathParts = urlObj.pathname.split('/').filter(part => part.length > 0); + + // 
GitHub URLs should have at least owner and repo: /owner/repo + if (pathParts.length < 2) { + return null; + } + + const owner = pathParts[0]; + const repo = pathParts[1]; + let branch = null; + + // Extract branch from tree/blob URLs: /owner/repo/tree/branch or /owner/repo/blob/branch + if (pathParts.length >= 4 && (pathParts[2] === 'tree' || pathParts[2] === 'blob')) { + branch = pathParts[3]; + } + + // Validate owner and repo names + const nameRegex = /^[a-zA-Z0-9]([a-zA-Z0-9\-\._]*[a-zA-Z0-9])?$/; + if (!nameRegex.test(owner) || !nameRegex.test(repo)) { + return null; + } + + return { + owner, + name: repo, + branch: branch || null + }; + } catch (error) { + console.warn('Manual URL parsing failed:', error.message); + return null; + } + } + // Check repository access and type async checkRepositoryAccess(owner, repo) { try { diff --git a/services/git-integration/src/services/github-oauth.js b/services/git-integration/src/services/github-oauth.js index 5873ef4..419f4e4 100644 --- a/services/git-integration/src/services/github-oauth.js +++ b/services/git-integration/src/services/github-oauth.js @@ -111,11 +111,16 @@ class GitHubOAuthService { // Check if this is the first GitHub account for a user async isFirstGitHubAccountForUser(userId) { - const result = await database.query( - 'SELECT COUNT(*) as count FROM github_user_tokens WHERE user_id = $1', - [userId] - ); - return parseInt(result.rows[0].count) === 0; + try { + const result = await database.query( + 'SELECT COUNT(*) as count FROM github_user_tokens WHERE user_id = $1', + [userId] + ); + return result.rows && result.rows[0] ? parseInt(result.rows[0].count) === 0 : true; + } catch (error) { + console.warn('Error checking first GitHub account:', error.message); + return true; // Default to true if we can't determine + } } // Get stored token (legacy method - gets any token) @@ -127,16 +132,26 @@ class GitHubOAuthService { // Get all tokens for a specific user async getUserTokens(userId) { - const query = 'SELECT * FROM github_user_tokens WHERE user_id = $1 ORDER BY is_primary DESC, created_at DESC'; - const result = await database.query(query, [userId]); - return result.rows; + try { + const query = 'SELECT * FROM github_user_tokens WHERE user_id = $1 ORDER BY is_primary DESC, created_at DESC'; + const result = await database.query(query, [userId]); + return result.rows || []; + } catch (error) { + console.warn('Error getting user tokens:', error.message); + return []; + } } // Get primary token for a user async getUserPrimaryToken(userId) { - const query = 'SELECT * FROM github_user_tokens WHERE user_id = $1 AND is_primary = true LIMIT 1'; - const result = await database.query(query, [userId]); - return result.rows[0] || null; + try { + const query = 'SELECT * FROM github_user_tokens WHERE user_id = $1 AND is_primary = true LIMIT 1'; + const result = await database.query(query, [userId]); + return result.rows && result.rows[0] ? result.rows[0] : null; + } catch (error) { + console.warn('Error getting user primary token:', error.message); + return null; + } } // Find the right token for accessing a specific repository