changes in the frontend
parent 5e39839d42
commit b3a6bb8fdc
@@ -4,7 +4,7 @@ services:
   # =====================================

   postgres:
-    image: postgres:15
+    image: pgvector/pgvector:pg15
     container_name: pipeline_postgres
     environment:
       POSTGRES_USER: pipeline_admin
@@ -31,7 +31,7 @@ services:
     volumes:
       - redis_data:/data
     ports:
-      - "6379:6379"
+      - "6380:6379"
     networks:
       - pipeline_network
     healthcheck:
@@ -714,6 +714,55 @@ services:
       timeout: 10s
       retries: 3
       start_period: 40s

+  # =====================================
+  # AI Analysis Service
+  # =====================================
+
+  ai-analysis-service:
+    build: ./services/ai-analysis-service
+    container_name: pipeline_ai_analysis_service
+    ports:
+      - "8022:8022"
+    environment:
+      - PORT=8022
+      - HOST=0.0.0.0
+      - ANTHROPIC_API_KEY=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
+      - POSTGRES_HOST=postgres
+      - POSTGRES_PORT=5432
+      - POSTGRES_DB=dev_pipeline
+      - POSTGRES_USER=pipeline_admin
+      - POSTGRES_PASSWORD=secure_pipeline_2024
+      - REDIS_HOST=redis
+      - REDIS_PORT=6379
+      - REDIS_PASSWORD=redis_secure_2024
+      - MONGODB_URL=mongodb://pipeline_admin:mongo_secure_2024@mongodb:27017/
+      - MONGODB_DB=repo_analyzer
+      - JWT_ACCESS_SECRET=access-secret-key-2024-tech4biz-secure_pipeline_2024
+      - USER_AUTH_SERVICE_URL=http://user-auth:8011
+      - PYTHONUNBUFFERED=1
+    volumes:
+      - ai_analysis_logs:/app/logs
+      - ai_analysis_reports:/app/reports
+      - ai_analysis_temp:/app/temp
+    networks:
+      - pipeline_network
+    depends_on:
+      postgres:
+        condition: service_healthy
+      redis:
+        condition: service_healthy
+      mongodb:
+        condition: service_started
+      migrations:
+        condition: service_completed_successfully
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8022/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 60s
+    restart: unless-stopped
   # =====================================
   # Workflow Orchestration
   # =====================================
@@ -827,6 +876,12 @@ volumes:
     driver: local
   migration_state:
     driver: local
+  ai_analysis_logs:
+    driver: local
+  ai_analysis_reports:
+    driver: local
+  ai_analysis_temp:
+    driver: local

 # =====================================
 # Networks
@@ -834,11 +889,3 @@ volumes:
 networks:
   pipeline_network:
     driver: bridge
-# =====================================
-# Self-Improving Code Generator
-# =====================================
-
-
-# =====================================
-# Self-Improving Code Generator
-# =====================================
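
For reference, the new service added by these hunks can be built and checked on its own; a minimal sketch, assuming the compose file and service names shown above and that the stack's dependencies come up healthy:

```bash
# Build and start only the new service (compose pulls in its depends_on chain)
docker compose up -d --build ai-analysis-service

# Host port 8022 is mapped to the container, so the same health probe used
# in the compose healthcheck can be run from the host
curl -f http://localhost:8022/health
```
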

fix_provider_names.sql (new file, 95 lines)
@@ -0,0 +1,95 @@
-- Fix provider_name based on repository URLs across ALL tables
-- This script updates the provider_name field to match the actual provider from the repository URL

-- =============================================
-- 1. Fix all_repositories table
-- =============================================
UPDATE all_repositories
SET provider_name = 'github'
WHERE repository_url LIKE '%github.com%'
   OR repository_url LIKE '%github.io%';

UPDATE all_repositories
SET provider_name = 'gitlab'
WHERE repository_url LIKE '%gitlab.com%'
   OR repository_url LIKE '%gitlab.io%';

UPDATE all_repositories
SET provider_name = 'bitbucket'
WHERE repository_url LIKE '%bitbucket.org%'
   OR repository_url LIKE '%bitbucket.io%';

UPDATE all_repositories
SET provider_name = 'gitea'
WHERE repository_url LIKE '%gitea.com%'
   OR repository_url LIKE '%gitea.io%';

-- =============================================
-- 2. Fix repository_storage table (linked to all_repositories)
-- =============================================
UPDATE repository_storage
SET provider_name = ar.provider_name
FROM all_repositories ar
WHERE repository_storage.repository_id = ar.id;

-- =============================================
-- 3. Fix repository_commit_details table (linked to all_repositories)
-- =============================================
UPDATE repository_commit_details
SET provider_name = ar.provider_name
FROM all_repositories ar
WHERE repository_commit_details.repository_id = ar.id;

-- =============================================
-- 4. Fix repository_commit_files table (linked to all_repositories)
-- =============================================
UPDATE repository_commit_files
SET provider_name = ar.provider_name
FROM all_repositories ar
WHERE repository_commit_files.repository_id = ar.id;

-- =============================================
-- 5. Fix repository_directories table (linked to all_repositories)
-- =============================================
UPDATE repository_directories
SET provider_name = ar.provider_name
FROM all_repositories ar
WHERE repository_directories.repository_id = ar.id;

-- =============================================
-- 6. Fix repository_files table (linked to all_repositories)
-- =============================================
UPDATE repository_files
SET provider_name = ar.provider_name
FROM all_repositories ar
WHERE repository_files.repository_id = ar.id;

-- =============================================
-- 7. Show results for verification
-- =============================================

-- Show all_repositories results
SELECT
    'all_repositories' as table_name,
    repository_url,
    repository_name,
    owner_name,
    provider_name,
    CASE
        WHEN repository_url LIKE '%github.com%' OR repository_url LIKE '%github.io%' THEN 'github'
        WHEN repository_url LIKE '%gitlab.com%' OR repository_url LIKE '%gitlab.io%' THEN 'gitlab'
        WHEN repository_url LIKE '%bitbucket.org%' OR repository_url LIKE '%bitbucket.io%' THEN 'bitbucket'
        WHEN repository_url LIKE '%gitea.com%' OR repository_url LIKE '%gitea.io%' THEN 'gitea'
        ELSE 'unknown'
    END as detected_provider
FROM all_repositories
ORDER BY provider_name, repository_name;

-- Show summary counts by provider
SELECT
    'Summary by Provider' as info,
    provider_name,
    COUNT(*) as count
FROM all_repositories
GROUP BY provider_name
ORDER BY provider_name;
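
One way to apply this fix script by hand; a sketch that assumes the database name and credentials from the compose environment above, with Postgres reachable on the host:

```bash
# Run the provider-name fix against the pipeline database (assumed connection details)
PGPASSWORD=secure_pipeline_2024 psql -h localhost -p 5432 \
  -U pipeline_admin -d dev_pipeline -f fix_provider_names.sql
```
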

services/ai-analysis-service/001-schema.sql (new file, 613 lines)
@@ -0,0 +1,613 @@
-- ================================================
-- Repository Analyzer Memory System Database Migration
-- Version: 1.0
-- Description: Complete database setup for AI memory system
-- ================================================

-- Enable required extensions
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";

-- ================================================
-- CORE TABLES
-- ================================================

-- Code embeddings table for semantic search of analyzed code
CREATE TABLE IF NOT EXISTS code_embeddings (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    repo_id VARCHAR(255) NOT NULL,
    file_path TEXT NOT NULL,
    content_hash VARCHAR(64) NOT NULL,
    embedding vector(384) NOT NULL,
    metadata JSONB DEFAULT '{}',
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_accessed TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    access_count INTEGER DEFAULT 0,

    -- Ensure uniqueness per repo/file/hash combination
    CONSTRAINT unique_code_analysis UNIQUE(repo_id, file_path, content_hash)
);

-- Query embeddings for episodic memory (user interactions)
CREATE TABLE IF NOT EXISTS query_embeddings (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    session_id VARCHAR(255) NOT NULL,
    query_text TEXT NOT NULL,
    query_embedding vector(384) NOT NULL,
    response_embedding vector(384),
    repo_context VARCHAR(255),
    timestamp TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    metadata JSONB DEFAULT '{}',

    -- Index for session-based queries
    CONSTRAINT valid_session_id CHECK (LENGTH(session_id) > 0)
);

-- Persistent knowledge embeddings for long-term learning
CREATE TABLE IF NOT EXISTS knowledge_embeddings (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    fact_id VARCHAR(255) UNIQUE NOT NULL,
    content TEXT NOT NULL,
    category VARCHAR(100) NOT NULL,
    embedding vector(384) NOT NULL,
    confidence REAL DEFAULT 1.0 CHECK (confidence >= 0.0 AND confidence <= 1.0),
    source_repos TEXT[] DEFAULT '{}',
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_accessed TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    access_frequency INTEGER DEFAULT 0,

    -- Ensure valid categories
    CONSTRAINT valid_category CHECK (category IN ('code_pattern', 'best_practice', 'vulnerability', 'architecture', 'security_vulnerability', 'performance'))
);

-- Repository metadata for tracking analyzed repositories
CREATE TABLE IF NOT EXISTS repository_metadata (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    repo_id VARCHAR(255) UNIQUE NOT NULL,
    repo_path TEXT NOT NULL,
    repo_name VARCHAR(500),
    primary_language VARCHAR(100),
    total_files INTEGER DEFAULT 0,
    total_lines INTEGER DEFAULT 0,
    last_analyzed TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    analysis_count INTEGER DEFAULT 0,
    quality_score REAL DEFAULT 5.0 CHECK (quality_score >= 0.0 AND quality_score <= 10.0),
    metadata JSONB DEFAULT '{}'
);

-- Session tracking for episodic memory correlation
CREATE TABLE IF NOT EXISTS analysis_sessions (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    session_id VARCHAR(255) UNIQUE NOT NULL,
    user_identifier VARCHAR(255),
    start_time TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    end_time TIMESTAMP WITH TIME ZONE,
    total_queries INTEGER DEFAULT 0,
    repositories_analyzed TEXT[] DEFAULT '{}',
    session_metadata JSONB DEFAULT '{}'
);

-- File analysis history for change tracking
CREATE TABLE IF NOT EXISTS file_analysis_history (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    repo_id VARCHAR(255) NOT NULL,
    file_path TEXT NOT NULL,
    content_hash VARCHAR(64) NOT NULL,
    language VARCHAR(100),
    lines_of_code INTEGER DEFAULT 0,
    complexity_score REAL DEFAULT 0.0,
    severity_score REAL DEFAULT 5.0 CHECK (severity_score >= 0.0 AND severity_score <= 10.0),
    issues_count INTEGER DEFAULT 0,
    analyzed_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    analysis_version VARCHAR(50) DEFAULT '1.0'
);

-- Memory consolidation log for tracking knowledge extraction
CREATE TABLE IF NOT EXISTS memory_consolidation_log (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    source_type VARCHAR(50) NOT NULL, -- 'episodic', 'code_analysis', 'manual'
    source_id VARCHAR(255) NOT NULL,
    target_memory_type VARCHAR(50) NOT NULL, -- 'persistent', 'working'
    target_id VARCHAR(255),
    consolidation_confidence REAL DEFAULT 0.5,
    consolidation_timestamp TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    consolidation_metadata JSONB DEFAULT '{}'
);

-- ================================================
-- PERFORMANCE INDEXES
-- ================================================

-- Code embeddings indexes
CREATE INDEX IF NOT EXISTS idx_code_embeddings_repo_id ON code_embeddings(repo_id);
CREATE INDEX IF NOT EXISTS idx_code_embeddings_file_path ON code_embeddings(file_path);
CREATE INDEX IF NOT EXISTS idx_code_embeddings_accessed ON code_embeddings(last_accessed DESC);
CREATE INDEX IF NOT EXISTS idx_code_embeddings_metadata ON code_embeddings USING gin(metadata);

-- Vector similarity indexes (using IVFFlat for better performance)
CREATE INDEX IF NOT EXISTS idx_code_embeddings_vector
    ON code_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);

-- Query embeddings indexes
CREATE INDEX IF NOT EXISTS idx_query_embeddings_session ON query_embeddings(session_id);
CREATE INDEX IF NOT EXISTS idx_query_embeddings_timestamp ON query_embeddings(timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_query_embeddings_repo_context ON query_embeddings(repo_context);
CREATE INDEX IF NOT EXISTS idx_query_embeddings_vector
    ON query_embeddings USING ivfflat (query_embedding vector_cosine_ops) WITH (lists = 100);

-- Knowledge embeddings indexes
CREATE INDEX IF NOT EXISTS idx_knowledge_embeddings_category ON knowledge_embeddings(category);
CREATE INDEX IF NOT EXISTS idx_knowledge_embeddings_confidence ON knowledge_embeddings(confidence DESC);
CREATE INDEX IF NOT EXISTS idx_knowledge_embeddings_access_freq ON knowledge_embeddings(access_frequency DESC);
CREATE INDEX IF NOT EXISTS idx_knowledge_embeddings_vector
    ON knowledge_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
CREATE INDEX IF NOT EXISTS idx_knowledge_source_repos ON knowledge_embeddings USING gin(source_repos);

-- Repository metadata indexes
CREATE INDEX IF NOT EXISTS idx_repository_metadata_repo_id ON repository_metadata(repo_id);
CREATE INDEX IF NOT EXISTS idx_repository_metadata_analyzed ON repository_metadata(last_analyzed DESC);
CREATE INDEX IF NOT EXISTS idx_repository_metadata_language ON repository_metadata(primary_language);

-- File history indexes
CREATE INDEX IF NOT EXISTS idx_file_history_repo_file ON file_analysis_history(repo_id, file_path);
CREATE INDEX IF NOT EXISTS idx_file_history_analyzed ON file_analysis_history(analyzed_at DESC);
CREATE INDEX IF NOT EXISTS idx_file_history_severity ON file_analysis_history(severity_score);

-- ================================================
-- MATERIALIZED VIEWS FOR COMMON QUERIES
-- ================================================

-- High confidence knowledge view
CREATE MATERIALIZED VIEW IF NOT EXISTS high_confidence_knowledge AS
SELECT
    fact_id,
    content,
    category,
    confidence,
    source_repos,
    created_at,
    last_accessed,
    access_frequency
FROM knowledge_embeddings
WHERE confidence > 0.8
ORDER BY confidence DESC, access_frequency DESC;

CREATE INDEX ON high_confidence_knowledge (category);
CREATE INDEX ON high_confidence_knowledge (confidence DESC);

-- Repository quality summary view
CREATE MATERIALIZED VIEW IF NOT EXISTS repository_quality_summary AS
SELECT
    rm.repo_id,
    rm.repo_path,
    rm.repo_name,
    rm.primary_language,
    rm.total_files,
    rm.total_lines,
    rm.quality_score,
    rm.last_analyzed,
    COUNT(ce.id) as total_embeddings,
    AVG(fah.severity_score) as avg_file_quality,
    COUNT(DISTINCT fah.file_path) as analyzed_files_count
FROM repository_metadata rm
LEFT JOIN code_embeddings ce ON rm.repo_id = ce.repo_id
LEFT JOIN file_analysis_history fah ON rm.repo_id = fah.repo_id
GROUP BY rm.repo_id, rm.repo_path, rm.repo_name, rm.primary_language,
         rm.total_files, rm.total_lines, rm.quality_score, rm.last_analyzed;

CREATE INDEX ON repository_quality_summary (quality_score DESC);
CREATE INDEX ON repository_quality_summary (last_analyzed DESC);

-- Recent activity view
CREATE MATERIALIZED VIEW IF NOT EXISTS recent_activity AS
SELECT
    'query' as activity_type,
    session_id as identifier,
    query_text as description,
    timestamp as activity_time,
    repo_context
FROM query_embeddings
WHERE timestamp >= CURRENT_TIMESTAMP - INTERVAL '7 days'
UNION ALL
SELECT
    'analysis' as activity_type,
    repo_id as identifier,
    file_path as description,
    analyzed_at as activity_time,
    repo_id as repo_context
FROM file_analysis_history
WHERE analyzed_at >= CURRENT_TIMESTAMP - INTERVAL '7 days'
ORDER BY activity_time DESC;

CREATE INDEX ON recent_activity (activity_time DESC);
CREATE INDEX ON recent_activity (activity_type);

-- ================================================
-- STORED FUNCTIONS AND PROCEDURES
-- ================================================

-- Function to refresh all materialized views
CREATE OR REPLACE FUNCTION refresh_memory_views()
RETURNS void AS $$
BEGIN
    REFRESH MATERIALIZED VIEW CONCURRENTLY high_confidence_knowledge;
    REFRESH MATERIALIZED VIEW CONCURRENTLY repository_quality_summary;
    REFRESH MATERIALIZED VIEW CONCURRENTLY recent_activity;

    -- Log the refresh
    INSERT INTO memory_consolidation_log (
        source_type, source_id, target_memory_type, target_id,
        consolidation_confidence, consolidation_metadata
    ) VALUES (
        'system', 'materialized_views', 'system', 'view_refresh',
        1.0, '{"refresh_time": "' || CURRENT_TIMESTAMP || '"}'::jsonb
    );
END;
$$ LANGUAGE plpgsql;

-- Function to calculate semantic similarity between texts
CREATE OR REPLACE FUNCTION calculate_similarity(embedding1 vector(384), embedding2 vector(384))
RETURNS real AS $$
BEGIN
    RETURN 1 - (embedding1 <=> embedding2);
END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT;

-- Function to update access patterns
CREATE OR REPLACE FUNCTION update_access_pattern(table_name text, id_column text, id_value text)
RETURNS void AS $$
BEGIN
    CASE table_name
        WHEN 'knowledge_embeddings' THEN
            EXECUTE 'UPDATE knowledge_embeddings SET last_accessed = CURRENT_TIMESTAMP, access_frequency = access_frequency + 1 WHERE fact_id = $1'
                USING id_value;
        WHEN 'code_embeddings' THEN
            EXECUTE 'UPDATE code_embeddings SET last_accessed = CURRENT_TIMESTAMP, access_count = access_count + 1 WHERE id = $1::uuid'
                USING id_value;
        ELSE
            RAISE EXCEPTION 'Unsupported table: %', table_name;
    END CASE;
END;
$$ LANGUAGE plpgsql;

-- Function to cleanup old memories
CREATE OR REPLACE FUNCTION cleanup_old_memories(retention_days integer DEFAULT 365)
RETURNS integer AS $$
DECLARE
    deleted_count integer := 0;
    cutoff_date timestamp;
BEGIN
    cutoff_date := CURRENT_TIMESTAMP - (retention_days || ' days')::interval;

    -- Delete old query embeddings (episodic memories)
    DELETE FROM query_embeddings WHERE timestamp < cutoff_date;
    GET DIAGNOSTICS deleted_count = ROW_COUNT;

    -- Update knowledge confidence based on access patterns
    UPDATE knowledge_embeddings
    SET confidence = LEAST(confidence * (
        CASE
            WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - last_accessed)) / 86400 < 30
            THEN 1.05
            ELSE 0.98
        END *
        (1.0 + LOG(access_frequency + 1) / 20.0)
    ), 1.0);

    -- Log cleanup activity
    INSERT INTO memory_consolidation_log (
        source_type, source_id, target_memory_type, target_id,
        consolidation_confidence, consolidation_metadata
    ) VALUES (
        'system', 'cleanup_function', 'system', 'memory_cleanup',
        1.0, ('{"deleted_records": ' || deleted_count || ', "cutoff_date": "' || cutoff_date || '"}')::jsonb
    );

    RETURN deleted_count;
END;
$$ LANGUAGE plpgsql;

-- Function to find similar code patterns
CREATE OR REPLACE FUNCTION find_similar_code(
    query_embedding vector(384),
    repo_filter text DEFAULT NULL,
    similarity_threshold real DEFAULT 0.7,
    max_results integer DEFAULT 10
)
RETURNS TABLE (
    id uuid,
    repo_id varchar(255),
    file_path text,
    similarity real,
    metadata jsonb
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        ce.id,
        ce.repo_id,
        ce.file_path,
        (1 - (ce.embedding <=> query_embedding))::real as similarity,
        ce.metadata
    FROM code_embeddings ce
    WHERE (repo_filter IS NULL OR ce.repo_id = repo_filter)
      AND (1 - (ce.embedding <=> query_embedding)) > similarity_threshold
    ORDER BY similarity DESC
    LIMIT max_results;
END;
$$ LANGUAGE plpgsql;

-- Function to get knowledge by category
CREATE OR REPLACE FUNCTION get_knowledge_by_category(
    category_filter varchar(100),
    min_confidence real DEFAULT 0.5,
    max_results integer DEFAULT 20
)
RETURNS TABLE (
    fact_id varchar(255),
    content text,
    confidence real,
    access_frequency integer,
    source_repos text[]
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        ke.fact_id,
        ke.content,
        ke.confidence,
        ke.access_frequency,
        ke.source_repos
    FROM knowledge_embeddings ke
    WHERE ke.category = category_filter
      AND ke.confidence >= min_confidence
    ORDER BY ke.confidence DESC, ke.access_frequency DESC
    LIMIT max_results;
END;
$$ LANGUAGE plpgsql;

-- ================================================
-- TRIGGERS FOR AUTOMATIC MAINTENANCE
-- ================================================

-- Trigger function to update repository metadata when embeddings are added
CREATE OR REPLACE FUNCTION update_repository_stats()
RETURNS trigger AS $$
BEGIN
    -- Update or insert repository metadata
    INSERT INTO repository_metadata (repo_id, repo_path, analysis_count, last_analyzed)
    VALUES (NEW.repo_id, NEW.repo_id, 1, CURRENT_TIMESTAMP)
    ON CONFLICT (repo_id)
    DO UPDATE SET
        analysis_count = repository_metadata.analysis_count + 1,
        last_analyzed = CURRENT_TIMESTAMP;

    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Create triggers
DROP TRIGGER IF EXISTS trigger_update_repo_stats ON code_embeddings;
CREATE TRIGGER trigger_update_repo_stats
    AFTER INSERT ON code_embeddings
    FOR EACH ROW
    EXECUTE FUNCTION update_repository_stats();

-- Trigger to automatically update access patterns
CREATE OR REPLACE FUNCTION auto_update_access()
RETURNS trigger AS $$
BEGIN
    NEW.last_accessed = CURRENT_TIMESTAMP;
    NEW.access_count = COALESCE(OLD.access_count, 0) + 1;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

DROP TRIGGER IF EXISTS trigger_auto_access_update ON code_embeddings;
CREATE TRIGGER trigger_auto_access_update
    BEFORE UPDATE ON code_embeddings
    FOR EACH ROW
    EXECUTE FUNCTION auto_update_access();

-- ================================================
-- SECURITY AND PERMISSIONS
-- ================================================

-- Create roles for different access levels
DO $$
BEGIN
    IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'repo_analyzer_read') THEN
        CREATE ROLE repo_analyzer_read;
    END IF;

    IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'repo_analyzer_write') THEN
        CREATE ROLE repo_analyzer_write;
    END IF;

    IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'repo_analyzer_admin') THEN
        CREATE ROLE repo_analyzer_admin;
    END IF;
END
$$;

-- Grant permissions
GRANT SELECT ON ALL TABLES IN SCHEMA public TO repo_analyzer_read;
GRANT SELECT ON high_confidence_knowledge TO repo_analyzer_read;
GRANT SELECT ON repository_quality_summary TO repo_analyzer_read;
GRANT SELECT ON recent_activity TO repo_analyzer_read;

GRANT SELECT, INSERT, UPDATE ON ALL TABLES IN SCHEMA public TO repo_analyzer_write;
GRANT SELECT ON high_confidence_knowledge TO repo_analyzer_write;
GRANT SELECT ON repository_quality_summary TO repo_analyzer_write;
GRANT SELECT ON recent_activity TO repo_analyzer_write;
GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO repo_analyzer_write;

GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO repo_analyzer_admin;
GRANT ALL PRIVILEGES ON high_confidence_knowledge TO repo_analyzer_admin;
GRANT ALL PRIVILEGES ON repository_quality_summary TO repo_analyzer_admin;
GRANT ALL PRIVILEGES ON recent_activity TO repo_analyzer_admin;
GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO repo_analyzer_admin;
GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO repo_analyzer_admin;

-- ================================================
-- DATA VALIDATION AND CONSTRAINTS
-- ================================================

-- Add check constraints for data quality
-- Note: Vector dimensions are validated at insertion time, no need for runtime checks

-- Add constraints for reasonable data ranges
DO $$
BEGIN
    IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'reasonable_lines_of_code') THEN
        ALTER TABLE file_analysis_history ADD CONSTRAINT reasonable_lines_of_code
            CHECK (lines_of_code >= 0 AND lines_of_code <= 1000000);
    END IF;

    IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'reasonable_complexity') THEN
        ALTER TABLE file_analysis_history ADD CONSTRAINT reasonable_complexity
            CHECK (complexity_score >= 0.0 AND complexity_score <= 100.0);
    END IF;
END
$$;

-- ================================================
-- INITIAL DATA AND CONFIGURATION
-- ================================================

-- Insert initial system configuration
INSERT INTO memory_consolidation_log (
    source_type, source_id, target_memory_type, target_id,
    consolidation_confidence, consolidation_metadata
) VALUES (
    'system', 'database_migration', 'system', 'initial_setup',
    1.0, ('{"migration_version": "1.0", "setup_time": "' || CURRENT_TIMESTAMP || '"}')::jsonb
) ON CONFLICT DO NOTHING;

-- Create initial knowledge categories
INSERT INTO knowledge_embeddings (
    fact_id, content, category, embedding, confidence, source_repos
) VALUES
(
    'init_security_001',
    'Always validate and sanitize user input to prevent injection attacks',
    'security_vulnerability',
    array_fill(0.0, ARRAY[384])::vector(384),
    0.95,
    ARRAY[]::text[]
),
(
    'init_performance_001',
    'Use appropriate data structures and algorithms for better performance',
    'performance',
    array_fill(0.0, ARRAY[384])::vector(384),
    0.9,
    ARRAY[]::text[]
),
(
    'init_best_practice_001',
    'Follow consistent naming conventions and code formatting standards',
    'best_practice',
    array_fill(0.0, ARRAY[384])::vector(384),
    0.85,
    ARRAY[]::text[]
)
ON CONFLICT (fact_id) DO NOTHING;

-- ================================================
-- BACKUP AND MAINTENANCE PROCEDURES
-- ================================================

-- Function to create backup of critical memory data
CREATE OR REPLACE FUNCTION backup_memory_data(backup_path text DEFAULT '/tmp/memory_backup')
RETURNS text AS $$
DECLARE
    backup_file text;
    result_message text;
BEGIN
    backup_file := backup_path || '_' || to_char(CURRENT_TIMESTAMP, 'YYYY-MM-DD_HH24-MI-SS') || '.sql';

    -- This would need to be implemented with actual backup logic
    -- For now, just return the intended backup file name
    result_message := 'Backup would be created at: ' || backup_file;

    -- Log backup activity
    INSERT INTO memory_consolidation_log (
        source_type, source_id, target_memory_type, target_id,
        consolidation_confidence, consolidation_metadata
    ) VALUES (
        'system', 'backup_function', 'system', 'backup_created',
        1.0, ('{"backup_file": "' || backup_file || '"}')::jsonb
    );

    RETURN result_message;
END;
$$ LANGUAGE plpgsql;

-- ================================================
-- MONITORING AND ANALYTICS
-- ================================================

-- View for system health monitoring
CREATE OR REPLACE VIEW system_health_monitor AS
SELECT
    'code_embeddings' as table_name,
    COUNT(*) as record_count,
    MAX(created_at) as latest_record,
    AVG(access_count) as avg_access_count
FROM code_embeddings
UNION ALL
SELECT
    'query_embeddings' as table_name,
    COUNT(*) as record_count,
    MAX(timestamp) as latest_record,
    NULL as avg_access_count
FROM query_embeddings
UNION ALL
SELECT
    'knowledge_embeddings' as table_name,
    COUNT(*) as record_count,
    MAX(created_at) as latest_record,
    AVG(access_frequency) as avg_access_count
FROM knowledge_embeddings;

-- Function to get comprehensive system statistics
CREATE OR REPLACE FUNCTION get_system_statistics()
RETURNS jsonb AS $$
DECLARE
    stats jsonb;
BEGIN
    SELECT jsonb_build_object(
        'total_code_embeddings', (SELECT COUNT(*) FROM code_embeddings),
        'total_query_embeddings', (SELECT COUNT(*) FROM query_embeddings),
        'total_knowledge_embeddings', (SELECT COUNT(*) FROM knowledge_embeddings),
        'unique_repositories', (SELECT COUNT(DISTINCT repo_id) FROM code_embeddings),
        'high_confidence_knowledge', (SELECT COUNT(*) FROM knowledge_embeddings WHERE confidence > 0.8),
        'recent_activity_7d', (SELECT COUNT(*) FROM query_embeddings WHERE timestamp >= CURRENT_TIMESTAMP - INTERVAL '7 days'),
        'average_code_quality', (SELECT AVG(quality_score) FROM repository_metadata),
        'last_updated', CURRENT_TIMESTAMP
    ) INTO stats;

    RETURN stats;
END;
$$ LANGUAGE plpgsql;

-- ================================================
-- COMPLETION MESSAGE
-- ================================================

DO $$
BEGIN
    RAISE NOTICE '================================================';
    RAISE NOTICE 'Repository Analyzer Memory System Database Setup Complete';
    RAISE NOTICE '================================================';
    RAISE NOTICE 'Tables created: code_embeddings, query_embeddings, knowledge_embeddings';
    RAISE NOTICE 'Indexes created: Vector similarity indexes with IVFFlat';
    RAISE NOTICE 'Functions created: Similarity search, cleanup, statistics';
    RAISE NOTICE 'Materialized views created: High confidence knowledge, repository summary';
    RAISE NOTICE 'Triggers created: Auto-update repository stats and access patterns';
    RAISE NOTICE '================================================';
    RAISE NOTICE 'Ready for AI-enhanced repository analysis with persistent memory';
    RAISE NOTICE '================================================';
END
$$;
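
As a quick smoke test after the migration, the helper functions defined in this schema can be called directly; a hedged sketch, with the connection details assumed from the compose environment above:

```bash
# Call two of the helper functions created by 001-schema.sql (assumed credentials)
PGPASSWORD=secure_pipeline_2024 psql -h localhost -U pipeline_admin -d dev_pipeline \
  -c "SELECT get_system_statistics();" \
  -c "SELECT * FROM get_knowledge_by_category('best_practice', 0.5, 5);"
```
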

services/ai-analysis-service/Dockerfile (new file, 37 lines)
@@ -0,0 +1,37 @@
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    git \
    postgresql-client \
    curl \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the service code
COPY . .

# Create necessary directories
RUN mkdir -p /app/logs /app/temp /app/reports

# Set environment variables
ENV PYTHONPATH=/app
ENV PYTHONUNBUFFERED=1
ENV PORT=8022

# Expose port
EXPOSE 8022

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:8022/health || exit 1

# Run migration and then start the service
CMD ["sh", "-c", "python run_migration.py && python server.py"]
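
Outside of Compose, the image can also be built and run on its own; a minimal sketch with an illustrative tag and placeholder environment values (the container still expects reachable Postgres, Redis, and MongoDB instances):

```bash
# Build the image from the service directory and run it standalone
docker build -t ai-analysis-service ./services/ai-analysis-service
docker run --rm -p 8022:8022 \
  -e ANTHROPIC_API_KEY=your_api_key_here \
  -e POSTGRES_HOST=host.docker.internal \
  ai-analysis-service
```
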

services/ai-analysis-service/README.md (new file, 202 lines)
@@ -0,0 +1,202 @@
# Complete AI Repository Analysis Service

A comprehensive AI-powered repository analysis tool that automatically analyzes **ALL files** in a repository without any limits or user queries required.

## 🚀 Features

- **Complete Analysis**: Analyzes ALL files in the repository (no max-files limit)
- **Fully Automated**: No user query required - runs completely automatically
- **Memory-Enhanced**: Learns from previous analyses using advanced memory systems
- **Comprehensive Reports**: Generates detailed PDF reports with executive summaries
- **Multi-Database Support**: Uses PostgreSQL, MongoDB, and Redis for optimal performance
- **Security Focus**: Identifies security vulnerabilities and code quality issues
- **Architecture Assessment**: Provides architectural insights and recommendations

## 📋 Requirements

### System Dependencies
- Python 3.8+
- PostgreSQL with pgvector extension
- MongoDB
- Redis

### Python Dependencies
```bash
pip install anthropic python-dotenv git redis pymongo psycopg2-binary numpy reportlab
```

## 🛠️ Setup

1. **Install Dependencies**:
   ```bash
   pip install -r requirements.txt
   ```

2. **Database Setup**:
   ```bash
   # Run the database migration
   psql -U postgres -d repo_vectors -f 001-schema.sql
   ```

3. **Environment Variables**:
   Create a `.env` file with:
   ```env
   ANTHROPIC_API_KEY=your_api_key_here
   REDIS_HOST=localhost
   REDIS_PORT=6379
   REDIS_DB=0
   MONGODB_URL=mongodb://localhost:27017/
   MONGODB_DB=repo_analyzer
   POSTGRES_HOST=localhost
   POSTGRES_PORT=5432
   POSTGRES_DB=repo_vectors
   POSTGRES_USER=postgres
   POSTGRES_PASSWORD=your_password
   ```

## 🎯 Usage

### Basic Usage
```bash
python ai-analyze.py /path/to/repository
```

### With Custom Output
```bash
python ai-analyze.py /path/to/repository --output my_analysis.pdf
```

### With API Key Override
```bash
python ai-analyze.py /path/to/repository --api-key your_api_key
```

## 📊 What It Analyzes

### File Types Supported
- **Programming Languages**: Python, JavaScript, TypeScript, Java, C++, C#, Go, Rust, PHP, Ruby, Swift, Kotlin
- **Web Technologies**: HTML, CSS, SCSS, SASS
- **Configuration Files**: JSON, YAML, XML, SQL
- **Build Files**: Dockerfile, Makefile, CMake, package.json, requirements.txt, Cargo.toml, pom.xml, build.gradle
- **Documentation**: README.md, Markdown files

### Analysis Coverage
- **Code Quality**: Complexity, maintainability, best practices
- **Security**: Vulnerabilities, injection attacks, authentication issues
- **Architecture**: Project structure, scalability, design patterns
- **Performance**: Optimization opportunities, bottlenecks
- **Documentation**: Completeness and quality

## 📈 Output

### Console Output
- Real-time analysis progress
- Repository statistics
- Quality breakdown by file
- Language distribution
- Memory system statistics

### PDF Report
- Executive summary for leadership
- Repository overview with metrics
- Detailed file-by-file analysis
- Security assessment
- Architecture evaluation
- Recommendations and next steps

## 🧠 Memory System

The tool uses a sophisticated three-tier memory system:

1. **Working Memory (Redis)**: Temporary, fast access for current analysis
2. **Episodic Memory (MongoDB)**: User interactions and analysis sessions
3. **Persistent Memory (PostgreSQL)**: Long-term knowledge and best practices

This allows the tool to learn from previous analyses and provide increasingly accurate insights.

## 🔧 Configuration

### File Size Limits
- Default: 2MB per file (configurable in code)
- Large files are skipped with notification

### Excluded Directories
- `.git`, `node_modules`, `__pycache__`, `build`, `dist`, `target`
- `venv`, `env`, `.next`, `coverage`, `vendor`
- `bower_components`, `.gradle`, `.m2`, `.cargo`

### Rate Limiting
- 0.1 second delay between file analyses to avoid API rate limits
- Configurable in the code

## 📝 Example Output

```
🚀 Starting Complete AI Repository Analysis
============================================================
Repository: /path/to/my-project
Output: complete_repository_analysis.pdf
Mode: Complete automated analysis of ALL files
============================================================

Scanning repository: /path/to/my-project
Found 127 files to analyze
Starting comprehensive analysis of 127 files...
Analyzing file 1/127: main.py
Analyzing file 2/127: config.js
...

🎯 COMPLETE ANALYSIS FINISHED
============================================================
📊 Repository Statistics:
   • Files Analyzed: 127
   • Lines of Code: 15,432
   • Languages: 8
   • Code Quality: 7.2/10

📈 Quality Breakdown:
   • High Quality Files (8-10): 45
   • Medium Quality Files (5-7): 67
   • Low Quality Files (1-4): 15
   • Total Issues Found: 89

🔤 Language Distribution:
   • Python: 45 files
   • JavaScript: 32 files
   • TypeScript: 28 files
   • HTML: 12 files
   • CSS: 10 files

📄 Complete PDF Report: complete_repository_analysis.pdf
✅ Complete analysis finished successfully!
```

## 🚨 Troubleshooting

### Common Issues

1. **Database Connection Errors**:
   - Ensure PostgreSQL, MongoDB, and Redis are running
   - Check connection credentials in `.env` file

2. **API Key Issues**:
   - Verify Anthropic API key is valid and has sufficient credits
   - Check rate limits if analysis fails

3. **Memory Issues**:
   - Large repositories may require more RAM
   - Consider increasing system memory or processing in batches

4. **File Permission Errors**:
   - Ensure read access to repository files
   - Check write permissions for output directory

## 🤝 Contributing

This is a complete automated analysis system. The tool will:
- Analyze every file in the repository
- Generate comprehensive reports
- Learn from previous analyses
- Provide actionable insights

No user interaction required - just run and get results!
710
services/ai-analysis-service/ai-analysis/adv_git_analyzer.py
Normal file
710
services/ai-analysis-service/ai-analysis/adv_git_analyzer.py
Normal file
@ -0,0 +1,710 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Robust GitHub Repository AI Analysis Tool
|
||||||
|
Simplified version with better error handling and JSON parsing.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import asyncio
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, List, Optional, Tuple
|
||||||
|
from datetime import datetime
|
||||||
|
import argparse
|
||||||
|
from dataclasses import dataclass
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
|
# Core packages
|
||||||
|
import anthropic
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
import git
|
||||||
|
|
||||||
|
# PDF generation
|
||||||
|
from reportlab.lib.pagesizes import A4
|
||||||
|
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
||||||
|
from reportlab.lib.enums import TA_CENTER, TA_LEFT
|
||||||
|
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Table, TableStyle
|
||||||
|
from reportlab.lib import colors
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class FileAnalysis:
|
||||||
|
path: str
|
||||||
|
language: str
|
||||||
|
lines_of_code: int
|
||||||
|
complexity_score: float
|
||||||
|
issues_found: List[str]
|
||||||
|
recommendations: List[str]
|
||||||
|
detailed_analysis: str
|
||||||
|
severity_score: float
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class RepositoryAnalysis:
|
||||||
|
repo_path: str
|
||||||
|
total_files: int
|
||||||
|
total_lines: int
|
||||||
|
languages: Dict[str, int]
|
||||||
|
architecture_assessment: str
|
||||||
|
security_assessment: str
|
||||||
|
code_quality_score: float
|
||||||
|
file_analyses: List[FileAnalysis]
|
||||||
|
executive_summary: str
|
||||||
|
|
||||||
|
class RobustGitHubAnalyzer:
|
||||||
|
def __init__(self, api_key: str):
|
||||||
|
self.client = anthropic.Anthropic(api_key=api_key)
|
||||||
|
self.temp_dir = None
|
||||||
|
|
||||||
|
# Language mapping for file detection
|
||||||
|
self.language_map = {
|
||||||
|
'.py': 'Python', '.js': 'JavaScript', '.ts': 'TypeScript',
|
||||||
|
'.tsx': 'TypeScript', '.jsx': 'JavaScript', '.java': 'Java',
|
||||||
|
'.cpp': 'C++', '.c': 'C', '.cs': 'C#', '.go': 'Go', '.rs': 'Rust',
|
||||||
|
'.php': 'PHP', '.rb': 'Ruby', '.swift': 'Swift', '.kt': 'Kotlin',
|
||||||
|
'.html': 'HTML', '.css': 'CSS', '.scss': 'SCSS', '.sass': 'SASS',
|
||||||
|
'.sql': 'SQL', '.yaml': 'YAML', '.yml': 'YAML', '.json': 'JSON',
|
||||||
|
'.xml': 'XML', '.sh': 'Shell', '.dockerfile': 'Docker',
|
||||||
|
'.md': 'Markdown', '.txt': 'Text'
|
||||||
|
}
|
||||||
|
|
||||||
|
# Code file extensions to analyze
|
||||||
|
self.code_extensions = set(self.language_map.keys())
|
||||||
|
|
||||||
|
def clone_repository(self, repo_path: str) -> str:
|
||||||
|
"""Clone repository or use existing path."""
|
||||||
|
if os.path.exists(repo_path):
|
||||||
|
print(f"Using existing repository: {repo_path}")
|
||||||
|
return repo_path
|
||||||
|
else:
|
||||||
|
print(f"Cloning repository: {repo_path}")
|
||||||
|
self.temp_dir = tempfile.mkdtemp(prefix="repo_analysis_")
|
||||||
|
try:
|
||||||
|
git.Repo.clone_from(repo_path, self.temp_dir)
|
||||||
|
return self.temp_dir
|
||||||
|
except Exception as e:
|
||||||
|
raise Exception(f"Failed to clone repository: {e}")
|
||||||
|
|
||||||
|
def get_file_language(self, file_path: Path) -> str:
|
||||||
|
"""Get programming language from file extension."""
|
||||||
|
return self.language_map.get(file_path.suffix.lower(), 'Unknown')
|
||||||
|
|
||||||
|
def calculate_complexity_score(self, content: str) -> float:
|
||||||
|
"""Calculate basic complexity score based on code patterns."""
|
||||||
|
lines = content.split('\n')
|
||||||
|
complexity_indicators = ['if', 'else', 'elif', 'for', 'while', 'try', 'except', 'catch', 'switch']
|
||||||
|
|
||||||
|
complexity = 1
|
||||||
|
for line in lines:
|
||||||
|
line_lower = line.lower().strip()
|
||||||
|
for indicator in complexity_indicators:
|
||||||
|
if indicator in line_lower:
|
||||||
|
complexity += 1
|
||||||
|
|
||||||
|
# Normalize to 1-10 scale
|
||||||
|
return min(complexity / max(len(lines), 1) * 100, 10.0)
|
||||||
|
|
||||||
|
async def analyze_file_comprehensive(self, file_path: Path, content: str) -> FileAnalysis:
|
||||||
|
"""Perform comprehensive file analysis using a single, robust prompt."""
|
||||||
|
language = self.get_file_language(file_path)
|
||||||
|
lines_of_code = len([line for line in content.split('\n') if line.strip()])
|
||||||
|
complexity_score = self.calculate_complexity_score(content)
|
||||||
|
|
||||||
|
# Truncate content if too long
|
||||||
|
if len(content) > 4000:
|
||||||
|
content = content[:4000] + "\n... [truncated for analysis]"
|
||||||
|
|
||||||
|
print(f" Analyzing {file_path.name} ({language}, {lines_of_code} lines)")
|
||||||
|
|
||||||
|
# Create comprehensive analysis prompt
|
||||||
|
prompt = f"""
|
||||||
|
You are a senior software engineer with 25 years of experience. Analyze this {language} code file:
|
||||||
|
|
||||||
|
FILENAME: {file_path.name}
|
||||||
|
LANGUAGE: {language}
|
||||||
|
LINES OF CODE: {lines_of_code}
|
||||||
|
|
||||||
|
CODE:
|
||||||
|
```{language.lower()}
|
||||||
|
{content}
|
||||||
|
```
|
||||||
|
|
||||||
|
Provide a comprehensive analysis covering:
|
||||||
|
|
||||||
|
1. ISSUES FOUND: List specific problems, bugs, security vulnerabilities, or code smells
|
||||||
|
2. RECOMMENDATIONS: Actionable suggestions for improvement
|
||||||
|
3. CODE QUALITY: Overall assessment of code quality and maintainability
|
||||||
|
4. SECURITY: Any security concerns or vulnerabilities
|
||||||
|
5. PERFORMANCE: Potential performance issues or optimizations
|
||||||
|
6. BEST PRACTICES: Adherence to coding standards and best practices
|
||||||
|
|
||||||
|
Provide your analysis in clear, structured text (not JSON). Be specific and actionable.
|
||||||
|
Rate the overall code quality from 1-10 where 10 is excellent.
|
||||||
|
|
||||||
|
ANALYSIS:
|
||||||
|
"""
|
||||||
|
|
||||||
|
try:
|
||||||
|
message = self.client.messages.create(
|
||||||
|
model="claude-3-5-sonnet-20241022",
|
||||||
|
max_tokens=3000,
|
||||||
|
temperature=0.1,
|
||||||
|
messages=[{"role": "user", "content": prompt}]
|
||||||
|
)
|
||||||
|
|
||||||
|
analysis_text = message.content[0].text.strip()
|
||||||
|
|
||||||
|
# Extract severity score from analysis
|
||||||
|
severity_match = re.search(r'(\d+(?:\.\d+)?)/10', analysis_text)
|
||||||
|
severity_score = float(severity_match.group(1)) if severity_match else 5.0
|
||||||
|
|
||||||
|
# Parse issues and recommendations from the text
|
||||||
|
issues = self.extract_issues_from_analysis(analysis_text)
|
||||||
|
recommendations = self.extract_recommendations_from_analysis(analysis_text)
|
||||||
|
|
||||||
|
return FileAnalysis(
|
||||||
|
path=str(file_path.relative_to(Path(self.temp_dir or '.'))),
|
||||||
|
language=language,
|
||||||
|
lines_of_code=lines_of_code,
|
||||||
|
complexity_score=complexity_score,
|
||||||
|
issues_found=issues,
|
||||||
|
recommendations=recommendations,
|
||||||
|
detailed_analysis=analysis_text,
|
||||||
|
severity_score=severity_score
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f" Error analyzing {file_path.name}: {e}")
|
||||||
|
return FileAnalysis(
|
||||||
|
path=str(file_path),
|
||||||
|
language=language,
|
||||||
|
lines_of_code=lines_of_code,
|
||||||
|
complexity_score=complexity_score,
|
||||||
|
issues_found=[f"Analysis failed: {str(e)}"],
|
||||||
|
recommendations=["Review file manually due to analysis error"],
|
||||||
|
detailed_analysis=f"Analysis failed due to error: {str(e)}",
|
||||||
|
severity_score=5.0
|
||||||
|
)
|
||||||
|
|
||||||
|
def extract_issues_from_analysis(self, analysis_text: str) -> List[str]:
|
||||||
|
"""Extract issues from analysis text."""
|
||||||
|
issues = []
|
||||||
|
lines = analysis_text.split('\n')
|
||||||
|
|
||||||
|
# Look for common issue indicators
|
||||||
|
issue_keywords = ['issue', 'problem', 'bug', 'vulnerability', 'error', 'warning', 'concern']
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
line_lower = line.lower().strip()
|
||||||
|
if any(keyword in line_lower for keyword in issue_keywords):
|
||||||
|
if line.strip() and not line.strip().startswith('#'):
|
||||||
|
issues.append(line.strip())
|
||||||
|
|
||||||
|
return issues[:10] # Limit to top 10 issues
|
||||||
|
|
||||||
|
def extract_recommendations_from_analysis(self, analysis_text: str) -> List[str]:
|
||||||
|
"""Extract recommendations from analysis text."""
|
||||||
|
recommendations = []
|
||||||
|
lines = analysis_text.split('\n')
|
||||||
|
|
||||||
|
# Look for recommendation indicators
|
||||||
|
rec_keywords = ['recommend', 'suggest', 'should', 'consider', 'improve']
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
line_lower = line.lower().strip()
|
||||||
|
if any(keyword in line_lower for keyword in rec_keywords):
|
||||||
|
if line.strip() and not line.strip().startswith('#'):
|
||||||
|
recommendations.append(line.strip())
|
||||||
|
|
||||||
|
return recommendations[:10] # Limit to top 10 recommendations
|
||||||
|
|
||||||
|
    def scan_repository(self, repo_path: str, max_files: int = 50) -> List[Tuple[Path, str]]:
        """Scan repository and collect files for analysis."""
        print(f"Scanning repository: {repo_path}")

        files_to_analyze = []

        # Important files to always include
        important_files = {
            'README.md', 'package.json', 'requirements.txt', 'Dockerfile',
            'docker-compose.yml', 'tsconfig.json', 'next.config.js',
            'tailwind.config.js', 'webpack.config.js', '.env.example'
        }

        for root, dirs, files in os.walk(repo_path):
            # Skip common build/cache directories
            dirs[:] = [d for d in dirs if not d.startswith('.') and
                       d not in {'node_modules', '__pycache__', 'build', 'dist', 'target',
                                 'venv', 'env', '.git', '.next', 'coverage'}]

            for file in files:
                if len(files_to_analyze) >= max_files:
                    break

                file_path = Path(root) / file

                # Skip large files
                try:
                    if file_path.stat().st_size > 1000000:  # 1MB limit
                        continue
                except:
                    continue

                # Include important files or files with code extensions
                should_include = (
                    file.lower() in important_files or
                    file_path.suffix.lower() in self.code_extensions or
                    file.lower().startswith('dockerfile')
                )

                if should_include:
                    try:
                        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                            content = f.read()
                            if content.strip():  # Only non-empty files
                                files_to_analyze.append((file_path, content))
                    except Exception as e:
                        print(f"Could not read {file_path}: {e}")

        print(f"Found {len(files_to_analyze)} files to analyze")
        return files_to_analyze

    async def analyze_repository_overview(self, repo_path: str, file_analyses: List[FileAnalysis]) -> Tuple[str, str]:
        """Analyze repository architecture and security."""
        print("Analyzing repository overview...")

        # Prepare summary data
        languages = dict(Counter(fa.language for fa in file_analyses))
        total_lines = sum(fa.lines_of_code for fa in file_analyses)
        avg_quality = sum(fa.severity_score for fa in file_analyses) / len(file_analyses) if file_analyses else 5.0

        # Get repository structure
        structure_lines = []
        try:
            for root, dirs, files in os.walk(repo_path):
                dirs[:] = [d for d in dirs if not d.startswith('.') and d not in {'node_modules', '__pycache__'}]
                level = root.replace(repo_path, '').count(os.sep)
                indent = ' ' * level
                structure_lines.append(f"{indent}{os.path.basename(root)}/")
                for file in files[:3]:  # Limit files shown per directory
                    structure_lines.append(f"{indent} {file}")
                if len(structure_lines) > 50:  # Limit total structure size
                    break
        except Exception as e:
            structure_lines = [f"Error reading structure: {e}"]

        # Architecture analysis
        arch_prompt = f"""
        You are a Senior Software Architect with 25 years of experience.

        Analyze this repository:

        REPOSITORY STRUCTURE:
        {chr(10).join(structure_lines[:30])}

        STATISTICS:
        - Total files analyzed: {len(file_analyses)}
        - Total lines of code: {total_lines:,}
        - Languages: {languages}
        - Average code quality: {avg_quality:.1f}/10

        TOP FILE ISSUES:
        {chr(10).join([f"- {fa.path}: {len(fa.issues_found)} issues" for fa in file_analyses[:10]])}

        Provide an architectural assessment covering:
        1. Project type and purpose
        2. Technology stack evaluation
        3. Code organization and structure
        4. Scalability and maintainability concerns
        5. Key recommendations for improvement

        Keep response under 1500 words and focus on actionable insights.
        """

        # Security analysis
        security_issues = []
        for fa in file_analyses:
            security_issues.extend([issue for issue in fa.issues_found if
                                    any(keyword in issue.lower() for keyword in
                                        ['security', 'vulnerability', 'injection', 'xss', 'auth', 'password'])])

        sec_prompt = f"""
        You are a Senior Security Engineer with 20+ years of experience.

        Security Analysis for repository with {len(file_analyses)} files:

        SECURITY ISSUES FOUND:
        {chr(10).join(security_issues[:20]) if security_issues else "No obvious security issues detected"}

        HIGH-RISK FILE TYPES PRESENT:
        {[lang for lang, count in languages.items() if lang in ['JavaScript', 'TypeScript', 'Python', 'PHP', 'SQL']]}

        Provide security assessment covering:
        1. Overall security posture
        2. Main security risks and vulnerabilities
        3. Authentication and authorization concerns
        4. Data protection and privacy issues
        5. Immediate security priorities

        Keep response under 1000 words and focus on actionable security recommendations.
        """

        try:
            # Run both analyses
            arch_task = self.client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=2000,
                temperature=0.1,
                messages=[{"role": "user", "content": arch_prompt}]
            )

            sec_task = self.client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=1500,
                temperature=0.1,
                messages=[{"role": "user", "content": sec_prompt}]
            )

            architecture_assessment = arch_task.content[0].text
            security_assessment = sec_task.content[0].text

            return architecture_assessment, security_assessment

        except Exception as e:
            return f"Architecture analysis failed: {e}", f"Security analysis failed: {e}"

    async def generate_executive_summary(self, analysis: RepositoryAnalysis) -> str:
        """Generate executive summary for leadership."""
        print("Generating executive summary...")

        prompt = f"""
        You are presenting to C-level executives. Create an executive summary of this technical analysis:

        REPOSITORY METRICS:
        - Total Files: {analysis.total_files}
        - Lines of Code: {analysis.total_lines:,}
        - Languages: {analysis.languages}
        - Code Quality Score: {analysis.code_quality_score:.1f}/10

        KEY FINDINGS:
        - Total issues identified: {sum(len(fa.issues_found) for fa in analysis.file_analyses)}
        - Files needing attention: {len([fa for fa in analysis.file_analyses if fa.severity_score < 7])}
        - High-quality files: {len([fa for fa in analysis.file_analyses if fa.severity_score >= 8])}

        Create an executive summary for non-technical leadership covering:
        1. Business impact of code quality findings
        2. Risk assessment and implications
        3. Investment priorities and recommendations
        4. Expected ROI from addressing technical debt
        5. Competitive implications

        Focus on business outcomes, not technical details. Keep under 800 words.
        """

        try:
            message = self.client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=1200,
                temperature=0.1,
                messages=[{"role": "user", "content": prompt}]
            )
            return message.content[0].text
        except Exception as e:
            return f"Executive summary generation failed: {e}"

    def create_pdf_report(self, analysis: RepositoryAnalysis, output_path: str):
        """Generate comprehensive PDF report."""
        print(f"Generating PDF report: {output_path}")

        doc = SimpleDocTemplate(output_path, pagesize=A4,
                                leftMargin=72, rightMargin=72,
                                topMargin=72, bottomMargin=72)
        styles = getSampleStyleSheet()
        story = []

        # Custom styles
        title_style = ParagraphStyle(
            'CustomTitle',
            parent=styles['Heading1'],
            fontSize=24,
            textColor=colors.darkblue,
            spaceAfter=30,
            alignment=TA_CENTER
        )

        heading_style = ParagraphStyle(
            'CustomHeading',
            parent=styles['Heading2'],
            fontSize=16,
            textColor=colors.darkblue,
            spaceBefore=20,
            spaceAfter=10
        )

        # Title Page
        story.append(Paragraph("Repository Analysis Report", title_style))
        story.append(Spacer(1, 20))
        story.append(Paragraph(f"<b>Repository:</b> {analysis.repo_path}", styles['Normal']))
        story.append(Paragraph(f"<b>Analysis Date:</b> {datetime.now().strftime('%B %d, %Y at %H:%M')}", styles['Normal']))
        story.append(Paragraph("<b>Generated by:</b> AI Senior Engineering Team", styles['Normal']))
        story.append(PageBreak())

        # Executive Summary
        story.append(Paragraph("Executive Summary", heading_style))
        story.append(Paragraph(analysis.executive_summary, styles['Normal']))
        story.append(PageBreak())

        # Repository Overview
        story.append(Paragraph("Repository Overview", heading_style))

        overview_data = [
            ['Metric', 'Value'],
            ['Total Files Analyzed', str(analysis.total_files)],
            ['Total Lines of Code', f"{analysis.total_lines:,}"],
            ['Primary Languages', ', '.join(list(analysis.languages.keys())[:5])],
            ['Overall Code Quality', f"{analysis.code_quality_score:.1f}/10"],
        ]

        overview_table = Table(overview_data, colWidths=[200, 300])
        overview_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, 0), 12),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
            ('GRID', (0, 0), (-1, -1), 1, colors.black)
        ]))

        story.append(overview_table)
        story.append(Spacer(1, 20))

        # Languages Distribution
        if analysis.languages:
            story.append(Paragraph("Language Distribution", heading_style))
            lang_data = [['Language', 'Files']]
            for lang, count in sorted(analysis.languages.items(), key=lambda x: x[1], reverse=True):
                lang_data.append([lang, str(count)])

            lang_table = Table(lang_data, colWidths=[200, 100])
            lang_table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('GRID', (0, 0), (-1, -1), 1, colors.black)
            ]))
            story.append(lang_table)
            story.append(PageBreak())

        # Architecture Assessment
        story.append(Paragraph("Architecture Assessment", heading_style))
        # Split long text into paragraphs
        arch_paragraphs = analysis.architecture_assessment.split('\n\n')
        for para in arch_paragraphs[:10]:  # Limit paragraphs
            if para.strip():
                story.append(Paragraph(para.strip(), styles['Normal']))
                story.append(Spacer(1, 10))
        story.append(PageBreak())

        # Security Assessment
        story.append(Paragraph("Security Assessment", heading_style))
        sec_paragraphs = analysis.security_assessment.split('\n\n')
        for para in sec_paragraphs[:10]:  # Limit paragraphs
            if para.strip():
                story.append(Paragraph(para.strip(), styles['Normal']))
                story.append(Spacer(1, 10))
        story.append(PageBreak())

        # File Analysis Summary
        story.append(Paragraph("File Analysis Summary", heading_style))

        # Summary statistics
        high_quality_files = [fa for fa in analysis.file_analyses if fa.severity_score >= 8]
        medium_quality_files = [fa for fa in analysis.file_analyses if 5 <= fa.severity_score < 8]
        low_quality_files = [fa for fa in analysis.file_analyses if fa.severity_score < 5]

        quality_data = [
            ['Quality Level', 'Files', 'Percentage'],
            ['High Quality (8-10)', str(len(high_quality_files)), f"{len(high_quality_files)/len(analysis.file_analyses)*100:.1f}%"],
            ['Medium Quality (5-7)', str(len(medium_quality_files)), f"{len(medium_quality_files)/len(analysis.file_analyses)*100:.1f}%"],
            ['Low Quality (1-4)', str(len(low_quality_files)), f"{len(low_quality_files)/len(analysis.file_analyses)*100:.1f}%"]
        ]

        quality_table = Table(quality_data)
        quality_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
            ('BACKGROUND', (0, 1), (-1, 1), colors.lightgreen),
            ('BACKGROUND', (0, 2), (-1, 2), colors.lightyellow),
            ('BACKGROUND', (0, 3), (-1, 3), colors.lightcoral)
        ]))

        story.append(quality_table)
        story.append(Spacer(1, 20))

        # Top Issues Found
        story.append(Paragraph("Files Requiring Attention", heading_style))

        # Sort files by severity (lowest scores first - need most attention)
        files_by_priority = sorted(analysis.file_analyses, key=lambda x: x.severity_score)

        for i, file_analysis in enumerate(files_by_priority[:15]):  # Top 15 files needing attention
            story.append(Paragraph(f"<b>{i+1}. {file_analysis.path}</b>", styles['Heading4']))
            story.append(Paragraph(f"Language: {file_analysis.language} | Quality Score: {file_analysis.severity_score:.1f}/10 | Lines: {file_analysis.lines_of_code}", styles['Normal']))

            # Show top issues
            if file_analysis.issues_found:
                story.append(Paragraph("Key Issues:", styles['Heading5']))
                for issue in file_analysis.issues_found[:3]:  # Top 3 issues
                    story.append(Paragraph(f"• {issue}", styles['Normal']))

            # Show top recommendations
            if file_analysis.recommendations:
                story.append(Paragraph("Recommendations:", styles['Heading5']))
                for rec in file_analysis.recommendations[:2]:  # Top 2 recommendations
                    story.append(Paragraph(f"• {rec}", styles['Normal']))

            story.append(Spacer(1, 15))

        # Build PDF
        try:
            doc.build(story)
            print(f"✅ PDF report generated successfully: {output_path}")
        except Exception as e:
            print(f"❌ Error generating PDF: {e}")

    async def analyze_repository(self, repo_path: str, max_files: int = 50) -> RepositoryAnalysis:
        """Main analysis function."""
        try:
            # Clone/access repository
            actual_repo_path = self.clone_repository(repo_path)

            # Scan files
            files_to_analyze = self.scan_repository(actual_repo_path, max_files)

            if not files_to_analyze:
                raise Exception("No files found to analyze")

            # Analyze each file
            print(f"Starting analysis of {len(files_to_analyze)} files...")
            file_analyses = []

            for i, (file_path, content) in enumerate(files_to_analyze):
                print(f"Analyzing file {i+1}/{len(files_to_analyze)}: {file_path.name}")
                analysis = await self.analyze_file_comprehensive(file_path, content)
                file_analyses.append(analysis)

                # Small delay to avoid rate limiting
                await asyncio.sleep(0.2)

            # Repository-level analyses
            print("Performing repository-level analysis...")
            architecture_assessment, security_assessment = await self.analyze_repository_overview(
                actual_repo_path, file_analyses)

            # Calculate overall quality score
            avg_quality = sum(fa.severity_score for fa in file_analyses) / len(file_analyses)

            # Generate statistics
            languages = dict(Counter(fa.language for fa in file_analyses))
            total_lines = sum(fa.lines_of_code for fa in file_analyses)

            # Create repository analysis
            repo_analysis = RepositoryAnalysis(
                repo_path=repo_path,
                total_files=len(file_analyses),
                total_lines=total_lines,
                languages=languages,
                architecture_assessment=architecture_assessment,
                security_assessment=security_assessment,
                code_quality_score=avg_quality,
                file_analyses=file_analyses,
                executive_summary=""
            )

            # Generate executive summary
            print("Generating executive summary...")
            repo_analysis.executive_summary = await self.generate_executive_summary(repo_analysis)

            return repo_analysis

        finally:
            # Cleanup
            if self.temp_dir and os.path.exists(self.temp_dir):
                shutil.rmtree(self.temp_dir)
                print("Temporary files cleaned up")

async def main():
    # Load environment variables
    load_dotenv()

    parser = argparse.ArgumentParser(description="Robust GitHub Repository AI Analysis")
    parser.add_argument("repo_path", help="Repository path (local directory or Git URL)")
    parser.add_argument("--output", "-o", default="repository_analysis.pdf",
                        help="Output PDF file path")
    parser.add_argument("--max-files", type=int, default=50,
                        help="Maximum files to analyze")
    parser.add_argument("--api-key", help="Anthropic API key (overrides .env)")

    args = parser.parse_args()

    # Get API key
    api_key = args.api_key or os.getenv('ANTHROPIC_API_KEY')
    if not api_key:
        print("❌ Error: ANTHROPIC_API_KEY not found in .env file or command line")
        print("Please create a .env file with: ANTHROPIC_API_KEY=your_key_here")
        return 1

    try:
        print("🚀 Starting Repository Analysis")
        print("=" * 60)
        print(f"Repository: {args.repo_path}")
        print(f"Max files: {args.max_files}")
        print(f"Output: {args.output}")
        print("=" * 60)

        # Initialize analyzer
        analyzer = RobustGitHubAnalyzer(api_key)

        # Perform analysis
        analysis = await analyzer.analyze_repository(args.repo_path, args.max_files)

        # Generate PDF report
        analyzer.create_pdf_report(analysis, args.output)

        # Print summary to console
        print("\n" + "=" * 60)
        print("🎯 ANALYSIS COMPLETE")
        print("=" * 60)
        print(f"📊 Repository Statistics:")
        print(f"   • Files Analyzed: {analysis.total_files}")
        print(f"   • Lines of Code: {analysis.total_lines:,}")
        print(f"   • Languages: {len(analysis.languages)}")
        print(f"   • Code Quality: {analysis.code_quality_score:.1f}/10")

        # Quality breakdown
        high_quality = len([fa for fa in analysis.file_analyses if fa.severity_score >= 8])
        low_quality = len([fa for fa in analysis.file_analyses if fa.severity_score < 5])

        print(f"\n📈 Quality Breakdown:")
        print(f"   • High Quality Files: {high_quality}")
        print(f"   • Files Needing Attention: {low_quality}")
        print(f"   • Total Issues Found: {sum(len(fa.issues_found) for fa in analysis.file_analyses)}")

        print(f"\n📄 Detailed PDF Report: {args.output}")
        print("\n✅ Analysis completed successfully!")

        return 0

    except Exception as e:
        print(f"❌ Error during analysis: {e}")
        return 1

if __name__ == "__main__":
    exit(asyncio.run(main()))
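# Example invocation (illustrative; the script name, repository URL, output name and
# API key are placeholders assumed for this sketch, not taken from this commit):
#   export ANTHROPIC_API_KEY=sk-ant-...            # or place it in a .env file
#   python ai_analyze.py https://github.com/org/repo --output repo_report.pdf --max-files 30
#   python ai_analyze.py ./local-checkout -o local_report.pdf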
232
services/ai-analysis-service/ai-analysis/ai_blog_analysis.pdf
Normal file
@ -0,0 +1,232 @@
[Binary file: ReportLab-generated PDF report; raw PDF stream data (232 lines) omitted]
363
services/ai-analysis-service/ai-analysis/analysis_report.md
Normal file
@ -0,0 +1,363 @@

# GitHub Repository Analysis Report

**Repository:** https://github.com/TejasTeju-dev/AI-Blog
**Analysis Date:** 2025-09-19 11:09:14
**Analyzed by:** Claude AI Assistant

---

## Executive Summary

Let me provide a comprehensive analysis:

1. **Project Type & Purpose**:
This appears to be a modern web application built with Next.js, likely a blog or content platform with articles and topics sections. The extensive UI component library suggests it's a full-featured web application with a sophisticated user interface.

2. **Technology Stack**:
- Frontend Framework: Next.js (React)
- Language: TypeScript
- Styling: Tailwind CSS
- Package Manager: pnpm
- UI Components: Extensive component library (possibly using shadcn/ui)
- State Management: Custom hooks
- Animations: Multiple background animation components

3. **Architecture Overview**:
The project follows Next.js 13+ App Router structure:
```
app/          # Main application routes
components/   # Reusable UI components
hooks/        # Custom React hooks
lib/          # Utility functions
public/       # Static assets
styles/       # Global styles
```

4. **Key Components**:
- **UI Components**: Comprehensive set of 40+ UI components including:
  - Basic elements (Button, Input, Form)
  - Navigation (Navbar, Menu, Breadcrumb)
  - Feedback (Toast, Alert, Dialog)
  - Data display (Table, Chart, Card)
  - Layout (Grid, Sidebar)
- **Background Components**:
  - AnimatedGrid
  - FloatingElements
  - ParticleField
  - 3DBackground
- **Core Pages**:
  - Home (page.tsx)
  - Articles
  - Blog
  - Topics
  - About

5. **Development Setup**:
Required setup likely includes:
```bash
# Install dependencies
pnpm install

# Development server
pnpm dev

# Build
pnpm build
```

Requirements:
- Node.js
- pnpm
- TypeScript knowledge
- Understanding of Next.js and React

6. **Code Quality Assessment**:
Strengths:
- Well-organized directory structure
- Consistent use of TypeScript
- Modular component architecture
- Separation of concerns (UI components, hooks, pages)
- Comprehensive UI component library
- Modern development practices (App Router, TypeScript)

Areas for consideration:
- Large number of UI components might indicate need for documentation
- Multiple background components might need performance optimization
- Could benefit from API documentation
- Might need testing infrastructure (not visible in structure)

Additional Observations:
- The project uses modern React patterns (hooks)
- Strong focus on UI/UX with multiple animation options
- Built with scalability in mind (modular structure)
- Follows Next.js best practices
- Uses modern tooling (pnpm, TypeScript, Tailwind)

This appears to be a well-structured, modern web application with a strong focus on UI components and user experience. The architecture suggests it's built for scalability and maintainability.

---

## Detailed Code Analysis

I'll analyze each aspect of this Next.js project:

1. **Code Quality**
- Strong TypeScript usage with proper type definitions and configurations
- Consistent code formatting and organization following Next.js 13+ conventions
- Clean project structure with clear separation of concerns
- Good use of modern React patterns and Next.js features
- Well-structured configuration files (next.config.js, tailwind.config.js, etc.)
- Follows React best practices with components organization

2. **Design Patterns**
- Component-based architecture following React principles
- Server-side rendering approach using Next.js App Router
- Atomic design pattern evident in UI components organization
- Utility-first CSS approach with Tailwind
- Singleton pattern for configuration management
- Dependency injection through React context (seen in theme implementation)

3. **Key Dependencies**
- Core: Next.js 14.2, React 19, TypeScript
- UI: Radix UI components, Tailwind CSS, shadcn/ui
- 3D: Three.js, React Three Fiber
- Forms: React Hook Form, Zod validation
- Utilities: clsx, tailwind-merge
- Development: PostCSS, TypeScript, ESLint

4. **Potential Issues**
- Build errors being ignored (typescript.ignoreBuildErrors, eslint.ignoreDuringBuilds)
- Unoptimized images configuration could impact performance
- Missing error boundaries and proper error handling
- Security considerations for client-side rendering of 3D content
- No explicit API rate limiting or security headers
- Missing proper environment variable handling

5. **Testing Strategy**
- No visible testing setup (Jest, React Testing Library, etc.)
- Missing unit tests, integration tests, and e2e tests
- Should add testing framework and implement test coverage
- Consider adding Cypress or Playwright for e2e testing

6. **Documentation**
- Good README with clear project structure and setup instructions
- Missing JSDoc comments for components and functions
- Could benefit from more inline documentation
- API documentation could be improved
- Missing contribution guidelines and deployment docs

7. **Maintainability**
Strengths:
- Clear project structure
- Modern tooling and frameworks
- Type safety with TypeScript
- Component modularity
- Consistent coding style

Areas for Improvement:
- Add comprehensive testing
- Improve error handling
- Better documentation
- Implement proper CI/CD
- Add proper logging system
- Consider performance monitoring

Additional Recommendations:

1. Security:
```typescript
// Add security headers
const securityHeaders = [
  { key: 'X-XSS-Protection', value: '1; mode=block' },
  { key: 'X-Frame-Options', value: 'SAMEORIGIN' },
  { key: 'X-Content-Type-Options', value: 'nosniff' },
]
```

2. Error Handling:
```typescript
// Add error boundary component
class ErrorBoundary extends React.Component {
  static getDerivedStateFromError(error) {
    return { hasError: true };
  }

  componentDidCatch(error, errorInfo) {
    // Log error to service
  }
}
```

3. Testing Setup:
```json
// Add to package.json
{
  "jest": {
    "setupFilesAfterEnv": ["<rootDir>/jest.setup.js"],
    "testEnvironment": "jsdom"
  },
  "scripts": {
    "test": "jest",
    "test:watch": "jest --watch",
    "test:coverage": "jest --coverage"
  }
}
```

4. Performance Monitoring:
```typescript
// Add performance monitoring
export function reportWebVitals(metric) {
  if (metric.label === 'web-vital') {
    console.log(metric); // Send to analytics
  }
}
```

The project has a solid foundation but would benefit from these improvements for production readiness.

---

## Security & Best Practices Analysis

I'll analyze the repository based on the provided files and structure:

1. **Security Issues**:
- ⚠️ ESLint and TypeScript build errors are being ignored (`ignoreDuringBuilds: true` and `ignoreBuildErrors: true`), which could mask security-related issues
- ⚠️ Image optimization is disabled (`unoptimized: true`), which could lead to performance and security concerns
- ✅ Remote image patterns are properly restricted to specific domains (unsplash.com)
- ⚠️ No explicit CSP (Content Security Policy) configuration visible

2. **Secret Management**:
- ✅ Uses environment variables (process.env)
- ⚠️ No visible secret management solution or environment validation
- 🔍 Recommend implementing a secret management solution (e.g., Vault, AWS Secrets Manager)

3. **Dependencies**:
- Cannot fully assess without package.json
- Using Next.js and Tailwind CSS which are generally well-maintained
- 🔍 Recommend implementing dependency scanning (e.g., Snyk, OWASP Dependency-Check)

4. **Best Practices**:
✅ Good:
- TypeScript implementation with strict mode enabled
- Proper module resolution and ES6 target
- Well-organized file structure
- Using modern module systems
- Proper tailwind configuration

⚠️ Concerns:
- Disabling TypeScript and ESLint checks in production
- Multiple next.config files (both .js and .mjs)
- No visible testing configuration

5. **Configuration**:
✅ Good:
- Environment-based configuration for basePath
- Proper TypeScript configuration
- Well-structured Tailwind configuration

⚠️ Concerns:
- Duplicate next.config files might cause confusion
- Some hardcoded values could be externalized
- No visible staging/production environment separation

6. **Error Handling**:
- Cannot fully assess without application code
- ⚠️ Disabling TypeScript and ESLint checks could mask error handling issues
- 🔍 Recommend implementing proper error boundaries and logging

7. **Recommendations**:

Security:
```typescript
// Enable TypeScript and ESLint checks
const nextConfig = {
  eslint: {
    ignoreDuringBuilds: false,
  },
  typescript: {
    ignoreBuildErrors: false,
  }
}
```

Configuration:
```javascript
// Consolidate next.config files
// Add proper environment validation
const validateEnv = () => {
  const required = ['API_KEY', 'DATABASE_URL'];
  required.forEach(key => {
    if (!process.env[key]) throw new Error(`Missing ${key}`);
  });
}
```

Best Practices:
1. Implement proper CSP:
```javascript
// next.config.js
{
  async headers() {
    return [
      {
        source: '/:path*',
        headers: [
          {
            key: 'Content-Security-Policy',
            value: "default-src 'self';"
          }
        ]
      }
    ]
  }
}
```

2. Enable image optimization:
```javascript
images: {
  unoptimized: false,
  domains: ['images.unsplash.com'],
}
```

Additional Recommendations:
1. Implement security headers
2. Add input validation
3. Set up proper error boundaries
4. Add proper testing configuration
5. Implement API rate limiting
6. Add security scanning in CI/CD
7. Implement proper logging
8. Add environment validation
9. Consider implementing authentication/authorization
10. Add proper CORS configuration

Environment Setup:
```bash
# .env.example
NODE_ENV=development
API_KEY=
DATABASE_URL=
```

This analysis is based on the configuration files provided. For a more comprehensive security assessment, access to the actual application code, API endpoints, and authentication mechanisms would be needed.

---

## Recommendations Summary

Based on the analysis, here are the key recommendations for this repository:

1. **Immediate Actions**: Critical issues that should be addressed promptly
2. **Code Quality Improvements**: Suggestions for better maintainability
3. **Security Enhancements**: Steps to improve security posture
4. **Documentation**: Areas where documentation could be enhanced
5. **Architecture**: Potential architectural improvements

---

*This analysis was generated using AI and should be reviewed by human developers for accuracy and context.*
391
services/ai-analysis-service/ai-analysis/app.py
Normal file
@ -0,0 +1,391 @@

#!/usr/bin/env python3
"""
GitHub Repository AI Analysis Tool
Analyzes GitHub repositories using Claude API for comprehensive code insights.
"""

import os
import git
import json
import requests
import tempfile
import shutil
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import argparse
from datetime import datetime
import mimetypes
import base64

class GitHubRepoAnalyzer:
    def __init__(self, anthropic_api_key: str):
        self.api_key = anthropic_api_key
        self.api_url = "https://api.anthropic.com/v1/messages"
        self.temp_dir = None

        # File extensions to analyze
        self.code_extensions = {
            '.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.cpp', '.c', '.h',
            '.cs', '.php', '.rb', '.go', '.rs', '.swift', '.kt', '.scala',
            '.html', '.css', '.scss', '.sass', '.less', '.vue', '.svelte',
            '.sql', '.sh', '.bash', '.yml', '.yaml', '.json', '.xml',
            '.dockerfile', '.md', '.rst', '.txt'
        }

        # Files to always include in analysis
        self.important_files = {
            'README.md', 'readme.md', 'README.txt', 'readme.txt',
            'package.json', 'requirements.txt', 'Cargo.toml', 'pom.xml',
            'build.gradle', 'Makefile', 'dockerfile', 'Dockerfile',
            'docker-compose.yml', '.gitignore', 'setup.py', 'pyproject.toml'
        }

    def clone_repository(self, repo_url: str) -> str:
        """Clone GitHub repository to temporary directory."""
        print(f"Cloning repository: {repo_url}")

        self.temp_dir = tempfile.mkdtemp(prefix="github_analysis_")

        try:
            git.Repo.clone_from(repo_url, self.temp_dir)
            print(f"Repository cloned to: {self.temp_dir}")
            return self.temp_dir
        except git.exc.GitCommandError as e:
            raise Exception(f"Failed to clone repository: {e}")

    def get_file_info(self, file_path: Path) -> Dict:
        """Get file information and content."""
        try:
            # Check file size (skip files larger than 1MB)
            if file_path.stat().st_size > 1024 * 1024:
                return {
                    'path': str(file_path.relative_to(self.temp_dir)),
                    'size': file_path.stat().st_size,
                    'content': '[File too large to analyze]',
                    'encoding': 'skipped'
                }

            # Try to read as text
            try:
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read()
                encoding = 'utf-8'
            except UnicodeDecodeError:
                # If text fails, try binary for certain file types
                with open(file_path, 'rb') as f:
                    raw_content = f.read()
                    if len(raw_content) < 10000:  # Only encode small binary files
                        content = base64.b64encode(raw_content).decode('ascii')
                        encoding = 'base64'
                    else:
                        content = '[Binary file - content not included]'
                        encoding = 'binary'

            return {
                'path': str(file_path.relative_to(self.temp_dir)),
                'size': file_path.stat().st_size,
                'content': content,
                'encoding': encoding,
                'mime_type': mimetypes.guess_type(str(file_path))[0]
            }
        except Exception as e:
            return {
                'path': str(file_path.relative_to(self.temp_dir)),
                'error': str(e),
                'content': '[Error reading file]'
            }

    def scan_repository(self, max_files: int = 50) -> Dict:
        """Scan repository and collect file information."""
        print("Scanning repository structure...")

        repo_data = {
            'structure': [],
            'files': [],
            'stats': {
                'total_files': 0,
                'analyzed_files': 0,
                'total_size': 0,
                'languages': {}
            }
        }

        # Get directory structure
        for root, dirs, files in os.walk(self.temp_dir):
            # Skip hidden directories and common build/cache directories
            dirs[:] = [d for d in dirs if not d.startswith('.') and
                       d not in {'node_modules', '__pycache__', 'build', 'dist', 'target', 'venv', 'env'}]

            level = root.replace(self.temp_dir, '').count(os.sep)
            indent = ' ' * level
            folder_name = os.path.basename(root) if root != self.temp_dir else '.'
            repo_data['structure'].append(f"{indent}{folder_name}/")

            # Process files
            for file in files:
                if file.startswith('.'):
                    continue

                file_path = Path(root) / file
                repo_data['stats']['total_files'] += 1
                repo_data['stats']['total_size'] += file_path.stat().st_size

                # Track languages
                ext = file_path.suffix.lower()
                if ext:
                    repo_data['stats']['languages'][ext] = repo_data['stats']['languages'].get(ext, 0) + 1

                # Add to structure
                repo_data['structure'].append(f"{indent} {file}")

                # Decide if we should analyze this file
                should_analyze = (
                    file.lower() in self.important_files or
                    ext in self.code_extensions or
                    repo_data['stats']['analyzed_files'] < max_files
                )

                if should_analyze and repo_data['stats']['analyzed_files'] < max_files:
                    file_info = self.get_file_info(file_path)
                    repo_data['files'].append(file_info)
                    repo_data['stats']['analyzed_files'] += 1

        return repo_data

def call_claude_api(self, prompt: str, max_tokens: int = 4000) -> str:
|
||||||
|
"""Make API call to Claude."""
|
||||||
|
headers = {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"x-api-key": self.api_key,
|
||||||
|
"anthropic-version": "2023-06-01"
|
||||||
|
}
|
||||||
|
|
||||||
|
data = {
|
||||||
|
"model": "claude-3-5-sonnet-20241022",
|
||||||
|
"max_tokens": max_tokens,
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": prompt}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = requests.post(self.api_url, headers=headers, json=data)
|
||||||
|
response.raise_for_status()
|
||||||
|
|
||||||
|
result = response.json()
|
||||||
|
return result['content'][0]['text']
|
||||||
|
except requests.exceptions.RequestException as e:
|
||||||
|
raise Exception(f"API request failed: {e}")
|
||||||
|
|
||||||
|
def analyze_repository_overview(self, repo_data: Dict) -> str:
|
||||||
|
"""Get high-level repository analysis."""
|
||||||
|
print("Analyzing repository overview...")
|
||||||
|
|
||||||
|
structure_summary = "\n".join(repo_data['structure'][:100]) # Limit structure size
|
||||||
|
|
||||||
|
prompt = f"""
|
||||||
|
Analyze this GitHub repository and provide a comprehensive overview:
|
||||||
|
|
||||||
|
REPOSITORY STRUCTURE:
|
||||||
|
{structure_summary}
|
||||||
|
|
||||||
|
STATISTICS:
|
||||||
|
- Total files: {repo_data['stats']['total_files']}
|
||||||
|
- Files analyzed: {repo_data['stats']['analyzed_files']}
|
||||||
|
- Total size: {repo_data['stats']['total_size']} bytes
|
||||||
|
- Languages found: {dict(list(repo_data['stats']['languages'].items())[:10])}
|
||||||
|
|
||||||
|
Please provide:
|
||||||
|
1. **Project Type & Purpose**: What kind of project is this?
|
||||||
|
2. **Technology Stack**: What technologies, frameworks, and languages are used?
|
||||||
|
3. **Architecture Overview**: How is the project structured?
|
||||||
|
4. **Key Components**: What are the main modules/components?
|
||||||
|
5. **Development Setup**: What's needed to run this project?
|
||||||
|
6. **Code Quality Assessment**: Initial observations about code organization
|
||||||
|
"""
|
||||||
|
|
||||||
|
return self.call_claude_api(prompt)
|
||||||
|
|
||||||
|
def analyze_code_files(self, repo_data: Dict) -> str:
|
||||||
|
"""Analyze individual code files."""
|
||||||
|
print("Analyzing code files...")
|
||||||
|
|
||||||
|
# Prepare file contents for analysis
|
||||||
|
files_content = []
|
||||||
|
for file_info in repo_data['files'][:20]: # Limit to first 20 files
|
||||||
|
if file_info.get('encoding') == 'utf-8' and len(file_info.get('content', '')) < 5000:
|
||||||
|
files_content.append(f"=== {file_info['path']} ===\n{file_info['content']}\n")
|
||||||
|
|
||||||
|
files_text = "\n".join(files_content)
|
||||||
|
|
||||||
|
prompt = f"""
|
||||||
|
Analyze these key files from the repository:
|
||||||
|
|
||||||
|
{files_text}
|
||||||
|
|
||||||
|
Please provide detailed analysis covering:
|
||||||
|
1. **Code Quality**: Code style, organization, and best practices
|
||||||
|
2. **Design Patterns**: What patterns and architectural approaches are used?
|
||||||
|
3. **Dependencies & Libraries**: Key external dependencies identified
|
||||||
|
4. **Potential Issues**: Any code smells, security concerns, or improvements needed
|
||||||
|
5. **Testing Strategy**: How is testing implemented (if at all)?
|
||||||
|
6. **Documentation**: Quality of inline documentation and comments
|
||||||
|
7. **Maintainability**: How maintainable and extensible is this code?
|
||||||
|
"""
|
||||||
|
|
||||||
|
return self.call_claude_api(prompt, max_tokens=6000)
|
||||||
|
|
||||||
|
def analyze_security_and_best_practices(self, repo_data: Dict) -> str:
|
||||||
|
"""Analyze security and best practices."""
|
||||||
|
print("Analyzing security and best practices...")
|
||||||
|
|
||||||
|
# Look for security-sensitive files
|
||||||
|
security_files = []
|
||||||
|
for file_info in repo_data['files']:
|
||||||
|
path_lower = file_info['path'].lower()
|
||||||
|
if any(term in path_lower for term in ['config', 'env', 'secret', 'key', 'auth', 'security']):
|
||||||
|
if file_info.get('encoding') == 'utf-8':
|
||||||
|
security_files.append(f"=== {file_info['path']} ===\n{file_info['content'][:2000]}\n")
|
||||||
|
|
||||||
|
security_content = "\n".join(security_files[:10])
|
||||||
|
|
||||||
|
prompt = f"""
|
||||||
|
Analyze this repository for security and best practices:
|
||||||
|
|
||||||
|
SECURITY-RELEVANT FILES:
|
||||||
|
{security_content}
|
||||||
|
|
||||||
|
FILE STRUCTURE ANALYSIS:
|
||||||
|
{json.dumps(repo_data['stats'], indent=2)}
|
||||||
|
|
||||||
|
Please analyze:
|
||||||
|
1. **Security Issues**: Potential security vulnerabilities or concerns
|
||||||
|
2. **Secret Management**: How are secrets/credentials handled?
|
||||||
|
3. **Dependencies**: Are there any vulnerable dependencies?
|
||||||
|
4. **Best Practices**: Adherence to language/framework best practices
|
||||||
|
5. **Configuration**: Are configurations properly externalized?
|
||||||
|
6. **Error Handling**: How are errors handled throughout the codebase?
|
||||||
|
7. **Recommendations**: Specific suggestions for improvement
|
||||||
|
"""
|
||||||
|
|
||||||
|
return self.call_claude_api(prompt, max_tokens=5000)
|
||||||
|
|
||||||
|
def generate_comprehensive_report(self, repo_url: str, overview: str, code_analysis: str, security_analysis: str) -> str:
|
||||||
|
"""Generate final comprehensive report."""
|
||||||
|
print("Generating comprehensive report...")
|
||||||
|
|
||||||
|
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
|
||||||
|
report = f"""
|
||||||
|
# GitHub Repository Analysis Report
|
||||||
|
|
||||||
|
**Repository:** {repo_url}
|
||||||
|
**Analysis Date:** {timestamp}
|
||||||
|
**Analyzed by:** Claude AI Assistant
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Executive Summary
|
||||||
|
|
||||||
|
{overview}
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Detailed Code Analysis
|
||||||
|
|
||||||
|
{code_analysis}
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Security & Best Practices Analysis
|
||||||
|
|
||||||
|
{security_analysis}
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Recommendations Summary
|
||||||
|
|
||||||
|
Based on the analysis, here are the key recommendations for this repository:
|
||||||
|
|
||||||
|
1. **Immediate Actions**: Critical issues that should be addressed promptly
|
||||||
|
2. **Code Quality Improvements**: Suggestions for better maintainability
|
||||||
|
3. **Security Enhancements**: Steps to improve security posture
|
||||||
|
4. **Documentation**: Areas where documentation could be enhanced
|
||||||
|
5. **Architecture**: Potential architectural improvements
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*This analysis was generated using AI and should be reviewed by human developers for accuracy and context.*
|
||||||
|
"""
|
||||||
|
|
||||||
|
return report
|
||||||
|
|
||||||
|
def analyze(self, repo_url: str, output_file: Optional[str] = None) -> str:
|
||||||
|
"""Main analysis function."""
|
||||||
|
try:
|
||||||
|
# Clone repository
|
||||||
|
self.clone_repository(repo_url)
|
||||||
|
|
||||||
|
# Scan repository structure and files
|
||||||
|
repo_data = self.scan_repository()
|
||||||
|
|
||||||
|
# Perform different types of analysis
|
||||||
|
overview = self.analyze_repository_overview(repo_data)
|
||||||
|
code_analysis = self.analyze_code_files(repo_data)
|
||||||
|
security_analysis = self.analyze_security_and_best_practices(repo_data)
|
||||||
|
|
||||||
|
# Generate comprehensive report
|
||||||
|
final_report = self.generate_comprehensive_report(
|
||||||
|
repo_url, overview, code_analysis, security_analysis
|
||||||
|
)
|
||||||
|
|
||||||
|
# Save report if output file specified
|
||||||
|
if output_file:
|
||||||
|
with open(output_file, 'w', encoding='utf-8') as f:
|
||||||
|
f.write(final_report)
|
||||||
|
print(f"Report saved to: {output_file}")
|
||||||
|
|
||||||
|
return final_report
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# Cleanup temporary directory
|
||||||
|
if self.temp_dir and os.path.exists(self.temp_dir):
|
||||||
|
shutil.rmtree(self.temp_dir)
|
||||||
|
print("Temporary files cleaned up")
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = argparse.ArgumentParser(description="Analyze GitHub repository using Claude AI")
|
||||||
|
parser.add_argument("repo_url", help="GitHub repository URL")
|
||||||
|
parser.add_argument("--api-key", required=True, help="Anthropic API key")
|
||||||
|
parser.add_argument("--output", "-o", help="Output file path (optional)")
|
||||||
|
parser.add_argument("--max-files", type=int, default=50, help="Maximum files to analyze")
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
# Initialize analyzer
|
||||||
|
analyzer = GitHubRepoAnalyzer(args.api_key)
|
||||||
|
|
||||||
|
try:
|
||||||
|
print("Starting GitHub repository analysis...")
|
||||||
|
print("=" * 50)
|
||||||
|
|
||||||
|
# Perform analysis
|
||||||
|
report = analyzer.analyze(args.repo_url, args.output)
|
||||||
|
|
||||||
|
# Print report if no output file specified
|
||||||
|
if not args.output:
|
||||||
|
print("\n" + "=" * 50)
|
||||||
|
print("ANALYSIS REPORT")
|
||||||
|
print("=" * 50)
|
||||||
|
print(report)
|
||||||
|
|
||||||
|
print("\nAnalysis completed successfully!")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error during analysis: {e}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
exit(main())
|
||||||
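For context, a minimal usage sketch of the script above when driven from Python rather than the CLI. The repository URL and output path are placeholders, and it assumes app.py is importable on PYTHONPATH and ANTHROPIC_API_KEY is set in the environment; it is not part of the commit.

# Hypothetical usage sketch for app.py above (placeholder repo URL and output file).
import os
from app import GitHubRepoAnalyzer  # assumes app.py is on PYTHONPATH

analyzer = GitHubRepoAnalyzer(os.environ["ANTHROPIC_API_KEY"])
report = analyzer.analyze("https://github.com/example/example-repo", output_file="report.md")
print(report[:500])  # preview the generated Markdown report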
391
services/ai-analysis-service/ai-analysis/github_analyzer.py
Normal file
@ -0,0 +1,391 @@
#!/usr/bin/env python3
"""
GitHub Repository AI Analysis Tool
Analyzes GitHub repositories using Claude API for comprehensive code insights.
"""

import os
import git
import json
import tempfile
import shutil
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import argparse
from datetime import datetime
import mimetypes
import base64
from dotenv import load_dotenv
import anthropic

class GitHubRepoAnalyzer:
    def __init__(self, anthropic_api_key: str = None):
        # Load environment variables
        load_dotenv()

        # Get API key from parameter or environment
        self.api_key = anthropic_api_key or os.getenv('ANTHROPIC_API_KEY')
        if not self.api_key:
            raise ValueError("Anthropic API key not found. Please set ANTHROPIC_API_KEY in .env file or pass as parameter.")

        # Initialize Anthropic client
        self.client = anthropic.Anthropic(api_key=self.api_key)
        self.temp_dir = None

        # File extensions to analyze
        self.code_extensions = {
            '.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.cpp', '.c', '.h',
            '.cs', '.php', '.rb', '.go', '.rs', '.swift', '.kt', '.scala',
            '.html', '.css', '.scss', '.sass', '.less', '.vue', '.svelte',
            '.sql', '.sh', '.bash', '.yml', '.yaml', '.json', '.xml',
            '.dockerfile', '.md', '.rst', '.txt'
        }

        # Files to always include in analysis
        self.important_files = {
            'README.md', 'readme.md', 'README.txt', 'readme.txt',
            'package.json', 'requirements.txt', 'Cargo.toml', 'pom.xml',
            'build.gradle', 'Makefile', 'dockerfile', 'Dockerfile',
            'docker-compose.yml', '.gitignore', 'setup.py', 'pyproject.toml'
        }

    def clone_repository(self, repo_url: str) -> str:
        """Clone GitHub repository to temporary directory."""
        print(f"Cloning repository: {repo_url}")

        self.temp_dir = tempfile.mkdtemp(prefix="github_analysis_")

        try:
            git.Repo.clone_from(repo_url, self.temp_dir)
            print(f"Repository cloned to: {self.temp_dir}")
            return self.temp_dir
        except git.exc.GitCommandError as e:
            raise Exception(f"Failed to clone repository: {e}")

    def get_file_info(self, file_path: Path) -> Dict:
        """Get file information and content."""
        try:
            # Check file size (skip files larger than 1MB)
            if file_path.stat().st_size > 1024 * 1024:
                return {
                    'path': str(file_path.relative_to(self.temp_dir)),
                    'size': file_path.stat().st_size,
                    'content': '[File too large to analyze]',
                    'encoding': 'skipped'
                }

            # Try to read as text
            try:
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read()
                encoding = 'utf-8'
            except UnicodeDecodeError:
                # If text fails, try binary for certain file types
                with open(file_path, 'rb') as f:
                    raw_content = f.read()
                    if len(raw_content) < 10000:  # Only encode small binary files
                        content = base64.b64encode(raw_content).decode('ascii')
                        encoding = 'base64'
                    else:
                        content = '[Binary file - content not included]'
                        encoding = 'binary'

            return {
                'path': str(file_path.relative_to(self.temp_dir)),
                'size': file_path.stat().st_size,
                'content': content,
                'encoding': encoding,
                'mime_type': mimetypes.guess_type(str(file_path))[0]
            }
        except Exception as e:
            return {
                'path': str(file_path.relative_to(self.temp_dir)),
                'error': str(e),
                'content': '[Error reading file]'
            }

    def scan_repository(self, max_files: int = 50) -> Dict:
        """Scan repository and collect file information."""
        print("Scanning repository structure...")

        repo_data = {
            'structure': [],
            'files': [],
            'stats': {
                'total_files': 0,
                'analyzed_files': 0,
                'total_size': 0,
                'languages': {}
            }
        }

        # Get directory structure
        for root, dirs, files in os.walk(self.temp_dir):
            # Skip hidden directories and common build/cache directories
            dirs[:] = [d for d in dirs if not d.startswith('.') and
                       d not in {'node_modules', '__pycache__', 'build', 'dist', 'target', 'venv', 'env'}]

            level = root.replace(self.temp_dir, '').count(os.sep)
            indent = '  ' * level
            folder_name = os.path.basename(root) if root != self.temp_dir else '.'
            repo_data['structure'].append(f"{indent}{folder_name}/")

            # Process files
            for file in files:
                if file.startswith('.'):
                    continue

                file_path = Path(root) / file
                repo_data['stats']['total_files'] += 1
                repo_data['stats']['total_size'] += file_path.stat().st_size

                # Track languages
                ext = file_path.suffix.lower()
                if ext:
                    repo_data['stats']['languages'][ext] = repo_data['stats']['languages'].get(ext, 0) + 1

                # Add to structure
                repo_data['structure'].append(f"{indent}  {file}")

                # Decide if we should analyze this file
                should_analyze = (
                    file.lower() in self.important_files or
                    ext in self.code_extensions or
                    repo_data['stats']['analyzed_files'] < max_files
                )

                if should_analyze and repo_data['stats']['analyzed_files'] < max_files:
                    file_info = self.get_file_info(file_path)
                    repo_data['files'].append(file_info)
                    repo_data['stats']['analyzed_files'] += 1

        return repo_data

    def call_claude_api(self, prompt: str, max_tokens: int = 4000) -> str:
        """Make API call to Claude using official Anthropic client."""
        try:
            message = self.client.messages.create(
                model="claude-3-sonnet-20240229",
                max_tokens=max_tokens,
                messages=[
                    {"role": "user", "content": prompt}
                ]
            )

            return message.content[0].text

        except Exception as e:
            raise Exception(f"Claude API call failed: {e}")

    def analyze_repository_overview(self, repo_data: Dict) -> str:
        """Get high-level repository analysis."""
        print("Analyzing repository overview...")

        structure_summary = "\n".join(repo_data['structure'][:100])  # Limit structure size

        prompt = f"""
Analyze this GitHub repository and provide a comprehensive overview:

REPOSITORY STRUCTURE:
{structure_summary}

STATISTICS:
- Total files: {repo_data['stats']['total_files']}
- Files analyzed: {repo_data['stats']['analyzed_files']}
- Total size: {repo_data['stats']['total_size']} bytes
- Languages found: {dict(list(repo_data['stats']['languages'].items())[:10])}

Please provide:
1. **Project Type & Purpose**: What kind of project is this?
2. **Technology Stack**: What technologies, frameworks, and languages are used?
3. **Architecture Overview**: How is the project structured?
4. **Key Components**: What are the main modules/components?
5. **Development Setup**: What's needed to run this project?
6. **Code Quality Assessment**: Initial observations about code organization
"""

        return self.call_claude_api(prompt)

    def analyze_code_files(self, repo_data: Dict) -> str:
        """Analyze individual code files."""
        print("Analyzing code files...")

        # Prepare file contents for analysis
        files_content = []
        for file_info in repo_data['files'][:20]:  # Limit to first 20 files
            if file_info.get('encoding') == 'utf-8' and len(file_info.get('content', '')) < 5000:
                files_content.append(f"=== {file_info['path']} ===\n{file_info['content']}\n")

        files_text = "\n".join(files_content)

        prompt = f"""
Analyze these key files from the repository:

{files_text}

Please provide detailed analysis covering:
1. **Code Quality**: Code style, organization, and best practices
2. **Design Patterns**: What patterns and architectural approaches are used?
3. **Dependencies & Libraries**: Key external dependencies identified
4. **Potential Issues**: Any code smells, security concerns, or improvements needed
5. **Testing Strategy**: How is testing implemented (if at all)?
6. **Documentation**: Quality of inline documentation and comments
7. **Maintainability**: How maintainable and extensible is this code?
"""

        return self.call_claude_api(prompt, max_tokens=6000)

    def analyze_security_and_best_practices(self, repo_data: Dict) -> str:
        """Analyze security and best practices."""
        print("Analyzing security and best practices...")

        # Look for security-sensitive files
        security_files = []
        for file_info in repo_data['files']:
            path_lower = file_info['path'].lower()
            if any(term in path_lower for term in ['config', 'env', 'secret', 'key', 'auth', 'security']):
                if file_info.get('encoding') == 'utf-8':
                    security_files.append(f"=== {file_info['path']} ===\n{file_info['content'][:2000]}\n")

        security_content = "\n".join(security_files[:10])

        prompt = f"""
Analyze this repository for security and best practices:

SECURITY-RELEVANT FILES:
{security_content}

FILE STRUCTURE ANALYSIS:
{json.dumps(repo_data['stats'], indent=2)}

Please analyze:
1. **Security Issues**: Potential security vulnerabilities or concerns
2. **Secret Management**: How are secrets/credentials handled?
3. **Dependencies**: Are there any vulnerable dependencies?
4. **Best Practices**: Adherence to language/framework best practices
5. **Configuration**: Are configurations properly externalized?
6. **Error Handling**: How are errors handled throughout the codebase?
7. **Recommendations**: Specific suggestions for improvement
"""

        return self.call_claude_api(prompt, max_tokens=5000)

    def generate_comprehensive_report(self, repo_url: str, overview: str, code_analysis: str, security_analysis: str) -> str:
        """Generate final comprehensive report."""
        print("Generating comprehensive report...")

        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        report = f"""
# GitHub Repository Analysis Report

**Repository:** {repo_url}
**Analysis Date:** {timestamp}
**Analyzed by:** Claude AI Assistant

---

## Executive Summary

{overview}

---

## Detailed Code Analysis

{code_analysis}

---

## Security & Best Practices Analysis

{security_analysis}

---

## Recommendations Summary

Based on the analysis, here are the key recommendations for this repository:

1. **Immediate Actions**: Critical issues that should be addressed promptly
2. **Code Quality Improvements**: Suggestions for better maintainability
3. **Security Enhancements**: Steps to improve security posture
4. **Documentation**: Areas where documentation could be enhanced
5. **Architecture**: Potential architectural improvements

---

*This analysis was generated using AI and should be reviewed by human developers for accuracy and context.*
"""

        return report

    def analyze(self, repo_url: str, output_file: Optional[str] = None) -> str:
        """Main analysis function."""
        try:
            # Clone repository
            self.clone_repository(repo_url)

            # Scan repository structure and files
            repo_data = self.scan_repository()

            # Perform different types of analysis
            overview = self.analyze_repository_overview(repo_data)
            code_analysis = self.analyze_code_files(repo_data)
            security_analysis = self.analyze_security_and_best_practices(repo_data)

            # Generate comprehensive report
            final_report = self.generate_comprehensive_report(
                repo_url, overview, code_analysis, security_analysis
            )

            # Save report if output file specified
            if output_file:
                with open(output_file, 'w', encoding='utf-8') as f:
                    f.write(final_report)
                print(f"Report saved to: {output_file}")

            return final_report

        finally:
            # Cleanup temporary directory
            if self.temp_dir and os.path.exists(self.temp_dir):
                shutil.rmtree(self.temp_dir)
                print("Temporary files cleaned up")

def main():
    parser = argparse.ArgumentParser(description="Analyze GitHub repository using Claude AI")
    parser.add_argument("repo_url", help="GitHub repository URL")
    parser.add_argument("--api-key", help="Anthropic API key (optional if set in .env)")
    parser.add_argument("--output", "-o", help="Output file path (optional)")
    parser.add_argument("--max-files", type=int, default=50, help="Maximum files to analyze")

    args = parser.parse_args()

    try:
        # Initialize analyzer
        analyzer = GitHubRepoAnalyzer(args.api_key)

        print("Starting GitHub repository analysis...")
        print("=" * 50)

        # Perform analysis
        report = analyzer.analyze(args.repo_url, args.output)

        # Print report if no output file specified
        if not args.output:
            print("\n" + "=" * 50)
            print("ANALYSIS REPORT")
            print("=" * 50)
            print(report)

        print("\nAnalysis completed successfully!")

    except Exception as e:
        print(f"Error during analysis: {e}")
        return 1

    return 0

if __name__ == "__main__":
    exit(main())
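The practical difference from app.py is that github_analyzer.py loads the key from .env and calls the official anthropic client instead of posting raw HTTP requests. A standalone sketch of that call pattern, with the model name copied from the file and a placeholder prompt:

# Sketch of the client call pattern used in github_analyzer.py above.
import os
import anthropic
from dotenv import load_dotenv

load_dotenv()  # pick up ANTHROPIC_API_KEY from .env, as the analyzer does
client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
message = client.messages.create(
    model="claude-3-sonnet-20240229",
    max_tokens=1000,
    messages=[{"role": "user", "content": "Summarize this repository structure: ..."}],
)
print(message.content[0].text)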
69
services/ai-analysis-service/ai-analysis/requirements.txt
Normal file
@ -0,0 +1,69 @@
# Core AI and API
anthropic>=0.7.0
openai>=1.0.0

# Environment management
python-dotenv>=1.0.0

# Git operations
GitPython>=3.1.0

# PDF generation
reportlab>=4.0.0
matplotlib>=3.7.0
pillow>=10.0.0

# Code analysis and parsing
ast-comments>=1.1.0
astroid>=3.0.0
pygments>=2.15.0
radon>=6.0.1
bandit>=1.7.5
flake8>=6.0.0
pylint>=3.0.0

# File operations and utilities
pathlib2>=2.3.7
chardet>=5.2.0
python-magic>=0.4.27

# Async operations
aiohttp>=3.8.0
aiofiles>=23.0.0
asyncio-throttle>=1.0.2

# Data processing
pandas>=2.0.0
numpy>=1.24.0
python-dateutil>=2.8.0

# Web scraping (for additional repo info)
requests>=2.31.0
beautifulsoup4>=4.12.0

# Testing and code quality
pytest>=7.4.0
pytest-asyncio>=0.21.0
coverage>=7.3.0

# Additional utilities for advanced analysis
networkx>=3.1.0   # For dependency graph analysis
graphviz>=0.20.0  # For visualization
jinja2>=3.1.0     # For report templating
markdown>=3.4.0   # For markdown processing
pyyaml>=6.0.0     # For YAML config files
toml>=0.10.2      # For TOML config files
xmltodict>=0.13.0 # For XML processing

# Performance monitoring
psutil>=5.9.0
memory-profiler>=0.61.0

# Progress bars and UI
tqdm>=4.65.0
rich>=13.5.0
click>=8.1.0

# Security scanning
safety>=2.3.0
pip-audit>=2.6.0
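As an optional sanity check (not part of the commit), a short sketch that confirms a few of the pinned packages resolve after running pip install against this requirements file; the package selection is illustrative.

# Verify a handful of the dependencies above are importable/installed.
from importlib.metadata import version, PackageNotFoundError

for pkg in ("anthropic", "GitPython", "reportlab", "radon", "bandit"):
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg} is missing")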
1570
services/ai-analysis-service/ai-analyze.py
Normal file
1570
services/ai-analysis-service/ai-analyze.py
Normal file
File diff suppressed because it is too large
Load Diff
46
services/ai-analysis-service/env.example
Normal file
46
services/ai-analysis-service/env.example
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
# AI Analysis Service Environment Configuration
|
||||||
|
|
||||||
|
# Service Configuration
|
||||||
|
PORT=8022
|
||||||
|
HOST=0.0.0.0
|
||||||
|
NODE_ENV=development
|
||||||
|
|
||||||
|
# AI API Keys
|
||||||
|
ANTHROPIC_API_KEY=your_anthropic_api_key_here
|
||||||
|
|
||||||
|
# Database Configuration
|
||||||
|
POSTGRES_HOST=localhost
|
||||||
|
POSTGRES_PORT=5432
|
||||||
|
POSTGRES_DB=dev_pipeline
|
||||||
|
POSTGRES_USER=pipeline_admin
|
||||||
|
POSTGRES_PASSWORD=secure_pipeline_2024
|
||||||
|
|
||||||
|
# Redis Configuration
|
||||||
|
REDIS_HOST=localhost
|
||||||
|
REDIS_PORT=6379
|
||||||
|
REDIS_PASSWORD=redis_secure_2024
|
||||||
|
REDIS_DB=0
|
||||||
|
|
||||||
|
# MongoDB Configuration
|
||||||
|
MONGODB_URL=mongodb://pipeline_admin:mongo_secure_2024@localhost:27017/
|
||||||
|
MONGODB_DB=repo_analyzer
|
||||||
|
|
||||||
|
# JWT Configuration
|
||||||
|
JWT_ACCESS_SECRET=access-secret-key-2024-tech4biz-secure_pipeline_2024
|
||||||
|
|
||||||
|
# Service URLs
|
||||||
|
USER_AUTH_SERVICE_URL=http://localhost:8011
|
||||||
|
|
||||||
|
# Analysis Configuration
|
||||||
|
MAX_FILES_PER_ANALYSIS=100
|
||||||
|
MAX_FILE_SIZE_MB=2
|
||||||
|
ANALYSIS_TIMEOUT_SECONDS=300
|
||||||
|
|
||||||
|
# Memory System Configuration
|
||||||
|
WORKING_MEMORY_TTL=3600
|
||||||
|
EPISODIC_RETENTION_DAYS=365
|
||||||
|
PERSISTENT_MEMORY_THRESHOLD=0.8
|
||||||
|
|
||||||
|
# Logging Configuration
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
LOG_FILE_PATH=/app/logs/ai-analysis.log
|
||||||
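A short sketch of how the Python services consume a .env created from env.example (the same load_dotenv/os.getenv pattern used in server.py and migrate_database.py); the DSN string below is illustrative only.

# Read configuration the way the services in this commit do.
import os
from dotenv import load_dotenv

load_dotenv()  # reads a .env file copied from env.example
pg_dsn = (
    f"host={os.getenv('POSTGRES_HOST', 'localhost')} "
    f"port={os.getenv('POSTGRES_PORT', '5432')} "
    f"dbname={os.getenv('POSTGRES_DB', 'dev_pipeline')} "
    f"user={os.getenv('POSTGRES_USER', 'pipeline_admin')}"
)
print("Analysis file cap:", os.getenv("MAX_FILES_PER_ANALYSIS", "100"))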
104
services/ai-analysis-service/migrate.sh
Executable file
@ -0,0 +1,104 @@
#!/bin/bash

# Database Migration Script using psql
# Executes the complete 001-schema.sql file

set -e  # Exit on any error

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Load environment variables
if [ -f .env ]; then
    export $(cat .env | grep -v '^#' | xargs)
fi

# Database connection parameters
DB_HOST=${POSTGRES_HOST:-localhost}
DB_PORT=${POSTGRES_PORT:-5432}
DB_NAME=${POSTGRES_DB:-dev_pipeline}
DB_USER=${POSTGRES_USER:-pipeline_admin}
DB_PASSWORD=${POSTGRES_PASSWORD:-secure_pipeline_2024}

# Schema file
SCHEMA_FILE="001-schema.sql"

echo -e "${BLUE}🔧 AI Repository Analysis Database Migration${NC}"
echo "=================================================="
echo -e "Database: ${YELLOW}${DB_NAME}@${DB_HOST}:${DB_PORT}${NC}"
echo -e "User: ${YELLOW}${DB_USER}${NC}"
echo -e "Schema file: ${YELLOW}${SCHEMA_FILE}${NC}"
echo ""

# Check if psql is available
if ! command -v psql &> /dev/null; then
    echo -e "${RED}❌ psql command not found!${NC}"
    echo "Please install PostgreSQL client tools:"
    echo "  Ubuntu/Debian: sudo apt-get install postgresql-client"
    echo "  CentOS/RHEL: sudo yum install postgresql"
    echo "  macOS: brew install postgresql"
    exit 1
fi

# Check if schema file exists
if [ ! -f "$SCHEMA_FILE" ]; then
    echo -e "${RED}❌ Schema file not found: ${SCHEMA_FILE}${NC}"
    exit 1
fi

echo -e "${BLUE}• Executing migration...${NC}"

# Set password for psql
export PGPASSWORD="$DB_PASSWORD"

# Run migration
if psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" \
    -f "$SCHEMA_FILE" \
    -v ON_ERROR_STOP=1 \
    --echo-errors \
    --echo-queries; then

    echo -e "${GREEN}✅ Migration completed successfully!${NC}"

    # Verify migration
    echo -e "${BLUE}• Verifying migration...${NC}"

    TABLES=$(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -t -c "
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = 'public'
        AND table_name IN ('code_embeddings', 'query_embeddings', 'knowledge_embeddings',
                           'repository_metadata', 'analysis_sessions', 'file_analysis_history')
        ORDER BY table_name;
    " | tr -d ' ')

    if [ -n "$TABLES" ]; then
        TABLE_COUNT=$(echo "$TABLES" | wc -l)
        echo -e "${GREEN}✓ Found ${TABLE_COUNT} core tables: ${TABLES}${NC}"
    else
        echo -e "${YELLOW}⚠ Could not verify table creation${NC}"
    fi

    # Check for pgvector extension
    VECTOR_AVAILABLE=$(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -t -c "
        SELECT EXISTS(SELECT 1 FROM pg_extension WHERE extname = 'vector');
    " | tr -d ' ')

    if [ "$VECTOR_AVAILABLE" = "t" ]; then
        echo -e "${GREEN}✓ pgvector extension is available${NC}"
    else
        echo -e "${YELLOW}⚠ pgvector extension not available - vector operations will be limited${NC}"
    fi

    echo ""
    echo -e "${GREEN}🚀 Database migration completed successfully!${NC}"
    echo -e "${GREEN}📊 Production-level database ready for AI repository analysis${NC}"

else
    echo -e "${RED}❌ Migration failed!${NC}"
    exit 1
fi
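The same pgvector check that migrate.sh runs through psql can be done from Python with psycopg2 (already a service dependency); a minimal sketch, assuming the connection variables from env.example are set:

# Mirror the pgvector verification performed by migrate.sh.
import os
import psycopg2

conn = psycopg2.connect(
    host=os.getenv("POSTGRES_HOST", "localhost"),
    port=os.getenv("POSTGRES_PORT", "5432"),
    dbname=os.getenv("POSTGRES_DB", "dev_pipeline"),
    user=os.getenv("POSTGRES_USER", "pipeline_admin"),
    password=os.getenv("POSTGRES_PASSWORD", ""),
)
with conn, conn.cursor() as cur:
    cur.execute("SELECT EXISTS(SELECT 1 FROM pg_extension WHERE extname = 'vector');")
    print("pgvector available:", cur.fetchone()[0])
conn.close()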
203
services/ai-analysis-service/migrate_database.py
Normal file
@ -0,0 +1,203 @@
#!/usr/bin/env python3
"""
Database Migration Script using psql command
Executes the complete 001-schema.sql file using PostgreSQL's psql command
"""

import os
import subprocess
import sys
from dotenv import load_dotenv
import logging

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
logger = logging.getLogger(__name__)

def run_migration():
    """Run the database migration using psql command."""
    load_dotenv()

    # Database connection parameters
    db_config = {
        'host': os.getenv('POSTGRES_HOST', 'localhost'),
        'port': os.getenv('POSTGRES_PORT', 5432),
        'database': os.getenv('POSTGRES_DB', 'dev_pipeline'),
        'user': os.getenv('POSTGRES_USER', 'pipeline_admin'),
        'password': os.getenv('POSTGRES_PASSWORD', 'secure_pipeline_2024')
    }

    # Schema file path
    schema_file = os.path.join(os.path.dirname(__file__), '001-schema.sql')

    if not os.path.exists(schema_file):
        logger.error(f"❌ Schema file not found: {schema_file}")
        return False

    try:
        logger.info("🔧 Starting database migration with psql...")
        logger.info(f"   • Database: {db_config['database']}@{db_config['host']}:{db_config['port']}")
        logger.info(f"   • User: {db_config['user']}")
        logger.info(f"   • Schema file: {schema_file}")

        # Set PGPASSWORD environment variable for psql
        env = os.environ.copy()
        env['PGPASSWORD'] = db_config['password']

        # Build psql command
        psql_cmd = [
            'psql',
            '-h', db_config['host'],
            '-p', str(db_config['port']),
            '-U', db_config['user'],
            '-d', db_config['database'],
            '-f', schema_file,
            '-v', 'ON_ERROR_STOP=1',  # Stop on first error
            '--echo-errors',          # Show errors
            '--echo-queries'          # Show queries being executed
        ]

        logger.info("   • Executing migration...")
        logger.info(f"   • Command: {' '.join(psql_cmd)}")

        # Run psql command
        result = subprocess.run(
            psql_cmd,
            env=env,
            capture_output=True,
            text=True,
            timeout=300  # 5 minute timeout
        )

        # Check if psql command exists
        if result.returncode == 127:
            logger.error("❌ psql command not found. Please install PostgreSQL client tools.")
            logger.error("   On Ubuntu/Debian: sudo apt-get install postgresql-client")
            logger.error("   On CentOS/RHEL: sudo yum install postgresql")
            return False

        # Check for errors
        if result.returncode != 0:
            logger.error(f"❌ Migration failed with return code: {result.returncode}")
            if result.stderr:
                logger.error("STDERR:")
                logger.error(result.stderr)
            if result.stdout:
                logger.error("STDOUT:")
                logger.error(result.stdout)
            return False

        # Log success
        logger.info("✅ Migration completed successfully!")

        if result.stdout:
            logger.info("Migration output:")
            # Filter out common psql output noise
            lines = result.stdout.split('\n')
            for line in lines:
                if line.strip() and not line.startswith('SET') and not line.startswith('NOTICE'):
                    logger.info(f"   {line}")

        # Verify migration by checking if key tables exist
        logger.info("   • Verifying migration...")

        verify_cmd = [
            'psql',
            '-h', db_config['host'],
            '-p', str(db_config['port']),
            '-U', db_config['user'],
            '-d', db_config['database'],
            '-t',  # tuples only
            '-c', """
                SELECT table_name
                FROM information_schema.tables
                WHERE table_schema = 'public'
                AND table_name IN ('code_embeddings', 'query_embeddings', 'knowledge_embeddings',
                                   'repository_metadata', 'analysis_sessions', 'file_analysis_history')
                ORDER BY table_name;
            """
        ]

        verify_result = subprocess.run(
            verify_cmd,
            env=env,
            capture_output=True,
            text=True,
            timeout=30
        )

        if verify_result.returncode == 0:
            tables = [line.strip() for line in verify_result.stdout.split('\n') if line.strip()]
            logger.info(f"   ✓ Found {len(tables)} core tables: {', '.join(tables)}")
        else:
            logger.warning("   ⚠ Could not verify table creation")

        # Check for pgvector extension
        vector_cmd = [
            'psql',
            '-h', db_config['host'],
            '-p', str(db_config['port']),
            '-U', db_config['user'],
            '-d', db_config['database'],
            '-t',
            '-c', "SELECT EXISTS(SELECT 1 FROM pg_extension WHERE extname = 'vector');"
        ]

        vector_result = subprocess.run(
            vector_cmd,
            env=env,
            capture_output=True,
            text=True,
            timeout=30
        )

        if vector_result.returncode == 0:
            has_vector = vector_result.stdout.strip() == 't'
            if has_vector:
                logger.info("   ✓ pgvector extension is available")
            else:
                logger.warning("   ⚠ pgvector extension not available - vector operations will be limited")

        logger.info("🚀 Database migration completed successfully!")
        logger.info("📊 Production-level database ready for AI repository analysis")

        return True

    except subprocess.TimeoutExpired:
        logger.error("❌ Migration timed out after 5 minutes")
        return False
    except FileNotFoundError:
        logger.error("❌ psql command not found. Please install PostgreSQL client tools.")
        return False
    except Exception as e:
        logger.error(f"❌ Migration failed: {e}")
        return False

def check_psql_available():
    """Check if psql command is available."""
    try:
        result = subprocess.run(['psql', '--version'], capture_output=True, text=True)
        if result.returncode == 0:
            logger.info(f"✓ Found psql: {result.stdout.strip()}")
            return True
        else:
            return False
    except FileNotFoundError:
        return False

if __name__ == "__main__":
    logger.info("🔧 AI Repository Analysis Database Migration")
    logger.info("=" * 50)

    # Check if psql is available
    if not check_psql_available():
        logger.error("❌ psql command not found!")
        logger.error("Please install PostgreSQL client tools:")
        logger.error("  Ubuntu/Debian: sudo apt-get install postgresql-client")
        logger.error("  CentOS/RHEL: sudo yum install postgresql")
        logger.error("  macOS: brew install postgresql")
        sys.exit(1)

    # Run migration
    success = run_migration()
    sys.exit(0 if success else 1)
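Because the module exposes run_migration() and check_psql_available() as plain functions, it can also be reused from other startup code instead of going through migrate.sh; a minimal sketch:

# Reuse the migration functions above from another Python entrypoint.
from migrate_database import run_migration, check_psql_available

if check_psql_available() and run_migration():
    print("schema applied")
else:
    raise SystemExit("migration failed")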
25
services/ai-analysis-service/requirements.txt
Normal file
@ -0,0 +1,25 @@
# Core AI and API dependencies
anthropic>=0.7.0
python-dotenv>=1.0.0

# Web framework
fastapi>=0.104.1
uvicorn>=0.24.0
pydantic>=2.5.0

# Git operations
GitPython>=3.1.40

# Database dependencies
redis>=4.5.0
pymongo>=4.5.0
psycopg2-binary>=2.9.7

# Data processing
numpy>=1.24.0

# PDF generation
reportlab>=4.0.0

# Optional: For better performance (if needed)
# sentence-transformers>=2.2.2  # Commented out - using Claude API instead
94
services/ai-analysis-service/run_migration.py
Normal file
@ -0,0 +1,94 @@
#!/usr/bin/env python3
"""
AI Analysis Service Database Migration Runner
Runs the database migration for AI Analysis Service during container startup.
"""

import os
import sys
import subprocess
import time
from pathlib import Path

def log(message):
    """Log with timestamp."""
    print(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] {message}")

def check_database_connection():
    """Check if database is available."""
    try:
        import psycopg2
        from dotenv import load_dotenv

        load_dotenv()

        conn = psycopg2.connect(
            host=os.getenv('POSTGRES_HOST', 'localhost'),
            port=os.getenv('POSTGRES_PORT', 5432),
            database=os.getenv('POSTGRES_DB', 'dev_pipeline'),
            user=os.getenv('POSTGRES_USER', 'pipeline_admin'),
            password=os.getenv('POSTGRES_PASSWORD', 'secure_pipeline_2024')
        )
        conn.close()
        return True
    except Exception as e:
        log(f"Database connection failed: {e}")
        return False

def run_migration():
    """Run the database migration."""
    try:
        log("Starting AI Analysis Service database migration...")

        # Check if database is available
        max_retries = 30
        retry_count = 0

        while retry_count < max_retries:
            if check_database_connection():
                log("Database connection successful")
                break
            else:
                retry_count += 1
                log(f"Database not ready, retrying in 2 seconds... ({retry_count}/{max_retries})")
                time.sleep(2)
        else:
            log("ERROR: Could not connect to database after 60 seconds")
            return False

        # Run the migration script
        schema_file = Path(__file__).parent / "001-schema.sql"
        if not schema_file.exists():
            log("ERROR: Schema file not found")
            return False

        log(f"Running migration from {schema_file}")

        # Use psql to run the migration
        env = os.environ.copy()
        env['PGPASSWORD'] = os.getenv('POSTGRES_PASSWORD', 'secure_pipeline_2024')

        result = subprocess.run([
            'psql',
            '-h', os.getenv('POSTGRES_HOST', 'localhost'),
            '-p', os.getenv('POSTGRES_PORT', '5432'),
            '-U', os.getenv('POSTGRES_USER', 'pipeline_admin'),
            '-d', os.getenv('POSTGRES_DB', 'dev_pipeline'),
            '-f', str(schema_file),
            '-v', 'ON_ERROR_STOP=1'
        ], env=env, capture_output=True, text=True)

        if result.returncode == 0:
            log("✅ AI Analysis Service database migration completed successfully")
            return True
        else:
            log(f"❌ Migration failed: {result.stderr}")
            return False

    except Exception as e:
        log(f"❌ Migration error: {e}")
        return False

if __name__ == "__main__":
    success = run_migration()
    sys.exit(0 if success else 1)
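The commit does not show the container entrypoint, but a typical way to wire this runner in front of the HTTP server would be along these lines; purely a hypothetical sketch, the actual Dockerfile/entrypoint may differ.

# Hypothetical entrypoint sketch: apply the schema, then start the API server.
import subprocess
import sys

if subprocess.run([sys.executable, "run_migration.py"]).returncode != 0:
    sys.exit(1)  # fail fast if the schema could not be applied
subprocess.run([sys.executable, "server.py"], check=True)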
230
services/ai-analysis-service/server.py
Normal file
230
services/ai-analysis-service/server.py
Normal file
@ -0,0 +1,230 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
AI Analysis Service HTTP Server
|
||||||
|
Provides REST API endpoints for repository analysis.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, Any
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from fastapi import FastAPI, HTTPException, BackgroundTasks
|
||||||
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
from fastapi.responses import FileResponse
|
||||||
|
from pydantic import BaseModel
|
||||||
|
import uvicorn
|
||||||
|
|
||||||
|
# Import the AI analysis components
|
||||||
|
# Note: ai-analyze.py has a hyphen, so we need to handle the import specially
|
||||||
|
import sys
|
||||||
|
import importlib.util
|
||||||
|
|
||||||
|
# Load the ai-analyze.py module
|
||||||
|
spec = importlib.util.spec_from_file_location("ai_analyze", "/app/ai-analyze.py")
|
||||||
|
ai_analyze_module = importlib.util.module_from_spec(spec)
|
||||||
|
sys.modules["ai_analyze"] = ai_analyze_module
|
||||||
|
spec.loader.exec_module(ai_analyze_module)
|
||||||
|
|
||||||
|
# Now import the classes
|
||||||
|
from ai_analyze import EnhancedGitHubAnalyzer, get_memory_config
|
||||||
|
|
||||||
|
app = FastAPI(
|
||||||
|
title="AI Analysis Service",
|
||||||
|
description="AI-powered repository analysis with memory system",
|
||||||
|
version="1.0.0"
|
||||||
|
)
|
||||||
|
|
||||||
|
# CORS middleware
|
||||||
|
app.add_middleware(
|
||||||
|
CORSMiddleware,
|
||||||
|
allow_origins=["*"],
|
||||||
|
allow_credentials=True,
|
||||||
|
allow_methods=["*"],
|
||||||
|
allow_headers=["*"],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Global analyzer instance
|
||||||
|
analyzer = None
|
||||||
|
|
||||||
|
class AnalysisRequest(BaseModel):
|
||||||
|
repo_path: str
|
||||||
|
output_format: str = "pdf" # pdf, json
|
||||||
|
max_files: int = 50
|
||||||
|
|
||||||
|
class AnalysisResponse(BaseModel):
|
||||||
|
success: bool
|
||||||
|
message: str
|
||||||
|
analysis_id: str = None
|
||||||
|
report_path: str = None
|
||||||
|
stats: Dict[str, Any] = None
|
||||||
|
|
||||||
|
@app.on_event("startup")
|
||||||
|
async def startup_event():
|
||||||
|
"""Initialize the analyzer on startup."""
|
||||||
|
global analyzer
|
||||||
|
try:
|
||||||
|
# Load environment variables
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# Get API key
|
||||||
|
api_key = os.getenv('ANTHROPIC_API_KEY')
|
||||||
|
if not api_key:
|
||||||
|
raise Exception("ANTHROPIC_API_KEY not found in environment")
|
||||||
|
|
||||||
|
# Initialize analyzer
|
||||||
|
config = get_memory_config()
|
||||||
|
analyzer = EnhancedGitHubAnalyzer(api_key, config)
|
||||||
|
|
||||||
|
print("✅ AI Analysis Service initialized successfully")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"❌ Failed to initialize AI Analysis Service: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
@app.get("/health")
|
||||||
|
async def health_check():
|
||||||
|
"""Health check endpoint."""
|
||||||
|
return {
|
||||||
|
"status": "healthy",
|
||||||
|
"service": "ai-analysis-service",
|
||||||
|
"timestamp": datetime.now().isoformat(),
|
||||||
|
"version": "1.0.0"
|
||||||
|
}
|
||||||
|
|
||||||
|
@app.post("/analyze", response_model=AnalysisResponse)
|
||||||
|
async def analyze_repository(request: AnalysisRequest, background_tasks: BackgroundTasks):
|
||||||
|
"""Analyze a repository."""
|
||||||
|
try:
|
||||||
|
if not analyzer:
|
||||||
|
raise HTTPException(status_code=500, detail="Analyzer not initialized")
|
||||||
|
|
||||||
|
# Generate unique analysis ID
|
||||||
|
analysis_id = f"analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
||||||
|
|
||||||
|
# Create temporary directory for this analysis
|
||||||
|
temp_dir = tempfile.mkdtemp(prefix=f"ai_analysis_{analysis_id}_")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Run analysis
|
||||||
|
analysis = await analyzer.analyze_repository_with_memory(
|
||||||
|
request.repo_path,
|
||||||
|
max_files=request.max_files
|
||||||
|
)
|
||||||
|
|
||||||
|
# Generate report
|
||||||
|
if request.output_format == "pdf":
|
||||||
|
report_path = f"/app/reports/{analysis_id}_analysis.pdf"
|
||||||
|
analyzer.create_pdf_report(analysis, report_path)
|
||||||
|
else:
|
||||||
|
report_path = f"/app/reports/{analysis_id}_analysis.json"
|
||||||
|
with open(report_path, 'w') as f:
|
||||||
|
json.dump({
|
||||||
|
"repo_path": analysis.repo_path,
|
||||||
|
"total_files": analysis.total_files,
|
||||||
|
"total_lines": analysis.total_lines,
|
||||||
|
"languages": analysis.languages,
|
||||||
|
"code_quality_score": analysis.code_quality_score,
|
||||||
|
"architecture_assessment": analysis.architecture_assessment,
|
||||||
|
"security_assessment": analysis.security_assessment,
|
||||||
|
"executive_summary": analysis.executive_summary,
|
||||||
|
"file_analyses": [
|
||||||
|
{
|
||||||
|
"path": fa.path,
|
||||||
|
"language": fa.language,
|
||||||
|
"lines_of_code": fa.lines_of_code,
|
||||||
|
"severity_score": fa.severity_score,
|
||||||
|
"issues_found": fa.issues_found,
|
||||||
|
"recommendations": fa.recommendations
|
||||||
|
} for fa in analysis.file_analyses
|
||||||
|
]
|
||||||
|
}, f, indent=2)
|
||||||
|
|
||||||
|
# Calculate stats
|
||||||
|
stats = {
|
||||||
|
"total_files": analysis.total_files,
|
||||||
|
"total_lines": analysis.total_lines,
|
||||||
|
"languages": analysis.languages,
|
||||||
|
"code_quality_score": analysis.code_quality_score,
|
||||||
|
"high_quality_files": len([fa for fa in analysis.file_analyses if fa.severity_score >= 8]),
|
||||||
|
"medium_quality_files": len([fa for fa in analysis.file_analyses if 5 <= fa.severity_score < 8]),
|
||||||
|
"low_quality_files": len([fa for fa in analysis.file_analyses if fa.severity_score < 5]),
|
||||||
|
"total_issues": sum(len(fa.issues_found) for fa in analysis.file_analyses)
|
||||||
|
}
|
||||||
|
|
||||||
|
return AnalysisResponse(
|
||||||
|
success=True,
|
||||||
|
message="Analysis completed successfully",
|
||||||
|
analysis_id=analysis_id,
|
||||||
|
report_path=report_path,
|
||||||
|
stats=stats
|
||||||
|
)
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# Cleanup temporary directory
|
||||||
|
if os.path.exists(temp_dir):
|
||||||
|
shutil.rmtree(temp_dir)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
return AnalysisResponse(
|
||||||
|
success=False,
|
||||||
|
message=f"Analysis failed: {str(e)}",
|
||||||
|
analysis_id=None,
|
||||||
|
report_path=None,
|
||||||
|
stats=None
|
||||||
|
)
|
||||||
|
|
||||||
|
@app.get("/reports/{filename}")
|
||||||
|
async def download_report(filename: str):
|
||||||
|
"""Download analysis report."""
|
||||||
|
report_path = f"/app/reports/{filename}"
|
||||||
|
if not os.path.exists(report_path):
|
||||||
|
raise HTTPException(status_code=404, detail="Report not found")
|
||||||
|
|
||||||
|
return FileResponse(
|
||||||
|
report_path,
|
||||||
|
media_type='application/octet-stream',
|
||||||
|
filename=filename
|
||||||
|
)
|
||||||
|
|
||||||
|
@app.get("/memory/stats")
|
||||||
|
async def get_memory_stats():
|
||||||
|
"""Get memory system statistics."""
|
||||||
|
try:
|
||||||
|
if not analyzer:
|
||||||
|
raise HTTPException(status_code=500, detail="Analyzer not initialized")
|
||||||
|
|
||||||
|
stats = await analyzer.memory_manager.get_memory_stats()
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"memory_stats": stats
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=f"Failed to get memory stats: {str(e)}")
|
||||||
|
|
||||||
|
@app.post("/memory/query")
|
||||||
|
async def query_memory(query: str, repo_context: str = ""):
|
||||||
|
"""Query the memory system."""
|
||||||
|
try:
|
||||||
|
if not analyzer:
|
||||||
|
raise HTTPException(status_code=500, detail="Analyzer not initialized")
|
||||||
|
|
||||||
|
result = await analyzer.query_memory(query, repo_context)
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"query": query,
|
||||||
|
"result": result
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=f"Memory query failed: {str(e)}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
port = int(os.getenv('PORT', 8022))
|
||||||
|
host = os.getenv('HOST', '0.0.0.0')
|
||||||
|
|
||||||
|
print(f"🚀 Starting AI Analysis Service on {host}:{port}")
|
||||||
|
uvicorn.run(app, host=host, port=port)
|
||||||
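
For reference, a minimal client sketch against the endpoints added above. Assumptions: the service is reachable directly at localhost:8022, the requests package is installed, and the report filename used below is purely illustrative.

import requests

BASE = "http://localhost:8022"  # assumption: calling the service directly, not via the gateway

# Memory system statistics
print(requests.get(f"{BASE}/memory/stats", timeout=30).json())

# Query the memory system. FastAPI exposes `query` and `repo_context` here as
# query parameters (plain str arguments), so they go in params, not the JSON body.
resp = requests.post(
    f"{BASE}/memory/query",
    params={"query": "What recurring security issues were found?", "repo_context": ""},
    timeout=60,
)
print(resp.json())

# Download a previously generated report (filename is hypothetical)
report = requests.get(f"{BASE}/reports/example_analysis.json", timeout=60)
with open("example_analysis.json", "wb") as f:
    f.write(report.content)
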
@ -68,6 +68,7 @@ const serviceTargets = {
  DASHBOARD_URL: process.env.DASHBOARD_URL || 'http://localhost:8008',
  SELF_IMPROVING_GENERATOR_URL: process.env.SELF_IMPROVING_GENERATOR_URL || 'http://localhost:8007',
  AI_MOCKUP_URL: process.env.AI_MOCKUP_URL || 'http://localhost:8021',
  AI_ANALYSIS_URL: process.env.AI_ANALYSIS_URL || 'http://localhost:8022',
};

// Log service targets for debugging
@ -1984,6 +1985,76 @@ app.use('/api/mockup',
  }
);

// AI Analysis Service - Direct HTTP forwarding
console.log('🔧 Registering /api/ai-analysis proxy route...');
app.use('/api/ai-analysis',
  createServiceLimiter(200),
  // Allow unauthenticated access for AI analysis (public feature)
  (req, res, next) => {
    console.log(`🤖 [AI ANALYSIS PROXY] ${req.method} ${req.originalUrl}`);
    return next();
  },
  (req, res, next) => {
    const aiAnalysisServiceUrl = serviceTargets.AI_ANALYSIS_URL;
    // Strip the /api/ai-analysis prefix so /api/ai-analysis/analyze -> /analyze at target
    const rewrittenPath = (req.originalUrl || '').replace(/^\/api\/ai-analysis/, '');
    const targetUrl = `${aiAnalysisServiceUrl}${rewrittenPath}`;
    console.log(`🔥 [AI ANALYSIS PROXY] ${req.method} ${req.originalUrl} → ${targetUrl}`);

    res.setTimeout(300000, () => { // 5 minutes timeout for analysis
      console.error('❌ [AI ANALYSIS PROXY] Response timeout');
      if (!res.headersSent) {
        res.status(504).json({ error: 'Gateway timeout', service: 'ai-analysis' });
      }
    });

    const options = {
      method: req.method,
      url: targetUrl,
      headers: {
        'Content-Type': 'application/json',
        'User-Agent': 'API-Gateway/1.0',
        'Connection': 'keep-alive',
        'Authorization': req.headers.authorization,
        'X-User-ID': req.user?.id || req.user?.userId,
        ...(req.user?.role && { 'X-User-Role': req.user.role })
      },
      timeout: 240000, // 4 minutes timeout
      validateStatus: () => true,
      maxRedirects: 0,
      maxContentLength: 100 * 1024 * 1024, // 100MB max content length
      maxBodyLength: 100 * 1024 * 1024 // 100MB max body length
    };

    if (req.method === 'POST' || req.method === 'PUT' || req.method === 'PATCH') {
      options.data = req.body || {};
      console.log(`📦 [AI ANALYSIS PROXY] Request body:`, JSON.stringify(req.body));
    }

    axios(options)
      .then(response => {
        console.log(`✅ [AI ANALYSIS PROXY] Response: ${response.status} for ${req.method} ${req.originalUrl}`);
        if (!res.headersSent) {
          res.status(response.status).json(response.data);
        }
      })
      .catch(error => {
        console.error(`❌ [AI ANALYSIS PROXY ERROR]:`, error.message);
        if (!res.headersSent) {
          if (error.response) {
            res.status(error.response.status).json(error.response.data);
          } else {
            res.status(502).json({
              error: 'AI Analysis service unavailable',
              message: error.code || error.message,
              service: 'ai-analysis'
            });
          }
        }
      });
  }
);

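Because of the prefix strip above, gateway paths map one-to-one onto the service's own routes: /api/ai-analysis/memory/stats is forwarded to /memory/stats on the AI analysis service. A minimal sketch of exercising the route through the gateway follows; the gateway port and the bearer token are assumptions (neither appears in this diff), and per the comment above the route also accepts unauthenticated requests.

import requests

GATEWAY = "http://localhost:8000"   # assumption: the gateway's own port is not shown in this diff
TOKEN = None                        # optionally a JWT access token string

headers = {"Authorization": f"Bearer {TOKEN}"} if TOKEN else {}
resp = requests.get(
    f"{GATEWAY}/api/ai-analysis/memory/stats",  # gateway rewrites this to GET /memory/stats
    headers=headers,
    timeout=60,
)
print(resp.status_code, resp.json())
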
// Gateway management endpoints
app.get('/api/gateway/info', authMiddleware.verifyToken, (req, res) => {
  res.json({
@ -2041,9 +2112,10 @@ app.get('/', (req, res) => {
      deploy: '/api/deploy',
      dashboard: '/api/dashboard',
      self_improving: '/api/self-improving',
      mockup: '/api/mockup',
      ai_analysis: '/api/ai-analysis',
      unison: '/api/unison',
      unified: '/api/recommendations'
    },
    websocket: {
      endpoint: '/socket.io/',