changes in the frontend

Pradeep 2025-10-16 10:52:33 +05:30
parent 5e39839d42
commit b3a6bb8fdc
19 changed files with 5507 additions and 13 deletions

View File

@ -4,7 +4,7 @@ services:
# =====================================
postgres:
image: postgres:15
image: pgvector/pgvector:pg15
container_name: pipeline_postgres
environment:
POSTGRES_USER: pipeline_admin
@ -31,7 +31,7 @@ services:
volumes:
- redis_data:/data
ports:
- "6379:6379"
- "6380:6379"
networks:
- pipeline_network
healthcheck:
@ -714,6 +714,55 @@ services:
timeout: 10s
retries: 3
start_period: 40s
# =====================================
# AI Analysis Service
# =====================================
ai-analysis-service:
build: ./services/ai-analysis-service
container_name: pipeline_ai_analysis_service
ports:
- "8022:8022"
environment:
- PORT=8022
- HOST=0.0.0.0
- ANTHROPIC_API_KEY=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
- POSTGRES_HOST=postgres
- POSTGRES_PORT=5432
- POSTGRES_DB=dev_pipeline
- POSTGRES_USER=pipeline_admin
- POSTGRES_PASSWORD=secure_pipeline_2024
- REDIS_HOST=redis
- REDIS_PORT=6379
- REDIS_PASSWORD=redis_secure_2024
- MONGODB_URL=mongodb://pipeline_admin:mongo_secure_2024@mongodb:27017/
- MONGODB_DB=repo_analyzer
- JWT_ACCESS_SECRET=access-secret-key-2024-tech4biz-secure_pipeline_2024
- USER_AUTH_SERVICE_URL=http://user-auth:8011
- PYTHONUNBUFFERED=1
volumes:
- ai_analysis_logs:/app/logs
- ai_analysis_reports:/app/reports
- ai_analysis_temp:/app/temp
networks:
- pipeline_network
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
mongodb:
condition: service_started
migrations:
condition: service_completed_successfully
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8022/health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 60s
restart: unless-stopped
# =====================================
# Workflow Orchestration
# =====================================
@ -827,6 +876,12 @@ volumes:
driver: local
migration_state:
driver: local
ai_analysis_logs:
driver: local
ai_analysis_reports:
driver: local
ai_analysis_temp:
driver: local
# =====================================
# Networks
@ -834,11 +889,3 @@ volumes:
networks:
pipeline_network:
driver: bridge
# =====================================
# Self-Improving Code Generator
# =====================================
# =====================================
# Self-Improving Code Generator
# =====================================

fix_provider_names.sql Normal file
View File

@ -0,0 +1,95 @@
-- Fix provider_name based on repository URLs across ALL tables
-- This script updates the provider_name field to match the actual provider from the repository URL
-- =============================================
-- 1. Fix all_repositories table
-- =============================================
UPDATE all_repositories
SET provider_name = 'github'
WHERE repository_url LIKE '%github.com%'
OR repository_url LIKE '%github.io%';
UPDATE all_repositories
SET provider_name = 'gitlab'
WHERE repository_url LIKE '%gitlab.com%'
OR repository_url LIKE '%gitlab.io%';
UPDATE all_repositories
SET provider_name = 'bitbucket'
WHERE repository_url LIKE '%bitbucket.org%'
OR repository_url LIKE '%bitbucket.io%';
UPDATE all_repositories
SET provider_name = 'gitea'
WHERE repository_url LIKE '%gitea.com%'
OR repository_url LIKE '%gitea.io%';
-- =============================================
-- 2. Fix repository_storage table (linked to all_repositories)
-- =============================================
UPDATE repository_storage
SET provider_name = ar.provider_name
FROM all_repositories ar
WHERE repository_storage.repository_id = ar.id;
-- =============================================
-- 3. Fix repository_commit_details table (linked to all_repositories)
-- =============================================
UPDATE repository_commit_details
SET provider_name = ar.provider_name
FROM all_repositories ar
WHERE repository_commit_details.repository_id = ar.id;
-- =============================================
-- 4. Fix repository_commit_files table (linked to all_repositories)
-- =============================================
UPDATE repository_commit_files
SET provider_name = ar.provider_name
FROM all_repositories ar
WHERE repository_commit_files.repository_id = ar.id;
-- =============================================
-- 5. Fix repository_directories table (linked to all_repositories)
-- =============================================
UPDATE repository_directories
SET provider_name = ar.provider_name
FROM all_repositories ar
WHERE repository_directories.repository_id = ar.id;
-- =============================================
-- 6. Fix repository_files table (linked to all_repositories)
-- =============================================
UPDATE repository_files
SET provider_name = ar.provider_name
FROM all_repositories ar
WHERE repository_files.repository_id = ar.id;
-- =============================================
-- 7. Show results for verification
-- =============================================
-- Show all_repositories results
SELECT
'all_repositories' as table_name,
repository_url,
repository_name,
owner_name,
provider_name,
CASE
WHEN repository_url LIKE '%github.com%' OR repository_url LIKE '%github.io%' THEN 'github'
WHEN repository_url LIKE '%gitlab.com%' OR repository_url LIKE '%gitlab.io%' THEN 'gitlab'
WHEN repository_url LIKE '%bitbucket.org%' OR repository_url LIKE '%bitbucket.io%' THEN 'bitbucket'
WHEN repository_url LIKE '%gitea.com%' OR repository_url LIKE '%gitea.io%' THEN 'gitea'
ELSE 'unknown'
END as detected_provider
FROM all_repositories
ORDER BY provider_name, repository_name;
-- Show summary counts by provider
SELECT
'Summary by Provider' as info,
provider_name,
COUNT(*) as count
FROM all_repositories
GROUP BY provider_name
ORDER BY provider_name;
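-- Example invocation (illustrative; connection details assumed to match the docker-compose
-- settings in this commit — adjust for your environment):
--   psql -h localhost -p 5432 -U pipeline_admin -d dev_pipeline -f fix_provider_names.sql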

View File

@ -0,0 +1,613 @@
-- ================================================
-- Repository Analyzer Memory System Database Migration
-- Version: 1.0
-- Description: Complete database setup for AI memory system
-- ================================================
-- Enable required extensions
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- ================================================
-- CORE TABLES
-- ================================================
-- Code embeddings table for semantic search of analyzed code
CREATE TABLE IF NOT EXISTS code_embeddings (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
repo_id VARCHAR(255) NOT NULL,
file_path TEXT NOT NULL,
content_hash VARCHAR(64) NOT NULL,
embedding vector(384) NOT NULL,
metadata JSONB DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_accessed TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
access_count INTEGER DEFAULT 0,
-- Ensure uniqueness per repo/file/hash combination
CONSTRAINT unique_code_analysis UNIQUE(repo_id, file_path, content_hash)
);
-- Query embeddings for episodic memory (user interactions)
CREATE TABLE IF NOT EXISTS query_embeddings (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
session_id VARCHAR(255) NOT NULL,
query_text TEXT NOT NULL,
query_embedding vector(384) NOT NULL,
response_embedding vector(384),
repo_context VARCHAR(255),
timestamp TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
metadata JSONB DEFAULT '{}',
-- Index for session-based queries
CONSTRAINT valid_session_id CHECK (LENGTH(session_id) > 0)
);
-- Persistent knowledge embeddings for long-term learning
CREATE TABLE IF NOT EXISTS knowledge_embeddings (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
fact_id VARCHAR(255) UNIQUE NOT NULL,
content TEXT NOT NULL,
category VARCHAR(100) NOT NULL,
embedding vector(384) NOT NULL,
confidence REAL DEFAULT 1.0 CHECK (confidence >= 0.0 AND confidence <= 1.0),
source_repos TEXT[] DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
last_accessed TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
access_frequency INTEGER DEFAULT 0,
-- Ensure valid categories
CONSTRAINT valid_category CHECK (category IN ('code_pattern', 'best_practice', 'vulnerability', 'architecture', 'security_vulnerability', 'performance'))
);
-- Repository metadata for tracking analyzed repositories
CREATE TABLE IF NOT EXISTS repository_metadata (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
repo_id VARCHAR(255) UNIQUE NOT NULL,
repo_path TEXT NOT NULL,
repo_name VARCHAR(500),
primary_language VARCHAR(100),
total_files INTEGER DEFAULT 0,
total_lines INTEGER DEFAULT 0,
last_analyzed TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
analysis_count INTEGER DEFAULT 0,
quality_score REAL DEFAULT 5.0 CHECK (quality_score >= 0.0 AND quality_score <= 10.0),
metadata JSONB DEFAULT '{}'
);
-- Session tracking for episodic memory correlation
CREATE TABLE IF NOT EXISTS analysis_sessions (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
session_id VARCHAR(255) UNIQUE NOT NULL,
user_identifier VARCHAR(255),
start_time TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
end_time TIMESTAMP WITH TIME ZONE,
total_queries INTEGER DEFAULT 0,
repositories_analyzed TEXT[] DEFAULT '{}',
session_metadata JSONB DEFAULT '{}'
);
-- File analysis history for change tracking
CREATE TABLE IF NOT EXISTS file_analysis_history (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
repo_id VARCHAR(255) NOT NULL,
file_path TEXT NOT NULL,
content_hash VARCHAR(64) NOT NULL,
language VARCHAR(100),
lines_of_code INTEGER DEFAULT 0,
complexity_score REAL DEFAULT 0.0,
severity_score REAL DEFAULT 5.0 CHECK (severity_score >= 0.0 AND severity_score <= 10.0),
issues_count INTEGER DEFAULT 0,
analyzed_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
analysis_version VARCHAR(50) DEFAULT '1.0'
);
-- Memory consolidation log for tracking knowledge extraction
CREATE TABLE IF NOT EXISTS memory_consolidation_log (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
source_type VARCHAR(50) NOT NULL, -- 'episodic', 'code_analysis', 'manual'
source_id VARCHAR(255) NOT NULL,
target_memory_type VARCHAR(50) NOT NULL, -- 'persistent', 'working'
target_id VARCHAR(255),
consolidation_confidence REAL DEFAULT 0.5,
consolidation_timestamp TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
consolidation_metadata JSONB DEFAULT '{}'
);
-- ================================================
-- PERFORMANCE INDEXES
-- ================================================
-- Code embeddings indexes
CREATE INDEX IF NOT EXISTS idx_code_embeddings_repo_id ON code_embeddings(repo_id);
CREATE INDEX IF NOT EXISTS idx_code_embeddings_file_path ON code_embeddings(file_path);
CREATE INDEX IF NOT EXISTS idx_code_embeddings_accessed ON code_embeddings(last_accessed DESC);
CREATE INDEX IF NOT EXISTS idx_code_embeddings_metadata ON code_embeddings USING gin(metadata);
-- Vector similarity indexes (using IVFFlat for better performance)
CREATE INDEX IF NOT EXISTS idx_code_embeddings_vector
ON code_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
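-- Optional query-time tuning: IVFFlat recall can be raised by increasing the number of
-- probed lists (higher recall, slower search), e.g. SET ivfflat.probes = 10;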
-- Query embeddings indexes
CREATE INDEX IF NOT EXISTS idx_query_embeddings_session ON query_embeddings(session_id);
CREATE INDEX IF NOT EXISTS idx_query_embeddings_timestamp ON query_embeddings(timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_query_embeddings_repo_context ON query_embeddings(repo_context);
CREATE INDEX IF NOT EXISTS idx_query_embeddings_vector
ON query_embeddings USING ivfflat (query_embedding vector_cosine_ops) WITH (lists = 100);
-- Knowledge embeddings indexes
CREATE INDEX IF NOT EXISTS idx_knowledge_embeddings_category ON knowledge_embeddings(category);
CREATE INDEX IF NOT EXISTS idx_knowledge_embeddings_confidence ON knowledge_embeddings(confidence DESC);
CREATE INDEX IF NOT EXISTS idx_knowledge_embeddings_access_freq ON knowledge_embeddings(access_frequency DESC);
CREATE INDEX IF NOT EXISTS idx_knowledge_embeddings_vector
ON knowledge_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
CREATE INDEX IF NOT EXISTS idx_knowledge_source_repos ON knowledge_embeddings USING gin(source_repos);
-- Repository metadata indexes
CREATE INDEX IF NOT EXISTS idx_repository_metadata_repo_id ON repository_metadata(repo_id);
CREATE INDEX IF NOT EXISTS idx_repository_metadata_analyzed ON repository_metadata(last_analyzed DESC);
CREATE INDEX IF NOT EXISTS idx_repository_metadata_language ON repository_metadata(primary_language);
-- File history indexes
CREATE INDEX IF NOT EXISTS idx_file_history_repo_file ON file_analysis_history(repo_id, file_path);
CREATE INDEX IF NOT EXISTS idx_file_history_analyzed ON file_analysis_history(analyzed_at DESC);
CREATE INDEX IF NOT EXISTS idx_file_history_severity ON file_analysis_history(severity_score);
-- ================================================
-- MATERIALIZED VIEWS FOR COMMON QUERIES
-- ================================================
-- High confidence knowledge view
CREATE MATERIALIZED VIEW IF NOT EXISTS high_confidence_knowledge AS
SELECT
fact_id,
content,
category,
confidence,
source_repos,
created_at,
last_accessed,
access_frequency
FROM knowledge_embeddings
WHERE confidence > 0.8
ORDER BY confidence DESC, access_frequency DESC;
-- A unique index is required before REFRESH MATERIALIZED VIEW CONCURRENTLY can be used
CREATE UNIQUE INDEX ON high_confidence_knowledge (fact_id);
CREATE INDEX ON high_confidence_knowledge (category);
CREATE INDEX ON high_confidence_knowledge (confidence DESC);
-- Repository quality summary view
CREATE MATERIALIZED VIEW IF NOT EXISTS repository_quality_summary AS
SELECT
rm.repo_id,
rm.repo_path,
rm.repo_name,
rm.primary_language,
rm.total_files,
rm.total_lines,
rm.quality_score,
rm.last_analyzed,
COUNT(ce.id) as total_embeddings,
AVG(fah.severity_score) as avg_file_quality,
COUNT(DISTINCT fah.file_path) as analyzed_files_count
FROM repository_metadata rm
LEFT JOIN code_embeddings ce ON rm.repo_id = ce.repo_id
LEFT JOIN file_analysis_history fah ON rm.repo_id = fah.repo_id
GROUP BY rm.repo_id, rm.repo_path, rm.repo_name, rm.primary_language,
rm.total_files, rm.total_lines, rm.quality_score, rm.last_analyzed;
-- A unique index is required before REFRESH MATERIALIZED VIEW CONCURRENTLY can be used
CREATE UNIQUE INDEX ON repository_quality_summary (repo_id);
CREATE INDEX ON repository_quality_summary (quality_score DESC);
CREATE INDEX ON repository_quality_summary (last_analyzed DESC);
-- Recent activity view
CREATE MATERIALIZED VIEW IF NOT EXISTS recent_activity AS
SELECT
'query' as activity_type,
session_id as identifier,
query_text as description,
timestamp as activity_time,
repo_context
FROM query_embeddings
WHERE timestamp >= CURRENT_TIMESTAMP - INTERVAL '7 days'
UNION ALL
SELECT
'analysis' as activity_type,
repo_id as identifier,
file_path as description,
analyzed_at as activity_time,
repo_id as repo_context
FROM file_analysis_history
WHERE analyzed_at >= CURRENT_TIMESTAMP - INTERVAL '7 days'
ORDER BY activity_time DESC;
CREATE INDEX ON recent_activity (activity_time DESC);
CREATE INDEX ON recent_activity (activity_type);
-- ================================================
-- STORED FUNCTIONS AND PROCEDURES
-- ================================================
-- Function to refresh all materialized views
CREATE OR REPLACE FUNCTION refresh_memory_views()
RETURNS void AS $$
BEGIN
REFRESH MATERIALIZED VIEW CONCURRENTLY high_confidence_knowledge;
REFRESH MATERIALIZED VIEW CONCURRENTLY repository_quality_summary;
-- recent_activity has no unique index, so it cannot be refreshed CONCURRENTLY
REFRESH MATERIALIZED VIEW recent_activity;
-- Log the refresh
INSERT INTO memory_consolidation_log (
source_type, source_id, target_memory_type, target_id,
consolidation_confidence, consolidation_metadata
) VALUES (
'system', 'materialized_views', 'system', 'view_refresh',
1.0, '{"refresh_time": "' || CURRENT_TIMESTAMP || '"}'::jsonb
);
END;
$$ LANGUAGE plpgsql;
-- Function to calculate semantic similarity between texts
CREATE OR REPLACE FUNCTION calculate_similarity(embedding1 vector(384), embedding2 vector(384))
RETURNS real AS $$
BEGIN
RETURN 1 - (embedding1 <=> embedding2);
END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT;
-- Function to update access patterns
CREATE OR REPLACE FUNCTION update_access_pattern(table_name text, id_column text, id_value text)
RETURNS void AS $$
BEGIN
CASE table_name
WHEN 'knowledge_embeddings' THEN
EXECUTE 'UPDATE knowledge_embeddings SET last_accessed = CURRENT_TIMESTAMP, access_frequency = access_frequency + 1 WHERE fact_id = $1'
USING id_value;
WHEN 'code_embeddings' THEN
EXECUTE 'UPDATE code_embeddings SET last_accessed = CURRENT_TIMESTAMP, access_count = access_count + 1 WHERE id = $1::uuid'
USING id_value;
ELSE
RAISE EXCEPTION 'Unsupported table: %', table_name;
END CASE;
END;
$$ LANGUAGE plpgsql;
-- Function to cleanup old memories
CREATE OR REPLACE FUNCTION cleanup_old_memories(retention_days integer DEFAULT 365)
RETURNS integer AS $$
DECLARE
deleted_count integer := 0;
cutoff_date timestamp;
BEGIN
cutoff_date := CURRENT_TIMESTAMP - (retention_days || ' days')::interval;
-- Delete old query embeddings (episodic memories)
DELETE FROM query_embeddings WHERE timestamp < cutoff_date;
GET DIAGNOSTICS deleted_count = ROW_COUNT;
-- Update knowledge confidence based on access patterns
UPDATE knowledge_embeddings
SET confidence = LEAST(confidence * (
CASE
WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - last_accessed)) / 86400 < 30
THEN 1.05
ELSE 0.98
END *
(1.0 + LOG(access_frequency + 1) / 20.0)
), 1.0);
-- Log cleanup activity
INSERT INTO memory_consolidation_log (
source_type, source_id, target_memory_type, target_id,
consolidation_confidence, consolidation_metadata
) VALUES (
'system', 'cleanup_function', 'system', 'memory_cleanup',
1.0, ('{"deleted_records": ' || deleted_count || ', "cutoff_date": "' || cutoff_date || '"}')::jsonb
);
RETURN deleted_count;
END;
$$ LANGUAGE plpgsql;
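-- Example maintenance calls (illustrative; run manually or on a schedule such as cron/pg_cron):
--   SELECT refresh_memory_views();
--   SELECT cleanup_old_memories(180);  -- keep roughly 180 days of episodic memory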
-- Function to find similar code patterns
CREATE OR REPLACE FUNCTION find_similar_code(
query_embedding vector(384),
repo_filter text DEFAULT NULL,
similarity_threshold real DEFAULT 0.7,
max_results integer DEFAULT 10
)
RETURNS TABLE (
id uuid,
repo_id varchar(255),
file_path text,
similarity real,
metadata jsonb
) AS $$
BEGIN
RETURN QUERY
SELECT
ce.id,
ce.repo_id,
ce.file_path,
(1 - (ce.embedding <=> query_embedding))::real as similarity,
ce.metadata
FROM code_embeddings ce
WHERE (repo_filter IS NULL OR ce.repo_id = repo_filter)
AND (1 - (ce.embedding <=> query_embedding)) > similarity_threshold
ORDER BY similarity DESC
LIMIT max_results;
END;
$$ LANGUAGE plpgsql;
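-- Example usage (illustrative; assumes the application supplies a full 384-dimensional
-- embedding — the literal below is truncated for readability):
--   SELECT repo_id, file_path, similarity
--   FROM find_similar_code('[0.01, 0.02, ...]'::vector, 'my-repo', 0.75, 5);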
-- Function to get knowledge by category
CREATE OR REPLACE FUNCTION get_knowledge_by_category(
category_filter varchar(100),
min_confidence real DEFAULT 0.5,
max_results integer DEFAULT 20
)
RETURNS TABLE (
fact_id varchar(255),
content text,
confidence real,
access_frequency integer,
source_repos text[]
) AS $$
BEGIN
RETURN QUERY
SELECT
ke.fact_id,
ke.content,
ke.confidence,
ke.access_frequency,
ke.source_repos
FROM knowledge_embeddings ke
WHERE ke.category = category_filter
AND ke.confidence >= min_confidence
ORDER BY ke.confidence DESC, ke.access_frequency DESC
LIMIT max_results;
END;
$$ LANGUAGE plpgsql;
-- ================================================
-- TRIGGERS FOR AUTOMATIC MAINTENANCE
-- ================================================
-- Trigger function to update repository metadata when embeddings are added
CREATE OR REPLACE FUNCTION update_repository_stats()
RETURNS trigger AS $$
BEGIN
-- Update or insert repository metadata
INSERT INTO repository_metadata (repo_id, repo_path, analysis_count, last_analyzed)
VALUES (NEW.repo_id, NEW.repo_id, 1, CURRENT_TIMESTAMP)
ON CONFLICT (repo_id)
DO UPDATE SET
analysis_count = repository_metadata.analysis_count + 1,
last_analyzed = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Create triggers
DROP TRIGGER IF EXISTS trigger_update_repo_stats ON code_embeddings;
CREATE TRIGGER trigger_update_repo_stats
AFTER INSERT ON code_embeddings
FOR EACH ROW
EXECUTE FUNCTION update_repository_stats();
-- Trigger to automatically update access patterns
CREATE OR REPLACE FUNCTION auto_update_access()
RETURNS trigger AS $$
BEGIN
NEW.last_accessed = CURRENT_TIMESTAMP;
NEW.access_count = COALESCE(OLD.access_count, 0) + 1;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
DROP TRIGGER IF EXISTS trigger_auto_access_update ON code_embeddings;
CREATE TRIGGER trigger_auto_access_update
BEFORE UPDATE ON code_embeddings
FOR EACH ROW
EXECUTE FUNCTION auto_update_access();
-- ================================================
-- SECURITY AND PERMISSIONS
-- ================================================
-- Create roles for different access levels
DO $$
BEGIN
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'repo_analyzer_read') THEN
CREATE ROLE repo_analyzer_read;
END IF;
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'repo_analyzer_write') THEN
CREATE ROLE repo_analyzer_write;
END IF;
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'repo_analyzer_admin') THEN
CREATE ROLE repo_analyzer_admin;
END IF;
END
$$;
-- Grant permissions
GRANT SELECT ON ALL TABLES IN SCHEMA public TO repo_analyzer_read;
GRANT SELECT ON high_confidence_knowledge TO repo_analyzer_read;
GRANT SELECT ON repository_quality_summary TO repo_analyzer_read;
GRANT SELECT ON recent_activity TO repo_analyzer_read;
GRANT SELECT, INSERT, UPDATE ON ALL TABLES IN SCHEMA public TO repo_analyzer_write;
GRANT SELECT ON high_confidence_knowledge TO repo_analyzer_write;
GRANT SELECT ON repository_quality_summary TO repo_analyzer_write;
GRANT SELECT ON recent_activity TO repo_analyzer_write;
GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO repo_analyzer_write;
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO repo_analyzer_admin;
GRANT ALL PRIVILEGES ON high_confidence_knowledge TO repo_analyzer_admin;
GRANT ALL PRIVILEGES ON repository_quality_summary TO repo_analyzer_admin;
GRANT ALL PRIVILEGES ON recent_activity TO repo_analyzer_admin;
GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO repo_analyzer_admin;
GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO repo_analyzer_admin;
-- ================================================
-- DATA VALIDATION AND CONSTRAINTS
-- ================================================
-- Add check constraints for data quality
-- Note: Vector dimensions are validated at insertion time, no need for runtime checks
-- Add constraints for reasonable data ranges
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'reasonable_lines_of_code') THEN
ALTER TABLE file_analysis_history ADD CONSTRAINT reasonable_lines_of_code
CHECK (lines_of_code >= 0 AND lines_of_code <= 1000000);
END IF;
IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'reasonable_complexity') THEN
ALTER TABLE file_analysis_history ADD CONSTRAINT reasonable_complexity
CHECK (complexity_score >= 0.0 AND complexity_score <= 100.0);
END IF;
END
$$;
-- ================================================
-- INITIAL DATA AND CONFIGURATION
-- ================================================
-- Insert initial system configuration
INSERT INTO memory_consolidation_log (
source_type, source_id, target_memory_type, target_id,
consolidation_confidence, consolidation_metadata
) VALUES (
'system', 'database_migration', 'system', 'initial_setup',
1.0, ('{"migration_version": "1.0", "setup_time": "' || CURRENT_TIMESTAMP || '"}')::jsonb
) ON CONFLICT DO NOTHING;
-- Create initial knowledge categories
INSERT INTO knowledge_embeddings (
fact_id, content, category, embedding, confidence, source_repos
) VALUES
(
'init_security_001',
'Always validate and sanitize user input to prevent injection attacks',
'security_vulnerability',
array_fill(0.0, ARRAY[384])::vector(384),
0.95,
ARRAY[]::text[]
),
(
'init_performance_001',
'Use appropriate data structures and algorithms for better performance',
'performance',
array_fill(0.0, ARRAY[384])::vector(384),
0.9,
ARRAY[]::text[]
),
(
'init_best_practice_001',
'Follow consistent naming conventions and code formatting standards',
'best_practice',
array_fill(0.0, ARRAY[384])::vector(384),
0.85,
ARRAY[]::text[]
)
ON CONFLICT (fact_id) DO NOTHING;
-- ================================================
-- BACKUP AND MAINTENANCE PROCEDURES
-- ================================================
-- Function to create backup of critical memory data
CREATE OR REPLACE FUNCTION backup_memory_data(backup_path text DEFAULT '/tmp/memory_backup')
RETURNS text AS $$
DECLARE
backup_file text;
result_message text;
BEGIN
backup_file := backup_path || '_' || to_char(CURRENT_TIMESTAMP, 'YYYY-MM-DD_HH24-MI-SS') || '.sql';
-- This would need to be implemented with actual backup logic
-- For now, just return the intended backup file name
result_message := 'Backup would be created at: ' || backup_file;
-- Log backup activity
INSERT INTO memory_consolidation_log (
source_type, source_id, target_memory_type, target_id,
consolidation_confidence, consolidation_metadata
) VALUES (
'system', 'backup_function', 'system', 'backup_created',
1.0, ('{"backup_file": "' || backup_file || '"}')::jsonb
);
RETURN result_message;
END;
$$ LANGUAGE plpgsql;
-- ================================================
-- MONITORING AND ANALYTICS
-- ================================================
-- View for system health monitoring
CREATE OR REPLACE VIEW system_health_monitor AS
SELECT
'code_embeddings' as table_name,
COUNT(*) as record_count,
MAX(created_at) as latest_record,
AVG(access_count) as avg_access_count
FROM code_embeddings
UNION ALL
SELECT
'query_embeddings' as table_name,
COUNT(*) as record_count,
MAX(timestamp) as latest_record,
NULL as avg_access_count
FROM query_embeddings
UNION ALL
SELECT
'knowledge_embeddings' as table_name,
COUNT(*) as record_count,
MAX(created_at) as latest_record,
AVG(access_frequency) as avg_access_count
FROM knowledge_embeddings;
-- Function to get comprehensive system statistics
CREATE OR REPLACE FUNCTION get_system_statistics()
RETURNS jsonb AS $$
DECLARE
stats jsonb;
BEGIN
SELECT jsonb_build_object(
'total_code_embeddings', (SELECT COUNT(*) FROM code_embeddings),
'total_query_embeddings', (SELECT COUNT(*) FROM query_embeddings),
'total_knowledge_embeddings', (SELECT COUNT(*) FROM knowledge_embeddings),
'unique_repositories', (SELECT COUNT(DISTINCT repo_id) FROM code_embeddings),
'high_confidence_knowledge', (SELECT COUNT(*) FROM knowledge_embeddings WHERE confidence > 0.8),
'recent_activity_7d', (SELECT COUNT(*) FROM query_embeddings WHERE timestamp >= CURRENT_TIMESTAMP - INTERVAL '7 days'),
'average_code_quality', (SELECT AVG(quality_score) FROM repository_metadata),
'last_updated', CURRENT_TIMESTAMP
) INTO stats;
RETURN stats;
END;
$$ LANGUAGE plpgsql;
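-- Example monitoring queries (illustrative):
--   SELECT * FROM system_health_monitor;
--   SELECT get_system_statistics();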
-- ================================================
-- COMPLETION MESSAGE
-- ================================================
DO $$
BEGIN
RAISE NOTICE '================================================';
RAISE NOTICE 'Repository Analyzer Memory System Database Setup Complete';
RAISE NOTICE '================================================';
RAISE NOTICE 'Tables created: code_embeddings, query_embeddings, knowledge_embeddings';
RAISE NOTICE 'Indexes created: Vector similarity indexes with IVFFlat';
RAISE NOTICE 'Functions created: Similarity search, cleanup, statistics';
RAISE NOTICE 'Materialized views created: High confidence knowledge, repository summary';
RAISE NOTICE 'Triggers created: Auto-update repository stats and access patterns';
RAISE NOTICE '================================================';
RAISE NOTICE 'Ready for AI-enhanced repository analysis with persistent memory';
RAISE NOTICE '================================================';
END
$$;

View File

@ -0,0 +1,37 @@
FROM python:3.11-slim
# Set working directory
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
git \
postgresql-client \
curl \
build-essential \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy the service code
COPY . .
# Create necessary directories
RUN mkdir -p /app/logs /app/temp /app/reports
# Set environment variables
ENV PYTHONPATH=/app
ENV PYTHONUNBUFFERED=1
ENV PORT=8022
# Expose port
EXPOSE 8022
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
CMD curl -f http://localhost:8022/health || exit 1
# Run migration and then start the service
CMD ["sh", "-c", "python run_migration.py && python server.py"]
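# Example standalone build/run (illustrative; docker-compose normally builds this image as
# ai-analysis-service, and the environment values come from the compose file):
#   docker build -t ai-analysis-service ./services/ai-analysis-service
#   docker run -p 8022:8022 --env-file .env ai-analysis-service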

View File

@ -0,0 +1,202 @@
# Complete AI Repository Analysis Service
A comprehensive AI-powered repository analysis tool that automatically analyzes **ALL files** in a repository, with no file limit and no user query required.
## 🚀 Features
- **Complete Analysis**: Analyzes ALL files in the repository (no max-files limit)
- **Fully Automated**: No user query required - runs completely automatically
- **Memory-Enhanced**: Learns from previous analyses using advanced memory systems
- **Comprehensive Reports**: Generates detailed PDF reports with executive summaries
- **Multi-Database Support**: Uses PostgreSQL, MongoDB, and Redis for optimal performance
- **Security Focus**: Identifies security vulnerabilities and code quality issues
- **Architecture Assessment**: Provides architectural insights and recommendations
## 📋 Requirements
### System Dependencies
- Python 3.8+
- PostgreSQL with pgvector extension
- MongoDB
- Redis
### Python Dependencies
```bash
pip install anthropic python-dotenv git redis pymongo psycopg2-binary numpy reportlab
```
## 🛠️ Setup
1. **Install Dependencies**:
```bash
pip install -r requirements.txt
```
2. **Database Setup**:
```bash
# Run the database migration
psql -U postgres -d repo_vectors -f 001-schema.sql
```
3. **Environment Variables**:
Create a `.env` file with:
```env
ANTHROPIC_API_KEY=your_api_key_here
REDIS_HOST=localhost
REDIS_PORT=6379
REDIS_DB=0
MONGODB_URL=mongodb://localhost:27017/
MONGODB_DB=repo_analyzer
POSTGRES_HOST=localhost
POSTGRES_PORT=5432
POSTGRES_DB=repo_vectors
POSTGRES_USER=postgres
POSTGRES_PASSWORD=your_password
```
## 🎯 Usage
### Basic Usage
```bash
python ai-analyze.py /path/to/repository
```
### With Custom Output
```bash
python ai-analyze.py /path/to/repository --output my_analysis.pdf
```
### With API Key Override
```bash
python ai-analyze.py /path/to/repository --api-key your_api_key
```
## 📊 What It Analyzes
### File Types Supported
- **Programming Languages**: Python, JavaScript, TypeScript, Java, C++, C#, Go, Rust, PHP, Ruby, Swift, Kotlin
- **Web Technologies**: HTML, CSS, SCSS, SASS
- **Configuration Files**: JSON, YAML, XML, SQL
- **Build Files**: Dockerfile, Makefile, CMake, package.json, requirements.txt, Cargo.toml, pom.xml, build.gradle
- **Documentation**: README.md, Markdown files
### Analysis Coverage
- **Code Quality**: Complexity, maintainability, best practices
- **Security**: Vulnerabilities, injection attacks, authentication issues
- **Architecture**: Project structure, scalability, design patterns
- **Performance**: Optimization opportunities, bottlenecks
- **Documentation**: Completeness and quality
## 📈 Output
### Console Output
- Real-time analysis progress
- Repository statistics
- Quality breakdown by file
- Language distribution
- Memory system statistics
### PDF Report
- Executive summary for leadership
- Repository overview with metrics
- Detailed file-by-file analysis
- Security assessment
- Architecture evaluation
- Recommendations and next steps
## 🧠 Memory System
The tool uses a sophisticated three-tier memory system:
1. **Working Memory (Redis)**: Temporary, fast access for current analysis
2. **Episodic Memory (MongoDB)**: User interactions and analysis sessions
3. **Persistent Memory (PostgreSQL)**: Long-term knowledge and best practices
This allows the tool to learn from previous analyses and provide increasingly accurate insights.
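As a rough illustration of that lookup order, the sketch below checks the fastest tier first and falls back to the slower, more durable ones. It is a minimal conceptual example only: the `lookup_fact` function and the `session_facts` collection are hypothetical and do not necessarily match the service code; connection settings follow the `.env` example above.
```python
# Conceptual sketch only — function and collection names are hypothetical,
# not the actual service implementation.
import json

import psycopg2                  # persistent memory (PostgreSQL + pgvector)
import redis                     # working memory (fast, ephemeral)
from pymongo import MongoClient  # episodic memory (sessions and interactions)


def lookup_fact(fact_id: str):
    """Check the fastest memory tier first, then fall back to the durable ones."""
    # 1. Working memory (Redis): cheap cache hit if the fact was used recently.
    r = redis.Redis(host="localhost", port=6379, db=0)
    cached = r.get(f"fact:{fact_id}")
    if cached:
        return json.loads(cached)

    # 2. Episodic memory (MongoDB): facts recorded during recent analysis sessions.
    mongo = MongoClient("mongodb://localhost:27017/")
    doc = mongo["repo_analyzer"]["session_facts"].find_one({"fact_id": fact_id})
    if doc:
        return {"content": doc["content"], "source": "episodic"}

    # 3. Persistent memory (PostgreSQL): long-term knowledge store.
    #    Password omitted for brevity; see the .env example above.
    conn = psycopg2.connect(host="localhost", dbname="repo_vectors", user="postgres")
    with conn.cursor() as cur:
        cur.execute(
            "SELECT content, confidence FROM knowledge_embeddings WHERE fact_id = %s",
            (fact_id,),
        )
        row = cur.fetchone()
    conn.close()
    return {"content": row[0], "confidence": row[1], "source": "persistent"} if row else None
```
Consolidation is intended to run in the opposite direction: facts that keep proving useful in episodic memory are promoted into the persistent store (see `memory_consolidation_log` in the migration).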
## 🔧 Configuration
### File Size Limits
- Default: 2MB per file (configurable in code)
- Large files are skipped with a notification
### Excluded Directories
- `.git`, `node_modules`, `__pycache__`, `build`, `dist`, `target`
- `venv`, `env`, `.next`, `coverage`, `vendor`
- `bower_components`, `.gradle`, `.m2`, `.cargo`
### Rate Limiting
- 0.1 second delay between file analyses to avoid API rate limits
- Configurable in the code
## 📝 Example Output
```
🚀 Starting Complete AI Repository Analysis
============================================================
Repository: /path/to/my-project
Output: complete_repository_analysis.pdf
Mode: Complete automated analysis of ALL files
============================================================
Scanning repository: /path/to/my-project
Found 127 files to analyze
Starting comprehensive analysis of 127 files...
Analyzing file 1/127: main.py
Analyzing file 2/127: config.js
...
🎯 COMPLETE ANALYSIS FINISHED
============================================================
📊 Repository Statistics:
• Files Analyzed: 127
• Lines of Code: 15,432
• Languages: 8
• Code Quality: 7.2/10
📈 Quality Breakdown:
• High Quality Files (8-10): 45
• Medium Quality Files (5-7): 67
• Low Quality Files (1-4): 15
• Total Issues Found: 89
🔤 Language Distribution:
• Python: 45 files
• JavaScript: 32 files
• TypeScript: 28 files
• HTML: 12 files
• CSS: 10 files
📄 Complete PDF Report: complete_repository_analysis.pdf
✅ Complete analysis finished successfully!
```
## 🚨 Troubleshooting
### Common Issues
1. **Database Connection Errors**:
- Ensure PostgreSQL, MongoDB, and Redis are running
- Check connection credentials in `.env` file
2. **API Key Issues**:
- Verify Anthropic API key is valid and has sufficient credits
- Check rate limits if analysis fails
3. **Memory Issues**:
- Large repositories may require more RAM
- Consider increasing system memory or processing in batches
4. **File Permission Errors**:
- Ensure read access to repository files
- Check write permissions for output directory
## 🤝 Contributing
This is a complete automated analysis system. The tool will:
- Analyze every file in the repository
- Generate comprehensive reports
- Learn from previous analyses
- Provide actionable insights
No user interaction required - just run and get results!

View File

@ -0,0 +1,710 @@
#!/usr/bin/env python3
"""
Robust GitHub Repository AI Analysis Tool
Simplified version with better error handling and JSON parsing.
"""
import os
import asyncio
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from datetime import datetime
import argparse
from dataclasses import dataclass
import shutil
import tempfile
import json
import re
from collections import Counter
# Core packages
import anthropic
from dotenv import load_dotenv
import git
# PDF generation
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_CENTER, TA_LEFT
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Table, TableStyle
from reportlab.lib import colors
@dataclass
class FileAnalysis:
path: str
language: str
lines_of_code: int
complexity_score: float
issues_found: List[str]
recommendations: List[str]
detailed_analysis: str
severity_score: float
@dataclass
class RepositoryAnalysis:
repo_path: str
total_files: int
total_lines: int
languages: Dict[str, int]
architecture_assessment: str
security_assessment: str
code_quality_score: float
file_analyses: List[FileAnalysis]
executive_summary: str
class RobustGitHubAnalyzer:
def __init__(self, api_key: str):
self.client = anthropic.Anthropic(api_key=api_key)
self.temp_dir = None
# Language mapping for file detection
self.language_map = {
'.py': 'Python', '.js': 'JavaScript', '.ts': 'TypeScript',
'.tsx': 'TypeScript', '.jsx': 'JavaScript', '.java': 'Java',
'.cpp': 'C++', '.c': 'C', '.cs': 'C#', '.go': 'Go', '.rs': 'Rust',
'.php': 'PHP', '.rb': 'Ruby', '.swift': 'Swift', '.kt': 'Kotlin',
'.html': 'HTML', '.css': 'CSS', '.scss': 'SCSS', '.sass': 'SASS',
'.sql': 'SQL', '.yaml': 'YAML', '.yml': 'YAML', '.json': 'JSON',
'.xml': 'XML', '.sh': 'Shell', '.dockerfile': 'Docker',
'.md': 'Markdown', '.txt': 'Text'
}
# Code file extensions to analyze
self.code_extensions = set(self.language_map.keys())
def clone_repository(self, repo_path: str) -> str:
"""Clone repository or use existing path."""
if os.path.exists(repo_path):
print(f"Using existing repository: {repo_path}")
return repo_path
else:
print(f"Cloning repository: {repo_path}")
self.temp_dir = tempfile.mkdtemp(prefix="repo_analysis_")
try:
git.Repo.clone_from(repo_path, self.temp_dir)
return self.temp_dir
except Exception as e:
raise Exception(f"Failed to clone repository: {e}")
def get_file_language(self, file_path: Path) -> str:
"""Get programming language from file extension."""
return self.language_map.get(file_path.suffix.lower(), 'Unknown')
def calculate_complexity_score(self, content: str) -> float:
"""Calculate basic complexity score based on code patterns."""
lines = content.split('\n')
complexity_indicators = ['if', 'else', 'elif', 'for', 'while', 'try', 'except', 'catch', 'switch']
complexity = 1
for line in lines:
line_lower = line.lower().strip()
for indicator in complexity_indicators:
if indicator in line_lower:
complexity += 1
# Normalize to 1-10 scale
return min(complexity / max(len(lines), 1) * 100, 10.0)
async def analyze_file_comprehensive(self, file_path: Path, content: str) -> FileAnalysis:
"""Perform comprehensive file analysis using a single, robust prompt."""
language = self.get_file_language(file_path)
lines_of_code = len([line for line in content.split('\n') if line.strip()])
complexity_score = self.calculate_complexity_score(content)
# Truncate content if too long
if len(content) > 4000:
content = content[:4000] + "\n... [truncated for analysis]"
print(f" Analyzing {file_path.name} ({language}, {lines_of_code} lines)")
# Create comprehensive analysis prompt
prompt = f"""
You are a senior software engineer with 25 years of experience. Analyze this {language} code file:
FILENAME: {file_path.name}
LANGUAGE: {language}
LINES OF CODE: {lines_of_code}
CODE:
```{language.lower()}
{content}
```
Provide a comprehensive analysis covering:
1. ISSUES FOUND: List specific problems, bugs, security vulnerabilities, or code smells
2. RECOMMENDATIONS: Actionable suggestions for improvement
3. CODE QUALITY: Overall assessment of code quality and maintainability
4. SECURITY: Any security concerns or vulnerabilities
5. PERFORMANCE: Potential performance issues or optimizations
6. BEST PRACTICES: Adherence to coding standards and best practices
Provide your analysis in clear, structured text (not JSON). Be specific and actionable.
Rate the overall code quality from 1-10 where 10 is excellent.
ANALYSIS:
"""
try:
message = self.client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=3000,
temperature=0.1,
messages=[{"role": "user", "content": prompt}]
)
analysis_text = message.content[0].text.strip()
# Extract severity score from analysis
severity_match = re.search(r'(\d+(?:\.\d+)?)/10', analysis_text)
severity_score = float(severity_match.group(1)) if severity_match else 5.0
# Parse issues and recommendations from the text
issues = self.extract_issues_from_analysis(analysis_text)
recommendations = self.extract_recommendations_from_analysis(analysis_text)
return FileAnalysis(
path=str(file_path.relative_to(Path(self.temp_dir or '.'))),
language=language,
lines_of_code=lines_of_code,
complexity_score=complexity_score,
issues_found=issues,
recommendations=recommendations,
detailed_analysis=analysis_text,
severity_score=severity_score
)
except Exception as e:
print(f" Error analyzing {file_path.name}: {e}")
return FileAnalysis(
path=str(file_path),
language=language,
lines_of_code=lines_of_code,
complexity_score=complexity_score,
issues_found=[f"Analysis failed: {str(e)}"],
recommendations=["Review file manually due to analysis error"],
detailed_analysis=f"Analysis failed due to error: {str(e)}",
severity_score=5.0
)
def extract_issues_from_analysis(self, analysis_text: str) -> List[str]:
"""Extract issues from analysis text."""
issues = []
lines = analysis_text.split('\n')
# Look for common issue indicators
issue_keywords = ['issue', 'problem', 'bug', 'vulnerability', 'error', 'warning', 'concern']
for line in lines:
line_lower = line.lower().strip()
if any(keyword in line_lower for keyword in issue_keywords):
if line.strip() and not line.strip().startswith('#'):
issues.append(line.strip())
return issues[:10] # Limit to top 10 issues
def extract_recommendations_from_analysis(self, analysis_text: str) -> List[str]:
"""Extract recommendations from analysis text."""
recommendations = []
lines = analysis_text.split('\n')
# Look for recommendation indicators
rec_keywords = ['recommend', 'suggest', 'should', 'consider', 'improve']
for line in lines:
line_lower = line.lower().strip()
if any(keyword in line_lower for keyword in rec_keywords):
if line.strip() and not line.strip().startswith('#'):
recommendations.append(line.strip())
return recommendations[:10] # Limit to top 10 recommendations
def scan_repository(self, repo_path: str, max_files: int = 50) -> List[Tuple[Path, str]]:
"""Scan repository and collect files for analysis."""
print(f"Scanning repository: {repo_path}")
files_to_analyze = []
# Important files to always include
important_files = {
'README.md', 'package.json', 'requirements.txt', 'Dockerfile',
'docker-compose.yml', 'tsconfig.json', 'next.config.js',
'tailwind.config.js', 'webpack.config.js', '.env.example'
}
for root, dirs, files in os.walk(repo_path):
# Skip common build/cache directories
dirs[:] = [d for d in dirs if not d.startswith('.') and
d not in {'node_modules', '__pycache__', 'build', 'dist', 'target',
'venv', 'env', '.git', '.next', 'coverage'}]
for file in files:
if len(files_to_analyze) >= max_files:
break
file_path = Path(root) / file
# Skip large files
try:
if file_path.stat().st_size > 1000000: # 1MB limit
continue
except:
continue
# Include important files or files with code extensions
should_include = (
file.lower() in important_files or
file_path.suffix.lower() in self.code_extensions or
file.lower().startswith('dockerfile')
)
if should_include:
try:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
if content.strip(): # Only non-empty files
files_to_analyze.append((file_path, content))
except Exception as e:
print(f"Could not read {file_path}: {e}")
print(f"Found {len(files_to_analyze)} files to analyze")
return files_to_analyze
async def analyze_repository_overview(self, repo_path: str, file_analyses: List[FileAnalysis]) -> Tuple[str, str]:
"""Analyze repository architecture and security."""
print("Analyzing repository overview...")
# Prepare summary data
languages = dict(Counter(fa.language for fa in file_analyses))
total_lines = sum(fa.lines_of_code for fa in file_analyses)
avg_quality = sum(fa.severity_score for fa in file_analyses) / len(file_analyses) if file_analyses else 5.0
# Get repository structure
structure_lines = []
try:
for root, dirs, files in os.walk(repo_path):
dirs[:] = [d for d in dirs if not d.startswith('.') and d not in {'node_modules', '__pycache__'}]
level = root.replace(repo_path, '').count(os.sep)
indent = ' ' * level
structure_lines.append(f"{indent}{os.path.basename(root)}/")
for file in files[:3]: # Limit files shown per directory
structure_lines.append(f"{indent} {file}")
if len(structure_lines) > 50: # Limit total structure size
break
except Exception as e:
structure_lines = [f"Error reading structure: {e}"]
# Architecture analysis
arch_prompt = f"""
You are a Senior Software Architect with 25 years of experience.
Analyze this repository:
REPOSITORY STRUCTURE:
{chr(10).join(structure_lines[:30])}
STATISTICS:
- Total files analyzed: {len(file_analyses)}
- Total lines of code: {total_lines:,}
- Languages: {languages}
- Average code quality: {avg_quality:.1f}/10
TOP FILE ISSUES:
{chr(10).join([f"- {fa.path}: {len(fa.issues_found)} issues" for fa in file_analyses[:10]])}
Provide an architectural assessment covering:
1. Project type and purpose
2. Technology stack evaluation
3. Code organization and structure
4. Scalability and maintainability concerns
5. Key recommendations for improvement
Keep response under 1500 words and focus on actionable insights.
"""
# Security analysis
security_issues = []
for fa in file_analyses:
security_issues.extend([issue for issue in fa.issues_found if
any(keyword in issue.lower() for keyword in
['security', 'vulnerability', 'injection', 'xss', 'auth', 'password'])])
sec_prompt = f"""
You are a Senior Security Engineer with 20+ years of experience.
Security Analysis for repository with {len(file_analyses)} files:
SECURITY ISSUES FOUND:
{chr(10).join(security_issues[:20]) if security_issues else "No obvious security issues detected"}
HIGH-RISK FILE TYPES PRESENT:
{[lang for lang, count in languages.items() if lang in ['JavaScript', 'TypeScript', 'Python', 'PHP', 'SQL']]}
Provide security assessment covering:
1. Overall security posture
2. Main security risks and vulnerabilities
3. Authentication and authorization concerns
4. Data protection and privacy issues
5. Immediate security priorities
Keep response under 1000 words and focus on actionable security recommendations.
"""
try:
# Run both analyses
arch_task = self.client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=2000,
temperature=0.1,
messages=[{"role": "user", "content": arch_prompt}]
)
sec_task = self.client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=1500,
temperature=0.1,
messages=[{"role": "user", "content": sec_prompt}]
)
architecture_assessment = arch_task.content[0].text
security_assessment = sec_task.content[0].text
return architecture_assessment, security_assessment
except Exception as e:
return f"Architecture analysis failed: {e}", f"Security analysis failed: {e}"
async def generate_executive_summary(self, analysis: RepositoryAnalysis) -> str:
"""Generate executive summary for leadership."""
print("Generating executive summary...")
prompt = f"""
You are presenting to C-level executives. Create an executive summary of this technical analysis:
REPOSITORY METRICS:
- Total Files: {analysis.total_files}
- Lines of Code: {analysis.total_lines:,}
- Languages: {analysis.languages}
- Code Quality Score: {analysis.code_quality_score:.1f}/10
KEY FINDINGS:
- Total issues identified: {sum(len(fa.issues_found) for fa in analysis.file_analyses)}
- Files needing attention: {len([fa for fa in analysis.file_analyses if fa.severity_score < 7])}
- High-quality files: {len([fa for fa in analysis.file_analyses if fa.severity_score >= 8])}
Create an executive summary for non-technical leadership covering:
1. Business impact of code quality findings
2. Risk assessment and implications
3. Investment priorities and recommendations
4. Expected ROI from addressing technical debt
5. Competitive implications
Focus on business outcomes, not technical details. Keep under 800 words.
"""
try:
message = self.client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=1200,
temperature=0.1,
messages=[{"role": "user", "content": prompt}]
)
return message.content[0].text
except Exception as e:
return f"Executive summary generation failed: {e}"
def create_pdf_report(self, analysis: RepositoryAnalysis, output_path: str):
"""Generate comprehensive PDF report."""
print(f"Generating PDF report: {output_path}")
doc = SimpleDocTemplate(output_path, pagesize=A4,
leftMargin=72, rightMargin=72,
topMargin=72, bottomMargin=72)
styles = getSampleStyleSheet()
story = []
# Custom styles
title_style = ParagraphStyle(
'CustomTitle',
parent=styles['Heading1'],
fontSize=24,
textColor=colors.darkblue,
spaceAfter=30,
alignment=TA_CENTER
)
heading_style = ParagraphStyle(
'CustomHeading',
parent=styles['Heading2'],
fontSize=16,
textColor=colors.darkblue,
spaceBefore=20,
spaceAfter=10
)
# Title Page
story.append(Paragraph("Repository Analysis Report", title_style))
story.append(Spacer(1, 20))
story.append(Paragraph(f"<b>Repository:</b> {analysis.repo_path}", styles['Normal']))
story.append(Paragraph(f"<b>Analysis Date:</b> {datetime.now().strftime('%B %d, %Y at %H:%M')}", styles['Normal']))
story.append(Paragraph("<b>Generated by:</b> AI Senior Engineering Team", styles['Normal']))
story.append(PageBreak())
# Executive Summary
story.append(Paragraph("Executive Summary", heading_style))
story.append(Paragraph(analysis.executive_summary, styles['Normal']))
story.append(PageBreak())
# Repository Overview
story.append(Paragraph("Repository Overview", heading_style))
overview_data = [
['Metric', 'Value'],
['Total Files Analyzed', str(analysis.total_files)],
['Total Lines of Code', f"{analysis.total_lines:,}"],
['Primary Languages', ', '.join(list(analysis.languages.keys())[:5])],
['Overall Code Quality', f"{analysis.code_quality_score:.1f}/10"],
]
overview_table = Table(overview_data, colWidths=[200, 300])
overview_table.setStyle(TableStyle([
('BACKGROUND', (0, 0), (-1, 0), colors.grey),
('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
('ALIGN', (0, 0), (-1, -1), 'LEFT'),
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
('FONTSIZE', (0, 0), (-1, 0), 12),
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
('BACKGROUND', (0, 1), (-1, -1), colors.beige),
('GRID', (0, 0), (-1, -1), 1, colors.black)
]))
story.append(overview_table)
story.append(Spacer(1, 20))
# Languages Distribution
if analysis.languages:
story.append(Paragraph("Language Distribution", heading_style))
lang_data = [['Language', 'Files']]
for lang, count in sorted(analysis.languages.items(), key=lambda x: x[1], reverse=True):
lang_data.append([lang, str(count)])
lang_table = Table(lang_data, colWidths=[200, 100])
lang_table.setStyle(TableStyle([
('BACKGROUND', (0, 0), (-1, 0), colors.grey),
('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
('ALIGN', (0, 0), (-1, -1), 'LEFT'),
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
('GRID', (0, 0), (-1, -1), 1, colors.black)
]))
story.append(lang_table)
story.append(PageBreak())
# Architecture Assessment
story.append(Paragraph("Architecture Assessment", heading_style))
# Split long text into paragraphs
arch_paragraphs = analysis.architecture_assessment.split('\n\n')
for para in arch_paragraphs[:10]: # Limit paragraphs
if para.strip():
story.append(Paragraph(para.strip(), styles['Normal']))
story.append(Spacer(1, 10))
story.append(PageBreak())
# Security Assessment
story.append(Paragraph("Security Assessment", heading_style))
sec_paragraphs = analysis.security_assessment.split('\n\n')
for para in sec_paragraphs[:10]: # Limit paragraphs
if para.strip():
story.append(Paragraph(para.strip(), styles['Normal']))
story.append(Spacer(1, 10))
story.append(PageBreak())
# File Analysis Summary
story.append(Paragraph("File Analysis Summary", heading_style))
# Summary statistics
high_quality_files = [fa for fa in analysis.file_analyses if fa.severity_score >= 8]
medium_quality_files = [fa for fa in analysis.file_analyses if 5 <= fa.severity_score < 8]
low_quality_files = [fa for fa in analysis.file_analyses if fa.severity_score < 5]
quality_data = [
['Quality Level', 'Files', 'Percentage'],
['High Quality (8-10)', str(len(high_quality_files)), f"{len(high_quality_files)/len(analysis.file_analyses)*100:.1f}%"],
['Medium Quality (5-7)', str(len(medium_quality_files)), f"{len(medium_quality_files)/len(analysis.file_analyses)*100:.1f}%"],
['Low Quality (1-4)', str(len(low_quality_files)), f"{len(low_quality_files)/len(analysis.file_analyses)*100:.1f}%"]
]
quality_table = Table(quality_data)
quality_table.setStyle(TableStyle([
('BACKGROUND', (0, 0), (-1, 0), colors.grey),
('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
('ALIGN', (0, 0), (-1, -1), 'CENTER'),
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
('GRID', (0, 0), (-1, -1), 1, colors.black),
('BACKGROUND', (0, 1), (-1, 1), colors.lightgreen),
('BACKGROUND', (0, 2), (-1, 2), colors.lightyellow),
('BACKGROUND', (0, 3), (-1, 3), colors.lightcoral)
]))
story.append(quality_table)
story.append(Spacer(1, 20))
# Top Issues Found
story.append(Paragraph("Files Requiring Attention", heading_style))
# Sort files by severity (lowest scores first - need most attention)
files_by_priority = sorted(analysis.file_analyses, key=lambda x: x.severity_score)
for i, file_analysis in enumerate(files_by_priority[:15]): # Top 15 files needing attention
story.append(Paragraph(f"<b>{i+1}. {file_analysis.path}</b>", styles['Heading4']))
story.append(Paragraph(f"Language: {file_analysis.language} | Quality Score: {file_analysis.severity_score:.1f}/10 | Lines: {file_analysis.lines_of_code}", styles['Normal']))
# Show top issues
if file_analysis.issues_found:
story.append(Paragraph("Key Issues:", styles['Heading5']))
for issue in file_analysis.issues_found[:3]: # Top 3 issues
story.append(Paragraph(f"{issue}", styles['Normal']))
# Show top recommendations
if file_analysis.recommendations:
story.append(Paragraph("Recommendations:", styles['Heading5']))
for rec in file_analysis.recommendations[:2]: # Top 2 recommendations
story.append(Paragraph(f"{rec}", styles['Normal']))
story.append(Spacer(1, 15))
# Build PDF
try:
doc.build(story)
print(f"✅ PDF report generated successfully: {output_path}")
except Exception as e:
print(f"❌ Error generating PDF: {e}")
async def analyze_repository(self, repo_path: str, max_files: int = 50) -> RepositoryAnalysis:
"""Main analysis function."""
try:
# Clone/access repository
actual_repo_path = self.clone_repository(repo_path)
# Scan files
files_to_analyze = self.scan_repository(actual_repo_path, max_files)
if not files_to_analyze:
raise Exception("No files found to analyze")
# Analyze each file
print(f"Starting analysis of {len(files_to_analyze)} files...")
file_analyses = []
for i, (file_path, content) in enumerate(files_to_analyze):
print(f"Analyzing file {i+1}/{len(files_to_analyze)}: {file_path.name}")
analysis = await self.analyze_file_comprehensive(file_path, content)
file_analyses.append(analysis)
# Small delay to avoid rate limiting
await asyncio.sleep(0.2)
# Repository-level analyses
print("Performing repository-level analysis...")
architecture_assessment, security_assessment = await self.analyze_repository_overview(
actual_repo_path, file_analyses)
# Calculate overall quality score
avg_quality = sum(fa.severity_score for fa in file_analyses) / len(file_analyses)
# Generate statistics
languages = dict(Counter(fa.language for fa in file_analyses))
total_lines = sum(fa.lines_of_code for fa in file_analyses)
# Create repository analysis
repo_analysis = RepositoryAnalysis(
repo_path=repo_path,
total_files=len(file_analyses),
total_lines=total_lines,
languages=languages,
architecture_assessment=architecture_assessment,
security_assessment=security_assessment,
code_quality_score=avg_quality,
file_analyses=file_analyses,
executive_summary=""
)
# Generate executive summary
print("Generating executive summary...")
repo_analysis.executive_summary = await self.generate_executive_summary(repo_analysis)
return repo_analysis
finally:
# Cleanup
if self.temp_dir and os.path.exists(self.temp_dir):
shutil.rmtree(self.temp_dir)
print("Temporary files cleaned up")
async def main():
# Load environment variables
load_dotenv()
parser = argparse.ArgumentParser(description="Robust GitHub Repository AI Analysis")
parser.add_argument("repo_path", help="Repository path (local directory or Git URL)")
parser.add_argument("--output", "-o", default="repository_analysis.pdf",
help="Output PDF file path")
parser.add_argument("--max-files", type=int, default=50,
help="Maximum files to analyze")
parser.add_argument("--api-key", help="Anthropic API key (overrides .env)")
args = parser.parse_args()
# Get API key
api_key = args.api_key or os.getenv('ANTHROPIC_API_KEY')
if not api_key:
print("❌ Error: ANTHROPIC_API_KEY not found in .env file or command line")
print("Please create a .env file with: ANTHROPIC_API_KEY=your_key_here")
return 1
try:
print("🚀 Starting Repository Analysis")
print("=" * 60)
print(f"Repository: {args.repo_path}")
print(f"Max files: {args.max_files}")
print(f"Output: {args.output}")
print("=" * 60)
# Initialize analyzer
analyzer = RobustGitHubAnalyzer(api_key)
# Perform analysis
analysis = await analyzer.analyze_repository(args.repo_path, args.max_files)
# Generate PDF report
analyzer.create_pdf_report(analysis, args.output)
# Print summary to console
print("\n" + "=" * 60)
print("🎯 ANALYSIS COMPLETE")
print("=" * 60)
print(f"📊 Repository Statistics:")
print(f" • Files Analyzed: {analysis.total_files}")
print(f" • Lines of Code: {analysis.total_lines:,}")
print(f" • Languages: {len(analysis.languages)}")
print(f" • Code Quality: {analysis.code_quality_score:.1f}/10")
# Quality breakdown
high_quality = len([fa for fa in analysis.file_analyses if fa.severity_score >= 8])
low_quality = len([fa for fa in analysis.file_analyses if fa.severity_score < 5])
print(f"\n📈 Quality Breakdown:")
print(f" • High Quality Files: {high_quality}")
print(f" • Files Needing Attention: {low_quality}")
print(f" • Total Issues Found: {sum(len(fa.issues_found) for fa in analysis.file_analyses)}")
print(f"\n📄 Detailed PDF Report: {args.output}")
print("\n✅ Analysis completed successfully!")
return 0
except Exception as e:
print(f"❌ Error during analysis: {e}")
return 1
if __name__ == "__main__":
exit(asyncio.run(main()))

View File

@ -0,0 +1,232 @@
[Binary file: ReportLab-generated PDF report (9 pages, created 2025-09-19); raw PDF object streams and xref table omitted]

View File

@ -0,0 +1,363 @@
# GitHub Repository Analysis Report
**Repository:** https://github.com/TejasTeju-dev/AI-Blog
**Analysis Date:** 2025-09-19 11:09:14
**Analyzed by:** Claude AI Assistant
---
## Executive Summary
Let me provide a comprehensive analysis:
1. **Project Type & Purpose**:
This appears to be a modern web application built with Next.js, likely a blog or content platform with articles and topics sections. The extensive UI component library suggests it's a full-featured web application with a sophisticated user interface.
2. **Technology Stack**:
- Frontend Framework: Next.js (React)
- Language: TypeScript
- Styling: Tailwind CSS
- Package Manager: pnpm
- UI Components: Extensive component library (possibly using shadcn/ui)
- State Management: Custom hooks
- Animations: Multiple background animation components
3. **Architecture Overview**:
The project follows Next.js 13+ App Router structure:
```
app/ # Main application routes
components/ # Reusable UI components
hooks/ # Custom React hooks
lib/ # Utility functions
public/ # Static assets
styles/ # Global styles
```
4. **Key Components**:
- **UI Components**: Comprehensive set of 40+ UI components including:
- Basic elements (Button, Input, Form)
- Navigation (Navbar, Menu, Breadcrumb)
- Feedback (Toast, Alert, Dialog)
- Data display (Table, Chart, Card)
- Layout (Grid, Sidebar)
- **Background Components**:
- AnimatedGrid
- FloatingElements
- ParticleField
- 3DBackground
- **Core Pages**:
- Home (page.tsx)
- Articles
- Blog
- Topics
- About
5. **Development Setup**:
Required setup likely includes:
```bash
# Install dependencies
pnpm install
# Development server
pnpm dev
# Build
pnpm build
```
Requirements:
- Node.js
- pnpm
- TypeScript knowledge
- Understanding of Next.js and React
6. **Code Quality Assessment**:
Strengths:
- Well-organized directory structure
- Consistent use of TypeScript
- Modular component architecture
- Separation of concerns (UI components, hooks, pages)
- Comprehensive UI component library
- Modern development practices (App Router, TypeScript)
Areas for consideration:
- The large number of UI components may warrant component-level documentation
- Multiple background animation components may need performance optimization
- Could benefit from API documentation
- Testing infrastructure appears to be missing (not visible in the structure)
Additional Observations:
- The project uses modern React patterns (hooks)
- Strong focus on UI/UX with multiple animation options
- Built with scalability in mind (modular structure)
- Follows Next.js best practices
- Uses modern tooling (pnpm, TypeScript, Tailwind)
This appears to be a well-structured, modern web application with a strong focus on UI components and user experience. The architecture suggests it's built for scalability and maintainability.
---
## Detailed Code Analysis
I'll analyze each aspect of this Next.js project:
1. **Code Quality**
- Strong TypeScript usage with proper type definitions and configurations
- Consistent code formatting and organization following Next.js 13+ conventions
- Clean project structure with clear separation of concerns
- Good use of modern React patterns and Next.js features
- Well-structured configuration files (next.config.js, tailwind.config.js, etc.)
- Follows React best practices with components organization
2. **Design Patterns**
- Component-based architecture following React principles
- Server-side rendering approach using Next.js App Router
- Atomic design pattern evident in UI components organization
- Utility-first CSS approach with Tailwind
- Singleton pattern for configuration management
- Dependency injection through React context (seen in theme implementation)
3. **Key Dependencies**
- Core: Next.js 14.2, React 19, TypeScript
- UI: Radix UI components, Tailwind CSS, shadcn/ui
- 3D: Three.js, React Three Fiber
- Forms: React Hook Form, Zod validation
- Utilities: clsx, tailwind-merge
- Development: PostCSS, TypeScript, ESLint
4. **Potential Issues**
- Build errors being ignored (typescript.ignoreBuildErrors, eslint.ignoreDuringBuilds)
- Unoptimized images configuration could impact performance
- Missing error boundaries and proper error handling
- Security considerations for client-side rendering of 3D content
- No explicit API rate limiting or security headers
- Missing proper environment variable handling
5. **Testing Strategy**
- No visible testing setup (Jest, React Testing Library, etc.)
- Missing unit tests, integration tests, and e2e tests
- Should add testing framework and implement test coverage
- Consider adding Cypress or Playwright for e2e testing
6. **Documentation**
- Good README with clear project structure and setup instructions
- Missing JSDoc comments for components and functions
- Could benefit from more inline documentation
- API documentation could be improved
- Missing contribution guidelines and deployment docs
7. **Maintainability**
Strengths:
- Clear project structure
- Modern tooling and frameworks
- Type safety with TypeScript
- Component modularity
- Consistent coding style
Areas for Improvement:
- Add comprehensive testing
- Improve error handling
- Better documentation
- Implement proper CI/CD
- Add proper logging system
- Consider performance monitoring
Additional Recommendations:
1. Security:
```typescript
// Add security headers
const securityHeaders = [
{ key: 'X-XSS-Protection', value: '1; mode=block' },
{ key: 'X-Frame-Options', value: 'SAMEORIGIN' },
{ key: 'X-Content-Type-Options', value: 'nosniff' },
]
```
2. Error Handling:
```typescript
// Add error boundary component
class ErrorBoundary extends React.Component {
  state = { hasError: false };
  static getDerivedStateFromError(error) {
    return { hasError: true };
  }
  componentDidCatch(error, errorInfo) {
    // Log the error to a monitoring service
  }
  render() {
    return this.state.hasError ? <p>Something went wrong.</p> : this.props.children;
  }
}
```
3. Testing Setup:
```json
// Add to package.json
{
"jest": {
"setupFilesAfterEnv": ["<rootDir>/jest.setup.js"],
"testEnvironment": "jsdom"
},
"scripts": {
"test": "jest",
"test:watch": "jest --watch",
"test:coverage": "jest --coverage"
}
}
```
4. Performance Monitoring:
```typescript
// Add performance monitoring
export function reportWebVitals(metric) {
if (metric.label === 'web-vital') {
console.log(metric); // Send to analytics
}
}
```
The project has a solid foundation but would benefit from these improvements for production readiness.
---
## Security & Best Practices Analysis
I'll analyze the repository based on the provided files and structure:
1. **Security Issues**:
- ⚠️ ESLint and TypeScript build errors are being ignored (`ignoreDuringBuilds: true` and `ignoreBuildErrors: true`), which could mask security-related issues
- ⚠️ Image optimization is disabled (`unoptimized: true`), which could lead to performance and security concerns
- ✅ Remote image patterns are properly restricted to specific domains (unsplash.com)
- ⚠️ No explicit CSP (Content Security Policy) configuration visible
2. **Secret Management**:
- ✅ Uses environment variables (process.env)
- ⚠️ No visible secret management solution or environment validation
- 🔍 Recommend implementing a secret management solution (e.g., Vault, AWS Secrets Manager)
3. **Dependencies**:
- Cannot fully assess without package.json
- Using Next.js and Tailwind CSS which are generally well-maintained
- 🔍 Recommend implementing dependency scanning (e.g., Snyk, OWASP Dependency-Check)
4. **Best Practices**:
✅ Good:
- TypeScript implementation with strict mode enabled
- Proper module resolution and ES6 target
- Well-organized file structure
- Using modern module systems
- Proper tailwind configuration
⚠️ Concerns:
- Disabling TypeScript and ESLint checks in production
- Multiple next.config files (both .js and .mjs)
- No visible testing configuration
5. **Configuration**:
✅ Good:
- Environment-based configuration for basePath
- Proper TypeScript configuration
- Well-structured Tailwind configuration
⚠️ Concerns:
- Duplicate next.config files might cause confusion
- Some hardcoded values could be externalized
- No visible staging/production environment separation
6. **Error Handling**:
- Cannot fully assess without application code
- ⚠️ Disabling TypeScript and ESLint checks could mask error handling issues
- 🔍 Recommend implementing proper error boundaries and logging
7. **Recommendations**:
Security:
```typescript
// Enable TypeScript and ESLint checks
const nextConfig = {
eslint: {
ignoreDuringBuilds: false,
},
typescript: {
ignoreBuildErrors: false,
}
}
```
Configuration:
```javascript
// Consolidate next.config files
// Add proper environment validation
const validateEnv = () => {
const required = ['API_KEY', 'DATABASE_URL'];
required.forEach(key => {
if (!process.env[key]) throw new Error(`Missing ${key}`);
});
}
```
Best Practices:
1. Implement proper CSP:
```javascript
// next.config.js
{
async headers() {
return [
{
source: '/:path*',
headers: [
{
key: 'Content-Security-Policy',
value: "default-src 'self';"
}
]
}
]
}
}
```
2. Enable image optimization:
```javascript
images: {
unoptimized: false,
domains: ['images.unsplash.com'],
}
```
Additional Recommendations:
1. Implement security headers
2. Add input validation
3. Set up proper error boundaries
4. Add proper testing configuration
5. Implement API rate limiting
6. Add security scanning in CI/CD
7. Implement proper logging
8. Add environment validation
9. Consider implementing authentication/authorization
10. Add proper CORS configuration
Environment Setup:
```bash
# .env.example
NODE_ENV=development
API_KEY=
DATABASE_URL=
```
This analysis is based on the configuration files provided. For a more comprehensive security assessment, access to the actual application code, API endpoints, and authentication mechanisms would be needed.
---
## Recommendations Summary
Based on the analysis, here are the key recommendations for this repository:
1. **Immediate Actions**: Critical issues that should be addressed promptly
2. **Code Quality Improvements**: Suggestions for better maintainability
3. **Security Enhancements**: Steps to improve security posture
4. **Documentation**: Areas where documentation could be enhanced
5. **Architecture**: Potential architectural improvements
---
*This analysis was generated using AI and should be reviewed by human developers for accuracy and context.*

View File

@ -0,0 +1,391 @@
#!/usr/bin/env python3
"""
GitHub Repository AI Analysis Tool
Analyzes GitHub repositories using Claude API for comprehensive code insights.
"""
import os
import git
import json
import requests
import tempfile
import shutil
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import argparse
from datetime import datetime
import mimetypes
import base64
class GitHubRepoAnalyzer:
def __init__(self, anthropic_api_key: str):
self.api_key = anthropic_api_key
self.api_url = "https://api.anthropic.com/v1/messages"
self.temp_dir = None
# File extensions to analyze
self.code_extensions = {
'.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.cpp', '.c', '.h',
'.cs', '.php', '.rb', '.go', '.rs', '.swift', '.kt', '.scala',
'.html', '.css', '.scss', '.sass', '.less', '.vue', '.svelte',
'.sql', '.sh', '.bash', '.yml', '.yaml', '.json', '.xml',
'.dockerfile', '.md', '.rst', '.txt'
}
# Files to always include in analysis
self.important_files = {
'README.md', 'readme.md', 'README.txt', 'readme.txt',
'package.json', 'requirements.txt', 'Cargo.toml', 'pom.xml',
'build.gradle', 'Makefile', 'dockerfile', 'Dockerfile',
'docker-compose.yml', '.gitignore', 'setup.py', 'pyproject.toml'
}
def clone_repository(self, repo_url: str) -> str:
"""Clone GitHub repository to temporary directory."""
print(f"Cloning repository: {repo_url}")
self.temp_dir = tempfile.mkdtemp(prefix="github_analysis_")
try:
git.Repo.clone_from(repo_url, self.temp_dir)
print(f"Repository cloned to: {self.temp_dir}")
return self.temp_dir
except git.exc.GitCommandError as e:
raise Exception(f"Failed to clone repository: {e}")
def get_file_info(self, file_path: Path) -> Dict:
"""Get file information and content."""
try:
# Check file size (skip files larger than 1MB)
if file_path.stat().st_size > 1024 * 1024:
return {
'path': str(file_path.relative_to(self.temp_dir)),
'size': file_path.stat().st_size,
'content': '[File too large to analyze]',
'encoding': 'skipped'
}
# Try to read as text
try:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
encoding = 'utf-8'
except UnicodeDecodeError:
# If text fails, try binary for certain file types
with open(file_path, 'rb') as f:
raw_content = f.read()
if len(raw_content) < 10000: # Only encode small binary files
content = base64.b64encode(raw_content).decode('ascii')
encoding = 'base64'
else:
content = '[Binary file - content not included]'
encoding = 'binary'
return {
'path': str(file_path.relative_to(self.temp_dir)),
'size': file_path.stat().st_size,
'content': content,
'encoding': encoding,
'mime_type': mimetypes.guess_type(str(file_path))[0]
}
except Exception as e:
return {
'path': str(file_path.relative_to(self.temp_dir)),
'error': str(e),
'content': '[Error reading file]'
}
def scan_repository(self, max_files: int = 50) -> Dict:
"""Scan repository and collect file information."""
print("Scanning repository structure...")
repo_data = {
'structure': [],
'files': [],
'stats': {
'total_files': 0,
'analyzed_files': 0,
'total_size': 0,
'languages': {}
}
}
# Get directory structure
for root, dirs, files in os.walk(self.temp_dir):
# Skip hidden directories and common build/cache directories
dirs[:] = [d for d in dirs if not d.startswith('.') and
d not in {'node_modules', '__pycache__', 'build', 'dist', 'target', 'venv', 'env'}]
level = root.replace(self.temp_dir, '').count(os.sep)
indent = ' ' * level
folder_name = os.path.basename(root) if root != self.temp_dir else '.'
repo_data['structure'].append(f"{indent}{folder_name}/")
# Process files
for file in files:
if file.startswith('.'):
continue
file_path = Path(root) / file
repo_data['stats']['total_files'] += 1
repo_data['stats']['total_size'] += file_path.stat().st_size
# Track languages
ext = file_path.suffix.lower()
if ext:
repo_data['stats']['languages'][ext] = repo_data['stats']['languages'].get(ext, 0) + 1
# Add to structure
repo_data['structure'].append(f"{indent} {file}")
# Decide if we should analyze this file
should_analyze = (
file.lower() in self.important_files or
ext in self.code_extensions or
repo_data['stats']['analyzed_files'] < max_files
)
if should_analyze and repo_data['stats']['analyzed_files'] < max_files:
file_info = self.get_file_info(file_path)
repo_data['files'].append(file_info)
repo_data['stats']['analyzed_files'] += 1
return repo_data
def call_claude_api(self, prompt: str, max_tokens: int = 4000) -> str:
"""Make API call to Claude."""
headers = {
"Content-Type": "application/json",
"x-api-key": self.api_key,
"anthropic-version": "2023-06-01"
}
data = {
"model": "claude-3-5-sonnet-20241022",
"max_tokens": max_tokens,
"messages": [
{"role": "user", "content": prompt}
]
}
try:
response = requests.post(self.api_url, headers=headers, json=data)
response.raise_for_status()
result = response.json()
return result['content'][0]['text']
except requests.exceptions.RequestException as e:
raise Exception(f"API request failed: {e}")
def analyze_repository_overview(self, repo_data: Dict) -> str:
"""Get high-level repository analysis."""
print("Analyzing repository overview...")
structure_summary = "\n".join(repo_data['structure'][:100]) # Limit structure size
prompt = f"""
Analyze this GitHub repository and provide a comprehensive overview:
REPOSITORY STRUCTURE:
{structure_summary}
STATISTICS:
- Total files: {repo_data['stats']['total_files']}
- Files analyzed: {repo_data['stats']['analyzed_files']}
- Total size: {repo_data['stats']['total_size']} bytes
- Languages found: {dict(list(repo_data['stats']['languages'].items())[:10])}
Please provide:
1. **Project Type & Purpose**: What kind of project is this?
2. **Technology Stack**: What technologies, frameworks, and languages are used?
3. **Architecture Overview**: How is the project structured?
4. **Key Components**: What are the main modules/components?
5. **Development Setup**: What's needed to run this project?
6. **Code Quality Assessment**: Initial observations about code organization
"""
return self.call_claude_api(prompt)
def analyze_code_files(self, repo_data: Dict) -> str:
"""Analyze individual code files."""
print("Analyzing code files...")
# Prepare file contents for analysis
files_content = []
for file_info in repo_data['files'][:20]: # Limit to first 20 files
if file_info.get('encoding') == 'utf-8' and len(file_info.get('content', '')) < 5000:
files_content.append(f"=== {file_info['path']} ===\n{file_info['content']}\n")
files_text = "\n".join(files_content)
prompt = f"""
Analyze these key files from the repository:
{files_text}
Please provide detailed analysis covering:
1. **Code Quality**: Code style, organization, and best practices
2. **Design Patterns**: What patterns and architectural approaches are used?
3. **Dependencies & Libraries**: Key external dependencies identified
4. **Potential Issues**: Any code smells, security concerns, or improvements needed
5. **Testing Strategy**: How is testing implemented (if at all)?
6. **Documentation**: Quality of inline documentation and comments
7. **Maintainability**: How maintainable and extensible is this code?
"""
return self.call_claude_api(prompt, max_tokens=6000)
def analyze_security_and_best_practices(self, repo_data: Dict) -> str:
"""Analyze security and best practices."""
print("Analyzing security and best practices...")
# Look for security-sensitive files
security_files = []
for file_info in repo_data['files']:
path_lower = file_info['path'].lower()
if any(term in path_lower for term in ['config', 'env', 'secret', 'key', 'auth', 'security']):
if file_info.get('encoding') == 'utf-8':
security_files.append(f"=== {file_info['path']} ===\n{file_info['content'][:2000]}\n")
security_content = "\n".join(security_files[:10])
prompt = f"""
Analyze this repository for security and best practices:
SECURITY-RELEVANT FILES:
{security_content}
FILE STRUCTURE ANALYSIS:
{json.dumps(repo_data['stats'], indent=2)}
Please analyze:
1. **Security Issues**: Potential security vulnerabilities or concerns
2. **Secret Management**: How are secrets/credentials handled?
3. **Dependencies**: Are there any vulnerable dependencies?
4. **Best Practices**: Adherence to language/framework best practices
5. **Configuration**: Are configurations properly externalized?
6. **Error Handling**: How are errors handled throughout the codebase?
7. **Recommendations**: Specific suggestions for improvement
"""
return self.call_claude_api(prompt, max_tokens=5000)
def generate_comprehensive_report(self, repo_url: str, overview: str, code_analysis: str, security_analysis: str) -> str:
"""Generate final comprehensive report."""
print("Generating comprehensive report...")
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
report = f"""
# GitHub Repository Analysis Report
**Repository:** {repo_url}
**Analysis Date:** {timestamp}
**Analyzed by:** Claude AI Assistant
---
## Executive Summary
{overview}
---
## Detailed Code Analysis
{code_analysis}
---
## Security & Best Practices Analysis
{security_analysis}
---
## Recommendations Summary
Based on the analysis, here are the key recommendations for this repository:
1. **Immediate Actions**: Critical issues that should be addressed promptly
2. **Code Quality Improvements**: Suggestions for better maintainability
3. **Security Enhancements**: Steps to improve security posture
4. **Documentation**: Areas where documentation could be enhanced
5. **Architecture**: Potential architectural improvements
---
*This analysis was generated using AI and should be reviewed by human developers for accuracy and context.*
"""
return report
def analyze(self, repo_url: str, output_file: Optional[str] = None) -> str:
"""Main analysis function."""
try:
# Clone repository
self.clone_repository(repo_url)
# Scan repository structure and files
repo_data = self.scan_repository()
# Perform different types of analysis
overview = self.analyze_repository_overview(repo_data)
code_analysis = self.analyze_code_files(repo_data)
security_analysis = self.analyze_security_and_best_practices(repo_data)
# Generate comprehensive report
final_report = self.generate_comprehensive_report(
repo_url, overview, code_analysis, security_analysis
)
# Save report if output file specified
if output_file:
with open(output_file, 'w', encoding='utf-8') as f:
f.write(final_report)
print(f"Report saved to: {output_file}")
return final_report
finally:
# Cleanup temporary directory
if self.temp_dir and os.path.exists(self.temp_dir):
shutil.rmtree(self.temp_dir)
print("Temporary files cleaned up")
def main():
parser = argparse.ArgumentParser(description="Analyze GitHub repository using Claude AI")
parser.add_argument("repo_url", help="GitHub repository URL")
parser.add_argument("--api-key", required=True, help="Anthropic API key")
parser.add_argument("--output", "-o", help="Output file path (optional)")
parser.add_argument("--max-files", type=int, default=50, help="Maximum files to analyze")
args = parser.parse_args()
# Initialize analyzer
analyzer = GitHubRepoAnalyzer(args.api_key)
try:
print("Starting GitHub repository analysis...")
print("=" * 50)
# Perform analysis
report = analyzer.analyze(args.repo_url, args.output)
# Print report if no output file specified
if not args.output:
print("\n" + "=" * 50)
print("ANALYSIS REPORT")
print("=" * 50)
print(report)
print("\nAnalysis completed successfully!")
except Exception as e:
print(f"Error during analysis: {e}")
return 1
return 0
if __name__ == "__main__":
exit(main())

View File

@ -0,0 +1,391 @@
#!/usr/bin/env python3
"""
GitHub Repository AI Analysis Tool
Analyzes GitHub repositories using Claude API for comprehensive code insights.
"""
import os
import git
import json
import tempfile
import shutil
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import argparse
from datetime import datetime
import mimetypes
import base64
from dotenv import load_dotenv
import anthropic
class GitHubRepoAnalyzer:
def __init__(self, anthropic_api_key: str = None):
# Load environment variables
load_dotenv()
# Get API key from parameter or environment
self.api_key = anthropic_api_key or os.getenv('ANTHROPIC_API_KEY')
if not self.api_key:
raise ValueError("Anthropic API key not found. Please set ANTHROPIC_API_KEY in .env file or pass as parameter.")
# Initialize Anthropic client
self.client = anthropic.Anthropic(api_key=self.api_key)
self.temp_dir = None
# File extensions to analyze
self.code_extensions = {
'.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.cpp', '.c', '.h',
'.cs', '.php', '.rb', '.go', '.rs', '.swift', '.kt', '.scala',
'.html', '.css', '.scss', '.sass', '.less', '.vue', '.svelte',
'.sql', '.sh', '.bash', '.yml', '.yaml', '.json', '.xml',
'.dockerfile', '.md', '.rst', '.txt'
}
# Files to always include in analysis
self.important_files = {
'README.md', 'readme.md', 'README.txt', 'readme.txt',
'package.json', 'requirements.txt', 'Cargo.toml', 'pom.xml',
'build.gradle', 'Makefile', 'dockerfile', 'Dockerfile',
'docker-compose.yml', '.gitignore', 'setup.py', 'pyproject.toml'
}
def clone_repository(self, repo_url: str) -> str:
"""Clone GitHub repository to temporary directory."""
print(f"Cloning repository: {repo_url}")
self.temp_dir = tempfile.mkdtemp(prefix="github_analysis_")
try:
git.Repo.clone_from(repo_url, self.temp_dir)
print(f"Repository cloned to: {self.temp_dir}")
return self.temp_dir
except git.exc.GitCommandError as e:
raise Exception(f"Failed to clone repository: {e}")
def get_file_info(self, file_path: Path) -> Dict:
"""Get file information and content."""
try:
# Check file size (skip files larger than 1MB)
if file_path.stat().st_size > 1024 * 1024:
return {
'path': str(file_path.relative_to(self.temp_dir)),
'size': file_path.stat().st_size,
'content': '[File too large to analyze]',
'encoding': 'skipped'
}
# Try to read as text
try:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
encoding = 'utf-8'
except UnicodeDecodeError:
# If text fails, try binary for certain file types
with open(file_path, 'rb') as f:
raw_content = f.read()
if len(raw_content) < 10000: # Only encode small binary files
content = base64.b64encode(raw_content).decode('ascii')
encoding = 'base64'
else:
content = '[Binary file - content not included]'
encoding = 'binary'
return {
'path': str(file_path.relative_to(self.temp_dir)),
'size': file_path.stat().st_size,
'content': content,
'encoding': encoding,
'mime_type': mimetypes.guess_type(str(file_path))[0]
}
except Exception as e:
return {
'path': str(file_path.relative_to(self.temp_dir)),
'error': str(e),
'content': '[Error reading file]'
}
def scan_repository(self, max_files: int = 50) -> Dict:
"""Scan repository and collect file information."""
print("Scanning repository structure...")
repo_data = {
'structure': [],
'files': [],
'stats': {
'total_files': 0,
'analyzed_files': 0,
'total_size': 0,
'languages': {}
}
}
# Get directory structure
for root, dirs, files in os.walk(self.temp_dir):
# Skip hidden directories and common build/cache directories
dirs[:] = [d for d in dirs if not d.startswith('.') and
d not in {'node_modules', '__pycache__', 'build', 'dist', 'target', 'venv', 'env'}]
level = root.replace(self.temp_dir, '').count(os.sep)
indent = ' ' * level
folder_name = os.path.basename(root) if root != self.temp_dir else '.'
repo_data['structure'].append(f"{indent}{folder_name}/")
# Process files
for file in files:
if file.startswith('.'):
continue
file_path = Path(root) / file
repo_data['stats']['total_files'] += 1
repo_data['stats']['total_size'] += file_path.stat().st_size
# Track languages
ext = file_path.suffix.lower()
if ext:
repo_data['stats']['languages'][ext] = repo_data['stats']['languages'].get(ext, 0) + 1
# Add to structure
repo_data['structure'].append(f"{indent} {file}")
# Decide if we should analyze this file
should_analyze = (
file.lower() in self.important_files or
ext in self.code_extensions or
repo_data['stats']['analyzed_files'] < max_files
)
if should_analyze and repo_data['stats']['analyzed_files'] < max_files:
file_info = self.get_file_info(file_path)
repo_data['files'].append(file_info)
repo_data['stats']['analyzed_files'] += 1
return repo_data
def call_claude_api(self, prompt: str, max_tokens: int = 4000) -> str:
"""Make API call to Claude using official Anthropic client."""
try:
message = self.client.messages.create(
model="claude-3-sonnet-20240229",
max_tokens=max_tokens,
messages=[
{"role": "user", "content": prompt}
]
)
return message.content[0].text
except Exception as e:
raise Exception(f"Claude API call failed: {e}")
def analyze_repository_overview(self, repo_data: Dict) -> str:
"""Get high-level repository analysis."""
print("Analyzing repository overview...")
structure_summary = "\n".join(repo_data['structure'][:100]) # Limit structure size
prompt = f"""
Analyze this GitHub repository and provide a comprehensive overview:
REPOSITORY STRUCTURE:
{structure_summary}
STATISTICS:
- Total files: {repo_data['stats']['total_files']}
- Files analyzed: {repo_data['stats']['analyzed_files']}
- Total size: {repo_data['stats']['total_size']} bytes
- Languages found: {dict(list(repo_data['stats']['languages'].items())[:10])}
Please provide:
1. **Project Type & Purpose**: What kind of project is this?
2. **Technology Stack**: What technologies, frameworks, and languages are used?
3. **Architecture Overview**: How is the project structured?
4. **Key Components**: What are the main modules/components?
5. **Development Setup**: What's needed to run this project?
6. **Code Quality Assessment**: Initial observations about code organization
"""
return self.call_claude_api(prompt)
def analyze_code_files(self, repo_data: Dict) -> str:
"""Analyze individual code files."""
print("Analyzing code files...")
# Prepare file contents for analysis
files_content = []
for file_info in repo_data['files'][:20]: # Limit to first 20 files
if file_info.get('encoding') == 'utf-8' and len(file_info.get('content', '')) < 5000:
files_content.append(f"=== {file_info['path']} ===\n{file_info['content']}\n")
files_text = "\n".join(files_content)
prompt = f"""
Analyze these key files from the repository:
{files_text}
Please provide detailed analysis covering:
1. **Code Quality**: Code style, organization, and best practices
2. **Design Patterns**: What patterns and architectural approaches are used?
3. **Dependencies & Libraries**: Key external dependencies identified
4. **Potential Issues**: Any code smells, security concerns, or improvements needed
5. **Testing Strategy**: How is testing implemented (if at all)?
6. **Documentation**: Quality of inline documentation and comments
7. **Maintainability**: How maintainable and extensible is this code?
"""
return self.call_claude_api(prompt, max_tokens=6000)
def analyze_security_and_best_practices(self, repo_data: Dict) -> str:
"""Analyze security and best practices."""
print("Analyzing security and best practices...")
# Look for security-sensitive files
security_files = []
for file_info in repo_data['files']:
path_lower = file_info['path'].lower()
if any(term in path_lower for term in ['config', 'env', 'secret', 'key', 'auth', 'security']):
if file_info.get('encoding') == 'utf-8':
security_files.append(f"=== {file_info['path']} ===\n{file_info['content'][:2000]}\n")
security_content = "\n".join(security_files[:10])
prompt = f"""
Analyze this repository for security and best practices:
SECURITY-RELEVANT FILES:
{security_content}
FILE STRUCTURE ANALYSIS:
{json.dumps(repo_data['stats'], indent=2)}
Please analyze:
1. **Security Issues**: Potential security vulnerabilities or concerns
2. **Secret Management**: How are secrets/credentials handled?
3. **Dependencies**: Are there any vulnerable dependencies?
4. **Best Practices**: Adherence to language/framework best practices
5. **Configuration**: Are configurations properly externalized?
6. **Error Handling**: How are errors handled throughout the codebase?
7. **Recommendations**: Specific suggestions for improvement
"""
return self.call_claude_api(prompt, max_tokens=5000)
def generate_comprehensive_report(self, repo_url: str, overview: str, code_analysis: str, security_analysis: str) -> str:
"""Generate final comprehensive report."""
print("Generating comprehensive report...")
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
report = f"""
# GitHub Repository Analysis Report
**Repository:** {repo_url}
**Analysis Date:** {timestamp}
**Analyzed by:** Claude AI Assistant
---
## Executive Summary
{overview}
---
## Detailed Code Analysis
{code_analysis}
---
## Security & Best Practices Analysis
{security_analysis}
---
## Recommendations Summary
Based on the analysis, here are the key recommendations for this repository:
1. **Immediate Actions**: Critical issues that should be addressed promptly
2. **Code Quality Improvements**: Suggestions for better maintainability
3. **Security Enhancements**: Steps to improve security posture
4. **Documentation**: Areas where documentation could be enhanced
5. **Architecture**: Potential architectural improvements
---
*This analysis was generated using AI and should be reviewed by human developers for accuracy and context.*
"""
return report
def analyze(self, repo_url: str, output_file: Optional[str] = None) -> str:
"""Main analysis function."""
try:
# Clone repository
self.clone_repository(repo_url)
# Scan repository structure and files
repo_data = self.scan_repository()
# Perform different types of analysis
overview = self.analyze_repository_overview(repo_data)
code_analysis = self.analyze_code_files(repo_data)
security_analysis = self.analyze_security_and_best_practices(repo_data)
# Generate comprehensive report
final_report = self.generate_comprehensive_report(
repo_url, overview, code_analysis, security_analysis
)
# Save report if output file specified
if output_file:
with open(output_file, 'w', encoding='utf-8') as f:
f.write(final_report)
print(f"Report saved to: {output_file}")
return final_report
finally:
# Cleanup temporary directory
if self.temp_dir and os.path.exists(self.temp_dir):
shutil.rmtree(self.temp_dir)
print("Temporary files cleaned up")
def main():
parser = argparse.ArgumentParser(description="Analyze GitHub repository using Claude AI")
parser.add_argument("repo_url", help="GitHub repository URL")
parser.add_argument("--api-key", help="Anthropic API key (optional if set in .env)")
parser.add_argument("--output", "-o", help="Output file path (optional)")
parser.add_argument("--max-files", type=int, default=50, help="Maximum files to analyze")
args = parser.parse_args()
try:
# Initialize analyzer
analyzer = GitHubRepoAnalyzer(args.api_key)
print("Starting GitHub repository analysis...")
print("=" * 50)
# Perform analysis
report = analyzer.analyze(args.repo_url, args.output)
# Print report if no output file specified
if not args.output:
print("\n" + "=" * 50)
print("ANALYSIS REPORT")
print("=" * 50)
print(report)
print("\nAnalysis completed successfully!")
except Exception as e:
print(f"Error during analysis: {e}")
return 1
return 0
if __name__ == "__main__":
exit(main())
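# Example invocation (the script filename is illustrative, not taken from the repository):
#   python github_repo_analyzer.py https://github.com/org/repo -o analysis_report.md
# --api-key is optional here; the analyzer falls back to ANTHROPIC_API_KEY from .env.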

View File

@ -0,0 +1,69 @@
# Core AI and API
anthropic>=0.7.0
openai>=1.0.0
# Environment management
python-dotenv>=1.0.0
# Git operations
GitPython>=3.1.0
# PDF generation
reportlab>=4.0.0
matplotlib>=3.7.0
pillow>=10.0.0
# Code analysis and parsing
ast-comments>=1.1.0
astroid>=3.0.0
pygments>=2.15.0
radon>=6.0.1
bandit>=1.7.5
flake8>=6.0.0
pylint>=3.0.0
# File operations and utilities
pathlib2>=2.3.7
chardet>=5.2.0
python-magic>=0.4.27
# Async operations
aiohttp>=3.8.0
aiofiles>=23.0.0
asyncio-throttle>=1.0.2
# Data processing
pandas>=2.0.0
numpy>=1.24.0
python-dateutil>=2.8.0
# Web scraping (for additional repo info)
requests>=2.31.0
beautifulsoup4>=4.12.0
# Testing and code quality
pytest>=7.4.0
pytest-asyncio>=0.21.0
coverage>=7.3.0
# Additional utilities for advanced analysis
networkx>=3.1.0 # For dependency graph analysis
graphviz>=0.20.0 # For visualization
jinja2>=3.1.0 # For report templating
markdown>=3.4.0 # For markdown processing
pyyaml>=6.0.0 # For YAML config files
toml>=0.10.2 # For TOML config files
xmltodict>=0.13.0 # For XML processing
# Performance monitoring
psutil>=5.9.0
memory-profiler>=0.61.0
# Progress bars and UI
tqdm>=4.65.0
rich>=13.5.0
click>=8.1.0
# Security scanning
safety>=2.3.0
pip-audit>=2.6.0

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,46 @@
# AI Analysis Service Environment Configuration
# Service Configuration
PORT=8022
HOST=0.0.0.0
NODE_ENV=development
# AI API Keys
ANTHROPIC_API_KEY=your_anthropic_api_key_here
# Database Configuration
POSTGRES_HOST=localhost
POSTGRES_PORT=5432
POSTGRES_DB=dev_pipeline
POSTGRES_USER=pipeline_admin
POSTGRES_PASSWORD=secure_pipeline_2024
# Redis Configuration
REDIS_HOST=localhost
REDIS_PORT=6379
REDIS_PASSWORD=redis_secure_2024
REDIS_DB=0
# MongoDB Configuration
MONGODB_URL=mongodb://pipeline_admin:mongo_secure_2024@localhost:27017/
MONGODB_DB=repo_analyzer
# JWT Configuration
JWT_ACCESS_SECRET=access-secret-key-2024-tech4biz-secure_pipeline_2024
# Service URLs
USER_AUTH_SERVICE_URL=http://localhost:8011
# Analysis Configuration
MAX_FILES_PER_ANALYSIS=100
MAX_FILE_SIZE_MB=2
ANALYSIS_TIMEOUT_SECONDS=300
# Memory System Configuration
WORKING_MEMORY_TTL=3600
EPISODIC_RETENTION_DAYS=365
PERSISTENT_MEMORY_THRESHOLD=0.8
# Logging Configuration
LOG_LEVEL=INFO
LOG_FILE_PATH=/app/logs/ai-analysis.log
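For reference, a minimal sketch of how a Python service might load and validate these variables with python-dotenv (already listed in the service requirements). The `Settings` dataclass and `load_settings` helper are illustrative names, not taken from the service code:

```python
# Illustrative sketch: load and validate the environment variables defined above.
import os
from dataclasses import dataclass
from dotenv import load_dotenv

@dataclass
class Settings:
    port: int
    anthropic_api_key: str
    postgres_host: str
    postgres_port: int
    redis_host: str
    redis_port: int
    mongodb_url: str

def load_settings() -> Settings:
    load_dotenv()  # reads .env from the current working directory
    required = ["ANTHROPIC_API_KEY", "MONGODB_URL"]
    missing = [name for name in required if not os.getenv(name)]
    if missing:
        raise RuntimeError(f"Missing required environment variables: {', '.join(missing)}")
    return Settings(
        port=int(os.getenv("PORT", "8022")),
        anthropic_api_key=os.environ["ANTHROPIC_API_KEY"],
        postgres_host=os.getenv("POSTGRES_HOST", "localhost"),
        postgres_port=int(os.getenv("POSTGRES_PORT", "5432")),
        redis_host=os.getenv("REDIS_HOST", "localhost"),
        redis_port=int(os.getenv("REDIS_PORT", "6379")),
        mongodb_url=os.environ["MONGODB_URL"],
    )
```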

View File

@ -0,0 +1,104 @@
#!/bin/bash
# Database Migration Script using psql
# Executes the complete 001-schema.sql file
set -e # Exit on any error
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Load environment variables
if [ -f .env ]; then
export $(cat .env | grep -v '^#' | xargs)
fi
# Database connection parameters
DB_HOST=${POSTGRES_HOST:-localhost}
DB_PORT=${POSTGRES_PORT:-5432}
DB_NAME=${POSTGRES_DB:-dev_pipeline}
DB_USER=${POSTGRES_USER:-pipeline_admin}
DB_PASSWORD=${POSTGRES_PASSWORD:-secure_pipeline_2024}
# Schema file
SCHEMA_FILE="001-schema.sql"
echo -e "${BLUE}🔧 AI Repository Analysis Database Migration${NC}"
echo "=================================================="
echo -e "Database: ${YELLOW}${DB_NAME}@${DB_HOST}:${DB_PORT}${NC}"
echo -e "User: ${YELLOW}${DB_USER}${NC}"
echo -e "Schema file: ${YELLOW}${SCHEMA_FILE}${NC}"
echo ""
# Check if psql is available
if ! command -v psql &> /dev/null; then
echo -e "${RED}❌ psql command not found!${NC}"
echo "Please install PostgreSQL client tools:"
echo " Ubuntu/Debian: sudo apt-get install postgresql-client"
echo " CentOS/RHEL: sudo yum install postgresql"
echo " macOS: brew install postgresql"
exit 1
fi
# Check if schema file exists
if [ ! -f "$SCHEMA_FILE" ]; then
echo -e "${RED}❌ Schema file not found: ${SCHEMA_FILE}${NC}"
exit 1
fi
echo -e "${BLUE}• Executing migration...${NC}"
# Set password for psql
export PGPASSWORD="$DB_PASSWORD"
# Run migration
if psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" \
-f "$SCHEMA_FILE" \
-v ON_ERROR_STOP=1 \
--echo-errors \
--echo-queries; then
echo -e "${GREEN}✅ Migration completed successfully!${NC}"
# Verify migration
echo -e "${BLUE}• Verifying migration...${NC}"
TABLES=$(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -t -c "
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'public'
AND table_name IN ('code_embeddings', 'query_embeddings', 'knowledge_embeddings',
'repository_metadata', 'analysis_sessions', 'file_analysis_history')
ORDER BY table_name;
" | tr -d ' ')
if [ -n "$TABLES" ]; then
TABLE_COUNT=$(echo "$TABLES" | wc -l)
echo -e "${GREEN}✓ Found ${TABLE_COUNT} core tables: ${TABLES}${NC}"
else
echo -e "${YELLOW}⚠ Could not verify table creation${NC}"
fi
# Check for pgvector extension
VECTOR_AVAILABLE=$(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -t -c "
SELECT EXISTS(SELECT 1 FROM pg_extension WHERE extname = 'vector');
" | tr -d ' ')
if [ "$VECTOR_AVAILABLE" = "t" ]; then
echo -e "${GREEN}✓ pgvector extension is available${NC}"
else
echo -e "${YELLOW}⚠ pgvector extension not available - vector operations will be limited${NC}"
fi
echo ""
echo -e "${GREEN}🚀 Database migration completed successfully!${NC}"
echo -e "${GREEN}📊 Production-level database ready for AI repository analysis${NC}"
else
echo -e "${RED}❌ Migration failed!${NC}"
exit 1
fi

View File

@ -0,0 +1,203 @@
#!/usr/bin/env python3
"""
Database Migration Script using psql command
Executes the complete 001-schema.sql file using PostgreSQL's psql command
"""
import os
import subprocess
import sys
from dotenv import load_dotenv
import logging
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
logger = logging.getLogger(__name__)
def run_migration():
"""Run the database migration using psql command."""
load_dotenv()
# Database connection parameters
db_config = {
'host': os.getenv('POSTGRES_HOST', 'localhost'),
'port': os.getenv('POSTGRES_PORT', 5432),
'database': os.getenv('POSTGRES_DB', 'dev_pipeline'),
'user': os.getenv('POSTGRES_USER', 'pipeline_admin'),
'password': os.getenv('POSTGRES_PASSWORD', 'secure_pipeline_2024')
}
# Schema file path
schema_file = os.path.join(os.path.dirname(__file__), '001-schema.sql')
if not os.path.exists(schema_file):
logger.error(f"❌ Schema file not found: {schema_file}")
return False
try:
logger.info("🔧 Starting database migration with psql...")
logger.info(f" • Database: {db_config['database']}@{db_config['host']}:{db_config['port']}")
logger.info(f" • User: {db_config['user']}")
logger.info(f" • Schema file: {schema_file}")
# Set PGPASSWORD environment variable for psql
env = os.environ.copy()
env['PGPASSWORD'] = db_config['password']
# Build psql command
psql_cmd = [
'psql',
'-h', db_config['host'],
'-p', str(db_config['port']),
'-U', db_config['user'],
'-d', db_config['database'],
'-f', schema_file,
'-v', 'ON_ERROR_STOP=1', # Stop on first error
'--echo-errors', # Show errors
'--echo-queries' # Show queries being executed
]
logger.info(" • Executing migration...")
logger.info(f" • Command: {' '.join(psql_cmd)}")
# Run psql command
result = subprocess.run(
psql_cmd,
env=env,
capture_output=True,
text=True,
timeout=300 # 5 minute timeout
)
# Check if psql command exists
if result.returncode == 127:
logger.error("❌ psql command not found. Please install PostgreSQL client tools.")
logger.error(" On Ubuntu/Debian: sudo apt-get install postgresql-client")
logger.error(" On CentOS/RHEL: sudo yum install postgresql")
return False
# Check for errors
if result.returncode != 0:
logger.error(f"❌ Migration failed with return code: {result.returncode}")
if result.stderr:
logger.error("STDERR:")
logger.error(result.stderr)
if result.stdout:
logger.error("STDOUT:")
logger.error(result.stdout)
return False
# Log success
logger.info("✅ Migration completed successfully!")
if result.stdout:
logger.info("Migration output:")
# Filter out common psql output noise
lines = result.stdout.split('\n')
for line in lines:
if line.strip() and not line.startswith('SET') and not line.startswith('NOTICE'):
logger.info(f" {line}")
# Verify migration by checking if key tables exist
logger.info(" • Verifying migration...")
verify_cmd = [
'psql',
'-h', db_config['host'],
'-p', str(db_config['port']),
'-U', db_config['user'],
'-d', db_config['database'],
'-t', # tuples only
'-c', """
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'public'
AND table_name IN ('code_embeddings', 'query_embeddings', 'knowledge_embeddings',
'repository_metadata', 'analysis_sessions', 'file_analysis_history')
ORDER BY table_name;
"""
]
verify_result = subprocess.run(
verify_cmd,
env=env,
capture_output=True,
text=True,
timeout=30
)
if verify_result.returncode == 0:
tables = [line.strip() for line in verify_result.stdout.split('\n') if line.strip()]
logger.info(f" ✓ Found {len(tables)} core tables: {', '.join(tables)}")
else:
logger.warning(" ⚠ Could not verify table creation")
# Check for pgvector extension
vector_cmd = [
'psql',
'-h', db_config['host'],
'-p', str(db_config['port']),
'-U', db_config['user'],
'-d', db_config['database'],
'-t',
'-c', "SELECT EXISTS(SELECT 1 FROM pg_extension WHERE extname = 'vector');"
]
vector_result = subprocess.run(
vector_cmd,
env=env,
capture_output=True,
text=True,
timeout=30
)
if vector_result.returncode == 0:
has_vector = vector_result.stdout.strip() == 't'
if has_vector:
logger.info(" ✓ pgvector extension is available")
else:
logger.warning(" ⚠ pgvector extension not available - vector operations will be limited")
logger.info("🚀 Database migration completed successfully!")
logger.info("📊 Production-level database ready for AI repository analysis")
return True
except subprocess.TimeoutExpired:
logger.error("❌ Migration timed out after 5 minutes")
return False
except FileNotFoundError:
logger.error("❌ psql command not found. Please install PostgreSQL client tools.")
return False
except Exception as e:
logger.error(f"❌ Migration failed: {e}")
return False
def check_psql_available():
"""Check if psql command is available."""
try:
result = subprocess.run(['psql', '--version'], capture_output=True, text=True)
if result.returncode == 0:
logger.info(f"✓ Found psql: {result.stdout.strip()}")
return True
else:
return False
except FileNotFoundError:
return False
if __name__ == "__main__":
logger.info("🔧 AI Repository Analysis Database Migration")
logger.info("=" * 50)
# Check if psql is available
if not check_psql_available():
logger.error("❌ psql command not found!")
logger.error("Please install PostgreSQL client tools:")
logger.error(" Ubuntu/Debian: sudo apt-get install postgresql-client")
logger.error(" CentOS/RHEL: sudo yum install postgresql")
logger.error(" macOS: brew install postgresql")
sys.exit(1)
# Run migration
success = run_migration()
sys.exit(0 if success else 1)

View File

@ -0,0 +1,25 @@
# Core AI and API dependencies
anthropic>=0.7.0
python-dotenv>=1.0.0
# Web framework
fastapi>=0.104.1
uvicorn>=0.24.0
pydantic>=2.5.0
# Git operations
GitPython>=3.1.40
# Database dependencies
redis>=4.5.0
pymongo>=4.5.0
psycopg2-binary>=2.9.7
# Data processing
numpy>=1.24.0
# PDF generation
reportlab>=4.0.0
# Optional: For better performance (if needed)
# sentence-transformers>=2.2.2 # Commented out - using Claude API instead
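
As an optional sanity check (a sketch, assuming it is run inside the built service image), the pinned runtime dependencies can be smoke-tested by importing them:

# Import smoke test for the dependencies pinned above (sketch only).
import importlib

for module in ("anthropic", "dotenv", "fastapi", "uvicorn", "pydantic",
               "git", "redis", "pymongo", "psycopg2", "numpy", "reportlab"):
    importlib.import_module(module)
    print(f"ok: {module}")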

View File

@ -0,0 +1,94 @@
#!/usr/bin/env python3
"""
AI Analysis Service Database Migration Runner
Runs the database migration for AI Analysis Service during container startup.
"""
import os
import sys
import subprocess
import time
from pathlib import Path
def log(message):
"""Log with timestamp."""
print(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] {message}")
def check_database_connection():
"""Check if database is available."""
try:
import psycopg2
from dotenv import load_dotenv
load_dotenv()
conn = psycopg2.connect(
host=os.getenv('POSTGRES_HOST', 'localhost'),
port=os.getenv('POSTGRES_PORT', 5432),
database=os.getenv('POSTGRES_DB', 'dev_pipeline'),
user=os.getenv('POSTGRES_USER', 'pipeline_admin'),
password=os.getenv('POSTGRES_PASSWORD', 'secure_pipeline_2024')
)
conn.close()
return True
except Exception as e:
log(f"Database connection failed: {e}")
return False
def run_migration():
"""Run the database migration."""
try:
log("Starting AI Analysis Service database migration...")
# Check if database is available
max_retries = 30
retry_count = 0
while retry_count < max_retries:
if check_database_connection():
log("Database connection successful")
break
else:
retry_count += 1
log(f"Database not ready, retrying in 2 seconds... ({retry_count}/{max_retries})")
time.sleep(2)
else:
log("ERROR: Could not connect to database after 60 seconds")
return False
# Run the migration script
schema_file = Path(__file__).parent / "001-schema.sql"
if not schema_file.exists():
log("ERROR: Schema file not found")
return False
log(f"Running migration from {schema_file}")
# Use psql to run the migration
env = os.environ.copy()
env['PGPASSWORD'] = os.getenv('POSTGRES_PASSWORD', 'secure_pipeline_2024')
result = subprocess.run([
'psql',
'-h', os.getenv('POSTGRES_HOST', 'localhost'),
'-p', os.getenv('POSTGRES_PORT', '5432'),
'-U', os.getenv('POSTGRES_USER', 'pipeline_admin'),
'-d', os.getenv('POSTGRES_DB', 'dev_pipeline'),
'-f', str(schema_file),
'-v', 'ON_ERROR_STOP=1'
], env=env, capture_output=True, text=True)
if result.returncode == 0:
log("✅ AI Analysis Service database migration completed successfully")
return True
else:
log(f"❌ Migration failed: {result.stderr}")
return False
except Exception as e:
log(f"❌ Migration error: {e}")
return False
if __name__ == "__main__":
success = run_migration()
sys.exit(0 if success else 1)
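
A hypothetical way to chain this into container startup (the file names below are placeholders for illustration; the actual entrypoint is defined elsewhere in this commit) is to run the migration first and refuse to start the API if it fails:

# Startup wrapper sketch: migrate, then launch the API server.
# "run_migrations.py" and "server.py" are assumed names for the two scripts in this commit.
import subprocess
import sys

migration = subprocess.run([sys.executable, "run_migrations.py"])
if migration.returncode != 0:
    sys.exit("Database migration failed; not starting the AI Analysis Service")

subprocess.run([sys.executable, "server.py"], check=True)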

View File

@ -0,0 +1,230 @@
#!/usr/bin/env python3
"""
AI Analysis Service HTTP Server
Provides REST API endpoints for repository analysis.
"""
import os
import asyncio
import json
import tempfile
import shutil
from pathlib import Path
from typing import Dict, Any, Optional
from datetime import datetime
from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from pydantic import BaseModel
import uvicorn
# Import the AI analysis components
# Note: ai-analyze.py has a hyphen, so we need to handle the import specially
import sys
import importlib.util
# Load the ai-analyze.py module
spec = importlib.util.spec_from_file_location("ai_analyze", "/app/ai-analyze.py")
ai_analyze_module = importlib.util.module_from_spec(spec)
sys.modules["ai_analyze"] = ai_analyze_module
spec.loader.exec_module(ai_analyze_module)
# Now import the classes
from ai_analyze import EnhancedGitHubAnalyzer, get_memory_config
app = FastAPI(
title="AI Analysis Service",
description="AI-powered repository analysis with memory system",
version="1.0.0"
)
# CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Global analyzer instance
analyzer = None
class AnalysisRequest(BaseModel):
repo_path: str
output_format: str = "pdf" # pdf, json
max_files: int = 50
class AnalysisResponse(BaseModel):
    success: bool
    message: str
    # Optional fields: declare them as Optional so the None defaults validate cleanly under Pydantic v2
    analysis_id: Optional[str] = None
    report_path: Optional[str] = None
    stats: Optional[Dict[str, Any]] = None
@app.on_event("startup")
async def startup_event():
"""Initialize the analyzer on startup."""
global analyzer
try:
# Load environment variables
from dotenv import load_dotenv
load_dotenv()
# Get API key
api_key = os.getenv('ANTHROPIC_API_KEY')
if not api_key:
raise Exception("ANTHROPIC_API_KEY not found in environment")
# Initialize analyzer
config = get_memory_config()
analyzer = EnhancedGitHubAnalyzer(api_key, config)
print("✅ AI Analysis Service initialized successfully")
except Exception as e:
print(f"❌ Failed to initialize AI Analysis Service: {e}")
raise
@app.get("/health")
async def health_check():
"""Health check endpoint."""
return {
"status": "healthy",
"service": "ai-analysis-service",
"timestamp": datetime.now().isoformat(),
"version": "1.0.0"
}
@app.post("/analyze", response_model=AnalysisResponse)
async def analyze_repository(request: AnalysisRequest, background_tasks: BackgroundTasks):
"""Analyze a repository."""
try:
if not analyzer:
raise HTTPException(status_code=500, detail="Analyzer not initialized")
# Generate unique analysis ID
analysis_id = f"analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
# Create temporary directory for this analysis
temp_dir = tempfile.mkdtemp(prefix=f"ai_analysis_{analysis_id}_")
try:
# Run analysis
analysis = await analyzer.analyze_repository_with_memory(
request.repo_path,
max_files=request.max_files
)
# Generate report
if request.output_format == "pdf":
report_path = f"/app/reports/{analysis_id}_analysis.pdf"
analyzer.create_pdf_report(analysis, report_path)
else:
report_path = f"/app/reports/{analysis_id}_analysis.json"
with open(report_path, 'w') as f:
json.dump({
"repo_path": analysis.repo_path,
"total_files": analysis.total_files,
"total_lines": analysis.total_lines,
"languages": analysis.languages,
"code_quality_score": analysis.code_quality_score,
"architecture_assessment": analysis.architecture_assessment,
"security_assessment": analysis.security_assessment,
"executive_summary": analysis.executive_summary,
"file_analyses": [
{
"path": fa.path,
"language": fa.language,
"lines_of_code": fa.lines_of_code,
"severity_score": fa.severity_score,
"issues_found": fa.issues_found,
"recommendations": fa.recommendations
} for fa in analysis.file_analyses
]
}, f, indent=2)
# Calculate stats
stats = {
"total_files": analysis.total_files,
"total_lines": analysis.total_lines,
"languages": analysis.languages,
"code_quality_score": analysis.code_quality_score,
"high_quality_files": len([fa for fa in analysis.file_analyses if fa.severity_score >= 8]),
"medium_quality_files": len([fa for fa in analysis.file_analyses if 5 <= fa.severity_score < 8]),
"low_quality_files": len([fa for fa in analysis.file_analyses if fa.severity_score < 5]),
"total_issues": sum(len(fa.issues_found) for fa in analysis.file_analyses)
}
return AnalysisResponse(
success=True,
message="Analysis completed successfully",
analysis_id=analysis_id,
report_path=report_path,
stats=stats
)
finally:
# Cleanup temporary directory
if os.path.exists(temp_dir):
shutil.rmtree(temp_dir)
    except HTTPException:
        # Let explicit HTTP errors (e.g. analyzer not initialized) propagate with their status code
        raise
    except Exception as e:
return AnalysisResponse(
success=False,
message=f"Analysis failed: {str(e)}",
analysis_id=None,
report_path=None,
stats=None
)
@app.get("/reports/{filename}")
async def download_report(filename: str):
"""Download analysis report."""
report_path = f"/app/reports/{filename}"
if not os.path.exists(report_path):
raise HTTPException(status_code=404, detail="Report not found")
return FileResponse(
report_path,
media_type='application/octet-stream',
filename=filename
)
@app.get("/memory/stats")
async def get_memory_stats():
"""Get memory system statistics."""
try:
if not analyzer:
raise HTTPException(status_code=500, detail="Analyzer not initialized")
stats = await analyzer.memory_manager.get_memory_stats()
return {
"success": True,
"memory_stats": stats
}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to get memory stats: {str(e)}")
@app.post("/memory/query")
async def query_memory(query: str, repo_context: str = ""):
"""Query the memory system."""
try:
if not analyzer:
raise HTTPException(status_code=500, detail="Analyzer not initialized")
result = await analyzer.query_memory(query, repo_context)
return {
"success": True,
"query": query,
"result": result
}
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Memory query failed: {str(e)}")
if __name__ == "__main__":
port = int(os.getenv('PORT', 8022))
host = os.getenv('HOST', '0.0.0.0')
print(f"🚀 Starting AI Analysis Service on {host}:{port}")
uvicorn.run(app, host=host, port=port)
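
A client-side usage sketch for the endpoints above (assuming the service is reachable on localhost:8022 as configured in this commit, and that the requests library is available in the calling environment):

# Exercise the health and analyze endpoints (sketch; the repo path is a placeholder).
import requests

BASE = "http://localhost:8022"

print(requests.get(f"{BASE}/health", timeout=10).json())

resp = requests.post(
    f"{BASE}/analyze",
    json={"repo_path": "/app/temp/example-repo", "output_format": "json", "max_files": 50},
    timeout=300,  # analyses can take several minutes
)
result = resp.json()
if result["success"]:
    print("Report written to:", result["report_path"])
    print("Stats:", result["stats"])
else:
    print("Analysis failed:", result["message"])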

View File

@ -68,6 +68,7 @@ const serviceTargets = {
DASHBOARD_URL: process.env.DASHBOARD_URL || 'http://localhost:8008',
SELF_IMPROVING_GENERATOR_URL: process.env.SELF_IMPROVING_GENERATOR_URL || 'http://localhost:8007',
AI_MOCKUP_URL: process.env.AI_MOCKUP_URL || 'http://localhost:8021',
AI_ANALYSIS_URL: process.env.AI_ANALYSIS_URL || 'http://localhost:8022',
};
// Log service targets for debugging
@ -1984,6 +1985,76 @@ app.use('/api/mockup',
}
);
// AI Analysis Service - Direct HTTP forwarding
console.log('🔧 Registering /api/ai-analysis proxy route...');
app.use('/api/ai-analysis',
createServiceLimiter(200),
// Allow unauthenticated access for AI analysis (public feature)
(req, res, next) => {
console.log(`🤖 [AI ANALYSIS PROXY] ${req.method} ${req.originalUrl}`);
return next();
},
(req, res, next) => {
const aiAnalysisServiceUrl = serviceTargets.AI_ANALYSIS_URL;
// Strip the /api/ai-analysis prefix so /api/ai-analysis/analyze -> /analyze at target
const rewrittenPath = (req.originalUrl || '').replace(/^\/api\/ai-analysis/, '');
const targetUrl = `${aiAnalysisServiceUrl}${rewrittenPath}`;
    console.log(`🔥 [AI ANALYSIS PROXY] ${req.method} ${req.originalUrl} -> ${targetUrl}`);
res.setTimeout(300000, () => { // 5 minutes timeout for analysis
console.error('❌ [AI ANALYSIS PROXY] Response timeout');
if (!res.headersSent) {
res.status(504).json({ error: 'Gateway timeout', service: 'ai-analysis' });
}
});
const options = {
method: req.method,
url: targetUrl,
headers: {
'Content-Type': 'application/json',
'User-Agent': 'API-Gateway/1.0',
'Connection': 'keep-alive',
'Authorization': req.headers.authorization,
'X-User-ID': req.user?.id || req.user?.userId,
...(req.user?.role && { 'X-User-Role': req.user.role })
},
timeout: 240000, // 4 minutes timeout
validateStatus: () => true,
maxRedirects: 0,
maxContentLength: 100 * 1024 * 1024, // 100MB max content length
maxBodyLength: 100 * 1024 * 1024 // 100MB max body length
};
if (req.method === 'POST' || req.method === 'PUT' || req.method === 'PATCH') {
options.data = req.body || {};
console.log(`📦 [AI ANALYSIS PROXY] Request body:`, JSON.stringify(req.body));
}
axios(options)
.then(response => {
console.log(`✅ [AI ANALYSIS PROXY] Response: ${response.status} for ${req.method} ${req.originalUrl}`);
if (!res.headersSent) {
res.status(response.status).json(response.data);
}
})
.catch(error => {
console.error(`❌ [AI ANALYSIS PROXY ERROR]:`, error.message);
if (!res.headersSent) {
if (error.response) {
res.status(error.response.status).json(error.response.data);
} else {
res.status(502).json({
error: 'AI Analysis service unavailable',
message: error.code || error.message,
service: 'ai-analysis'
});
}
}
});
}
);
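// Usage sketch (assumption: "<gateway>" stands for whatever host:port this gateway
// actually listens on). The prefix rewrite above maps requests as:
//   POST <gateway>/api/ai-analysis/analyze  ->  POST http://localhost:8022/analyze
//   GET  <gateway>/api/ai-analysis/health   ->  GET  http://localhost:8022/health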
// Gateway management endpoints
app.get('/api/gateway/info', authMiddleware.verifyToken, (req, res) => {
res.json({
@ -2042,6 +2113,7 @@ app.get('/', (req, res) => {
dashboard: '/api/dashboard',
self_improving: '/api/self-improving',
mockup: '/api/mockup',
ai_analysis: '/api/ai-analysis',
unison: '/api/unison',
unified: '/api/recommendations'
},