changes in the frontend
parent 5e39839d42
commit b3a6bb8fdc
@@ -4,7 +4,7 @@ services:
   # =====================================

   postgres:
-    image: postgres:15
+    image: pgvector/pgvector:pg15
     container_name: pipeline_postgres
     environment:
       POSTGRES_USER: pipeline_admin
@@ -31,7 +31,7 @@ services:
     volumes:
       - redis_data:/data
     ports:
-      - "6379:6379"
+      - "6380:6379"
     networks:
       - pipeline_network
     healthcheck:
@@ -714,6 +714,55 @@ services:
       timeout: 10s
       retries: 3
       start_period: 40s

+  # =====================================
+  # AI Analysis Service
+  # =====================================
+
+  ai-analysis-service:
+    build: ./services/ai-analysis-service
+    container_name: pipeline_ai_analysis_service
+    ports:
+      - "8022:8022"
+    environment:
+      - PORT=8022
+      - HOST=0.0.0.0
+      - ANTHROPIC_API_KEY=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
+      - POSTGRES_HOST=postgres
+      - POSTGRES_PORT=5432
+      - POSTGRES_DB=dev_pipeline
+      - POSTGRES_USER=pipeline_admin
+      - POSTGRES_PASSWORD=secure_pipeline_2024
+      - REDIS_HOST=redis
+      - REDIS_PORT=6379
+      - REDIS_PASSWORD=redis_secure_2024
+      - MONGODB_URL=mongodb://pipeline_admin:mongo_secure_2024@mongodb:27017/
+      - MONGODB_DB=repo_analyzer
+      - JWT_ACCESS_SECRET=access-secret-key-2024-tech4biz-secure_pipeline_2024
+      - USER_AUTH_SERVICE_URL=http://user-auth:8011
+      - PYTHONUNBUFFERED=1
+    volumes:
+      - ai_analysis_logs:/app/logs
+      - ai_analysis_reports:/app/reports
+      - ai_analysis_temp:/app/temp
+    networks:
+      - pipeline_network
+    depends_on:
+      postgres:
+        condition: service_healthy
+      redis:
+        condition: service_healthy
+      mongodb:
+        condition: service_started
+      migrations:
+        condition: service_completed_successfully
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8022/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 60s
+    restart: unless-stopped
   # =====================================
   # Workflow Orchestration
   # =====================================
@@ -827,6 +876,12 @@ volumes:
     driver: local
   migration_state:
     driver: local
+  ai_analysis_logs:
+    driver: local
+  ai_analysis_reports:
+    driver: local
+  ai_analysis_temp:
+    driver: local

 # =====================================
 # Networks
@@ -834,11 +889,3 @@ volumes:
 networks:
   pipeline_network:
     driver: bridge
-# =====================================
-# Self-Improving Code Generator
-# =====================================
-
-
-# =====================================
-# Self-Improving Code Generator
-# =====================================
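
For reference, the new service added by these hunks can be built and checked on its own; a minimal sketch, assuming the compose file and service names shown above and that the stack's dependencies come up healthy:

```bash
# Build and start only the new service (compose pulls in its depends_on chain)
docker compose up -d --build ai-analysis-service

# Host port 8022 is mapped to the container, so the same health probe used
# in the compose healthcheck can be run from the host
curl -f http://localhost:8022/health
```
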

fix_provider_names.sql (new file, 95 lines)
@@ -0,0 +1,95 @@
-- Fix provider_name based on repository URLs across ALL tables
-- This script updates the provider_name field to match the actual provider from the repository URL

-- =============================================
-- 1. Fix all_repositories table
-- =============================================
UPDATE all_repositories
SET provider_name = 'github'
WHERE repository_url LIKE '%github.com%'
   OR repository_url LIKE '%github.io%';

UPDATE all_repositories
SET provider_name = 'gitlab'
WHERE repository_url LIKE '%gitlab.com%'
   OR repository_url LIKE '%gitlab.io%';

UPDATE all_repositories
SET provider_name = 'bitbucket'
WHERE repository_url LIKE '%bitbucket.org%'
   OR repository_url LIKE '%bitbucket.io%';

UPDATE all_repositories
SET provider_name = 'gitea'
WHERE repository_url LIKE '%gitea.com%'
   OR repository_url LIKE '%gitea.io%';

-- =============================================
-- 2. Fix repository_storage table (linked to all_repositories)
-- =============================================
UPDATE repository_storage
SET provider_name = ar.provider_name
FROM all_repositories ar
WHERE repository_storage.repository_id = ar.id;

-- =============================================
-- 3. Fix repository_commit_details table (linked to all_repositories)
-- =============================================
UPDATE repository_commit_details
SET provider_name = ar.provider_name
FROM all_repositories ar
WHERE repository_commit_details.repository_id = ar.id;

-- =============================================
-- 4. Fix repository_commit_files table (linked to all_repositories)
-- =============================================
UPDATE repository_commit_files
SET provider_name = ar.provider_name
FROM all_repositories ar
WHERE repository_commit_files.repository_id = ar.id;

-- =============================================
-- 5. Fix repository_directories table (linked to all_repositories)
-- =============================================
UPDATE repository_directories
SET provider_name = ar.provider_name
FROM all_repositories ar
WHERE repository_directories.repository_id = ar.id;

-- =============================================
-- 6. Fix repository_files table (linked to all_repositories)
-- =============================================
UPDATE repository_files
SET provider_name = ar.provider_name
FROM all_repositories ar
WHERE repository_files.repository_id = ar.id;

-- =============================================
-- 7. Show results for verification
-- =============================================

-- Show all_repositories results
SELECT
    'all_repositories' as table_name,
    repository_url,
    repository_name,
    owner_name,
    provider_name,
    CASE
        WHEN repository_url LIKE '%github.com%' OR repository_url LIKE '%github.io%' THEN 'github'
        WHEN repository_url LIKE '%gitlab.com%' OR repository_url LIKE '%gitlab.io%' THEN 'gitlab'
        WHEN repository_url LIKE '%bitbucket.org%' OR repository_url LIKE '%bitbucket.io%' THEN 'bitbucket'
        WHEN repository_url LIKE '%gitea.com%' OR repository_url LIKE '%gitea.io%' THEN 'gitea'
        ELSE 'unknown'
    END as detected_provider
FROM all_repositories
ORDER BY provider_name, repository_name;

-- Show summary counts by provider
SELECT
    'Summary by Provider' as info,
    provider_name,
    COUNT(*) as count
FROM all_repositories
GROUP BY provider_name
ORDER BY provider_name;
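
One way to apply this fix script by hand; a sketch that assumes the database name and credentials from the compose environment above, with Postgres reachable on the host:

```bash
# Run the provider-name fix against the pipeline database (assumed connection details)
PGPASSWORD=secure_pipeline_2024 psql -h localhost -p 5432 \
  -U pipeline_admin -d dev_pipeline -f fix_provider_names.sql
```
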

services/ai-analysis-service/001-schema.sql (new file, 613 lines)
@@ -0,0 +1,613 @@
-- ================================================
-- Repository Analyzer Memory System Database Migration
-- Version: 1.0
-- Description: Complete database setup for AI memory system
-- ================================================

-- Enable required extensions
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";

-- ================================================
-- CORE TABLES
-- ================================================

-- Code embeddings table for semantic search of analyzed code
CREATE TABLE IF NOT EXISTS code_embeddings (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    repo_id VARCHAR(255) NOT NULL,
    file_path TEXT NOT NULL,
    content_hash VARCHAR(64) NOT NULL,
    embedding vector(384) NOT NULL,
    metadata JSONB DEFAULT '{}',
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_accessed TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    access_count INTEGER DEFAULT 0,

    -- Ensure uniqueness per repo/file/hash combination
    CONSTRAINT unique_code_analysis UNIQUE(repo_id, file_path, content_hash)
);

-- Query embeddings for episodic memory (user interactions)
CREATE TABLE IF NOT EXISTS query_embeddings (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    session_id VARCHAR(255) NOT NULL,
    query_text TEXT NOT NULL,
    query_embedding vector(384) NOT NULL,
    response_embedding vector(384),
    repo_context VARCHAR(255),
    timestamp TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    metadata JSONB DEFAULT '{}',

    -- Index for session-based queries
    CONSTRAINT valid_session_id CHECK (LENGTH(session_id) > 0)
);

-- Persistent knowledge embeddings for long-term learning
CREATE TABLE IF NOT EXISTS knowledge_embeddings (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    fact_id VARCHAR(255) UNIQUE NOT NULL,
    content TEXT NOT NULL,
    category VARCHAR(100) NOT NULL,
    embedding vector(384) NOT NULL,
    confidence REAL DEFAULT 1.0 CHECK (confidence >= 0.0 AND confidence <= 1.0),
    source_repos TEXT[] DEFAULT '{}',
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_accessed TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    access_frequency INTEGER DEFAULT 0,

    -- Ensure valid categories
    CONSTRAINT valid_category CHECK (category IN ('code_pattern', 'best_practice', 'vulnerability', 'architecture', 'security_vulnerability', 'performance'))
);

-- Repository metadata for tracking analyzed repositories
CREATE TABLE IF NOT EXISTS repository_metadata (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    repo_id VARCHAR(255) UNIQUE NOT NULL,
    repo_path TEXT NOT NULL,
    repo_name VARCHAR(500),
    primary_language VARCHAR(100),
    total_files INTEGER DEFAULT 0,
    total_lines INTEGER DEFAULT 0,
    last_analyzed TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    analysis_count INTEGER DEFAULT 0,
    quality_score REAL DEFAULT 5.0 CHECK (quality_score >= 0.0 AND quality_score <= 10.0),
    metadata JSONB DEFAULT '{}'
);

-- Session tracking for episodic memory correlation
CREATE TABLE IF NOT EXISTS analysis_sessions (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    session_id VARCHAR(255) UNIQUE NOT NULL,
    user_identifier VARCHAR(255),
    start_time TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    end_time TIMESTAMP WITH TIME ZONE,
    total_queries INTEGER DEFAULT 0,
    repositories_analyzed TEXT[] DEFAULT '{}',
    session_metadata JSONB DEFAULT '{}'
);

-- File analysis history for change tracking
CREATE TABLE IF NOT EXISTS file_analysis_history (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    repo_id VARCHAR(255) NOT NULL,
    file_path TEXT NOT NULL,
    content_hash VARCHAR(64) NOT NULL,
    language VARCHAR(100),
    lines_of_code INTEGER DEFAULT 0,
    complexity_score REAL DEFAULT 0.0,
    severity_score REAL DEFAULT 5.0 CHECK (severity_score >= 0.0 AND severity_score <= 10.0),
    issues_count INTEGER DEFAULT 0,
    analyzed_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    analysis_version VARCHAR(50) DEFAULT '1.0'
);

-- Memory consolidation log for tracking knowledge extraction
CREATE TABLE IF NOT EXISTS memory_consolidation_log (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    source_type VARCHAR(50) NOT NULL, -- 'episodic', 'code_analysis', 'manual'
    source_id VARCHAR(255) NOT NULL,
    target_memory_type VARCHAR(50) NOT NULL, -- 'persistent', 'working'
    target_id VARCHAR(255),
    consolidation_confidence REAL DEFAULT 0.5,
    consolidation_timestamp TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    consolidation_metadata JSONB DEFAULT '{}'
);

-- ================================================
-- PERFORMANCE INDEXES
-- ================================================

-- Code embeddings indexes
CREATE INDEX IF NOT EXISTS idx_code_embeddings_repo_id ON code_embeddings(repo_id);
CREATE INDEX IF NOT EXISTS idx_code_embeddings_file_path ON code_embeddings(file_path);
CREATE INDEX IF NOT EXISTS idx_code_embeddings_accessed ON code_embeddings(last_accessed DESC);
CREATE INDEX IF NOT EXISTS idx_code_embeddings_metadata ON code_embeddings USING gin(metadata);

-- Vector similarity indexes (using IVFFlat for better performance)
CREATE INDEX IF NOT EXISTS idx_code_embeddings_vector
    ON code_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);

-- Query embeddings indexes
CREATE INDEX IF NOT EXISTS idx_query_embeddings_session ON query_embeddings(session_id);
CREATE INDEX IF NOT EXISTS idx_query_embeddings_timestamp ON query_embeddings(timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_query_embeddings_repo_context ON query_embeddings(repo_context);
CREATE INDEX IF NOT EXISTS idx_query_embeddings_vector
    ON query_embeddings USING ivfflat (query_embedding vector_cosine_ops) WITH (lists = 100);

-- Knowledge embeddings indexes
CREATE INDEX IF NOT EXISTS idx_knowledge_embeddings_category ON knowledge_embeddings(category);
CREATE INDEX IF NOT EXISTS idx_knowledge_embeddings_confidence ON knowledge_embeddings(confidence DESC);
CREATE INDEX IF NOT EXISTS idx_knowledge_embeddings_access_freq ON knowledge_embeddings(access_frequency DESC);
CREATE INDEX IF NOT EXISTS idx_knowledge_embeddings_vector
    ON knowledge_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
CREATE INDEX IF NOT EXISTS idx_knowledge_source_repos ON knowledge_embeddings USING gin(source_repos);

-- Repository metadata indexes
CREATE INDEX IF NOT EXISTS idx_repository_metadata_repo_id ON repository_metadata(repo_id);
CREATE INDEX IF NOT EXISTS idx_repository_metadata_analyzed ON repository_metadata(last_analyzed DESC);
CREATE INDEX IF NOT EXISTS idx_repository_metadata_language ON repository_metadata(primary_language);

-- File history indexes
CREATE INDEX IF NOT EXISTS idx_file_history_repo_file ON file_analysis_history(repo_id, file_path);
CREATE INDEX IF NOT EXISTS idx_file_history_analyzed ON file_analysis_history(analyzed_at DESC);
CREATE INDEX IF NOT EXISTS idx_file_history_severity ON file_analysis_history(severity_score);

-- ================================================
-- MATERIALIZED VIEWS FOR COMMON QUERIES
-- ================================================

-- High confidence knowledge view
CREATE MATERIALIZED VIEW IF NOT EXISTS high_confidence_knowledge AS
SELECT
    fact_id,
    content,
    category,
    confidence,
    source_repos,
    created_at,
    last_accessed,
    access_frequency
FROM knowledge_embeddings
WHERE confidence > 0.8
ORDER BY confidence DESC, access_frequency DESC;

CREATE INDEX ON high_confidence_knowledge (category);
CREATE INDEX ON high_confidence_knowledge (confidence DESC);

-- Repository quality summary view
CREATE MATERIALIZED VIEW IF NOT EXISTS repository_quality_summary AS
SELECT
    rm.repo_id,
    rm.repo_path,
    rm.repo_name,
    rm.primary_language,
    rm.total_files,
    rm.total_lines,
    rm.quality_score,
    rm.last_analyzed,
    COUNT(ce.id) as total_embeddings,
    AVG(fah.severity_score) as avg_file_quality,
    COUNT(DISTINCT fah.file_path) as analyzed_files_count
FROM repository_metadata rm
LEFT JOIN code_embeddings ce ON rm.repo_id = ce.repo_id
LEFT JOIN file_analysis_history fah ON rm.repo_id = fah.repo_id
GROUP BY rm.repo_id, rm.repo_path, rm.repo_name, rm.primary_language,
         rm.total_files, rm.total_lines, rm.quality_score, rm.last_analyzed;

CREATE INDEX ON repository_quality_summary (quality_score DESC);
CREATE INDEX ON repository_quality_summary (last_analyzed DESC);

-- Recent activity view
CREATE MATERIALIZED VIEW IF NOT EXISTS recent_activity AS
SELECT
    'query' as activity_type,
    session_id as identifier,
    query_text as description,
    timestamp as activity_time,
    repo_context
FROM query_embeddings
WHERE timestamp >= CURRENT_TIMESTAMP - INTERVAL '7 days'
UNION ALL
SELECT
    'analysis' as activity_type,
    repo_id as identifier,
    file_path as description,
    analyzed_at as activity_time,
    repo_id as repo_context
FROM file_analysis_history
WHERE analyzed_at >= CURRENT_TIMESTAMP - INTERVAL '7 days'
ORDER BY activity_time DESC;

CREATE INDEX ON recent_activity (activity_time DESC);
CREATE INDEX ON recent_activity (activity_type);

-- ================================================
-- STORED FUNCTIONS AND PROCEDURES
-- ================================================

-- Function to refresh all materialized views
CREATE OR REPLACE FUNCTION refresh_memory_views()
RETURNS void AS $$
BEGIN
    REFRESH MATERIALIZED VIEW CONCURRENTLY high_confidence_knowledge;
    REFRESH MATERIALIZED VIEW CONCURRENTLY repository_quality_summary;
    REFRESH MATERIALIZED VIEW CONCURRENTLY recent_activity;

    -- Log the refresh
    INSERT INTO memory_consolidation_log (
        source_type, source_id, target_memory_type, target_id,
        consolidation_confidence, consolidation_metadata
    ) VALUES (
        'system', 'materialized_views', 'system', 'view_refresh',
        1.0, '{"refresh_time": "' || CURRENT_TIMESTAMP || '"}'::jsonb
    );
END;
$$ LANGUAGE plpgsql;

-- Function to calculate semantic similarity between texts
CREATE OR REPLACE FUNCTION calculate_similarity(embedding1 vector(384), embedding2 vector(384))
RETURNS real AS $$
BEGIN
    RETURN 1 - (embedding1 <=> embedding2);
END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT;

-- Function to update access patterns
CREATE OR REPLACE FUNCTION update_access_pattern(table_name text, id_column text, id_value text)
RETURNS void AS $$
BEGIN
    CASE table_name
        WHEN 'knowledge_embeddings' THEN
            EXECUTE 'UPDATE knowledge_embeddings SET last_accessed = CURRENT_TIMESTAMP, access_frequency = access_frequency + 1 WHERE fact_id = $1'
                USING id_value;
        WHEN 'code_embeddings' THEN
            EXECUTE 'UPDATE code_embeddings SET last_accessed = CURRENT_TIMESTAMP, access_count = access_count + 1 WHERE id = $1::uuid'
                USING id_value;
        ELSE
            RAISE EXCEPTION 'Unsupported table: %', table_name;
    END CASE;
END;
$$ LANGUAGE plpgsql;

-- Function to cleanup old memories
CREATE OR REPLACE FUNCTION cleanup_old_memories(retention_days integer DEFAULT 365)
RETURNS integer AS $$
DECLARE
    deleted_count integer := 0;
    cutoff_date timestamp;
BEGIN
    cutoff_date := CURRENT_TIMESTAMP - (retention_days || ' days')::interval;

    -- Delete old query embeddings (episodic memories)
    DELETE FROM query_embeddings WHERE timestamp < cutoff_date;
    GET DIAGNOSTICS deleted_count = ROW_COUNT;

    -- Update knowledge confidence based on access patterns
    UPDATE knowledge_embeddings
    SET confidence = LEAST(confidence * (
        CASE
            WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - last_accessed)) / 86400 < 30
            THEN 1.05
            ELSE 0.98
        END *
        (1.0 + LOG(access_frequency + 1) / 20.0)
    ), 1.0);

    -- Log cleanup activity
    INSERT INTO memory_consolidation_log (
        source_type, source_id, target_memory_type, target_id,
        consolidation_confidence, consolidation_metadata
    ) VALUES (
        'system', 'cleanup_function', 'system', 'memory_cleanup',
        1.0, ('{"deleted_records": ' || deleted_count || ', "cutoff_date": "' || cutoff_date || '"}')::jsonb
    );

    RETURN deleted_count;
END;
$$ LANGUAGE plpgsql;

-- Function to find similar code patterns
CREATE OR REPLACE FUNCTION find_similar_code(
    query_embedding vector(384),
    repo_filter text DEFAULT NULL,
    similarity_threshold real DEFAULT 0.7,
    max_results integer DEFAULT 10
)
RETURNS TABLE (
    id uuid,
    repo_id varchar(255),
    file_path text,
    similarity real,
    metadata jsonb
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        ce.id,
        ce.repo_id,
        ce.file_path,
        (1 - (ce.embedding <=> query_embedding))::real as similarity,
        ce.metadata
    FROM code_embeddings ce
    WHERE (repo_filter IS NULL OR ce.repo_id = repo_filter)
      AND (1 - (ce.embedding <=> query_embedding)) > similarity_threshold
    ORDER BY similarity DESC
    LIMIT max_results;
END;
$$ LANGUAGE plpgsql;

-- Function to get knowledge by category
CREATE OR REPLACE FUNCTION get_knowledge_by_category(
    category_filter varchar(100),
    min_confidence real DEFAULT 0.5,
    max_results integer DEFAULT 20
)
RETURNS TABLE (
    fact_id varchar(255),
    content text,
    confidence real,
    access_frequency integer,
    source_repos text[]
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        ke.fact_id,
        ke.content,
        ke.confidence,
        ke.access_frequency,
        ke.source_repos
    FROM knowledge_embeddings ke
    WHERE ke.category = category_filter
      AND ke.confidence >= min_confidence
    ORDER BY ke.confidence DESC, ke.access_frequency DESC
    LIMIT max_results;
END;
$$ LANGUAGE plpgsql;

-- ================================================
-- TRIGGERS FOR AUTOMATIC MAINTENANCE
-- ================================================

-- Trigger function to update repository metadata when embeddings are added
CREATE OR REPLACE FUNCTION update_repository_stats()
RETURNS trigger AS $$
BEGIN
    -- Update or insert repository metadata
    INSERT INTO repository_metadata (repo_id, repo_path, analysis_count, last_analyzed)
    VALUES (NEW.repo_id, NEW.repo_id, 1, CURRENT_TIMESTAMP)
    ON CONFLICT (repo_id)
    DO UPDATE SET
        analysis_count = repository_metadata.analysis_count + 1,
        last_analyzed = CURRENT_TIMESTAMP;

    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Create triggers
DROP TRIGGER IF EXISTS trigger_update_repo_stats ON code_embeddings;
CREATE TRIGGER trigger_update_repo_stats
    AFTER INSERT ON code_embeddings
    FOR EACH ROW
    EXECUTE FUNCTION update_repository_stats();

-- Trigger to automatically update access patterns
CREATE OR REPLACE FUNCTION auto_update_access()
RETURNS trigger AS $$
BEGIN
    NEW.last_accessed = CURRENT_TIMESTAMP;
    NEW.access_count = COALESCE(OLD.access_count, 0) + 1;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

DROP TRIGGER IF EXISTS trigger_auto_access_update ON code_embeddings;
CREATE TRIGGER trigger_auto_access_update
    BEFORE UPDATE ON code_embeddings
    FOR EACH ROW
    EXECUTE FUNCTION auto_update_access();

-- ================================================
-- SECURITY AND PERMISSIONS
-- ================================================

-- Create roles for different access levels
DO $$
BEGIN
    IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'repo_analyzer_read') THEN
        CREATE ROLE repo_analyzer_read;
    END IF;

    IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'repo_analyzer_write') THEN
        CREATE ROLE repo_analyzer_write;
    END IF;

    IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'repo_analyzer_admin') THEN
        CREATE ROLE repo_analyzer_admin;
    END IF;
END
$$;

-- Grant permissions
GRANT SELECT ON ALL TABLES IN SCHEMA public TO repo_analyzer_read;
GRANT SELECT ON high_confidence_knowledge TO repo_analyzer_read;
GRANT SELECT ON repository_quality_summary TO repo_analyzer_read;
GRANT SELECT ON recent_activity TO repo_analyzer_read;

GRANT SELECT, INSERT, UPDATE ON ALL TABLES IN SCHEMA public TO repo_analyzer_write;
GRANT SELECT ON high_confidence_knowledge TO repo_analyzer_write;
GRANT SELECT ON repository_quality_summary TO repo_analyzer_write;
GRANT SELECT ON recent_activity TO repo_analyzer_write;
GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO repo_analyzer_write;

GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO repo_analyzer_admin;
GRANT ALL PRIVILEGES ON high_confidence_knowledge TO repo_analyzer_admin;
GRANT ALL PRIVILEGES ON repository_quality_summary TO repo_analyzer_admin;
GRANT ALL PRIVILEGES ON recent_activity TO repo_analyzer_admin;
GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO repo_analyzer_admin;
GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO repo_analyzer_admin;

-- ================================================
-- DATA VALIDATION AND CONSTRAINTS
-- ================================================

-- Add check constraints for data quality
-- Note: Vector dimensions are validated at insertion time, no need for runtime checks

-- Add constraints for reasonable data ranges
DO $$
BEGIN
    IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'reasonable_lines_of_code') THEN
        ALTER TABLE file_analysis_history ADD CONSTRAINT reasonable_lines_of_code
            CHECK (lines_of_code >= 0 AND lines_of_code <= 1000000);
    END IF;

    IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'reasonable_complexity') THEN
        ALTER TABLE file_analysis_history ADD CONSTRAINT reasonable_complexity
            CHECK (complexity_score >= 0.0 AND complexity_score <= 100.0);
    END IF;
END
$$;

-- ================================================
-- INITIAL DATA AND CONFIGURATION
-- ================================================

-- Insert initial system configuration
INSERT INTO memory_consolidation_log (
    source_type, source_id, target_memory_type, target_id,
    consolidation_confidence, consolidation_metadata
) VALUES (
    'system', 'database_migration', 'system', 'initial_setup',
    1.0, ('{"migration_version": "1.0", "setup_time": "' || CURRENT_TIMESTAMP || '"}')::jsonb
) ON CONFLICT DO NOTHING;

-- Create initial knowledge categories
INSERT INTO knowledge_embeddings (
    fact_id, content, category, embedding, confidence, source_repos
) VALUES
(
    'init_security_001',
    'Always validate and sanitize user input to prevent injection attacks',
    'security_vulnerability',
    array_fill(0.0, ARRAY[384])::vector(384),
    0.95,
    ARRAY[]::text[]
),
(
    'init_performance_001',
    'Use appropriate data structures and algorithms for better performance',
    'performance',
    array_fill(0.0, ARRAY[384])::vector(384),
    0.9,
    ARRAY[]::text[]
),
(
    'init_best_practice_001',
    'Follow consistent naming conventions and code formatting standards',
    'best_practice',
    array_fill(0.0, ARRAY[384])::vector(384),
    0.85,
    ARRAY[]::text[]
)
ON CONFLICT (fact_id) DO NOTHING;

-- ================================================
-- BACKUP AND MAINTENANCE PROCEDURES
-- ================================================

-- Function to create backup of critical memory data
CREATE OR REPLACE FUNCTION backup_memory_data(backup_path text DEFAULT '/tmp/memory_backup')
RETURNS text AS $$
DECLARE
    backup_file text;
    result_message text;
BEGIN
    backup_file := backup_path || '_' || to_char(CURRENT_TIMESTAMP, 'YYYY-MM-DD_HH24-MI-SS') || '.sql';

    -- This would need to be implemented with actual backup logic
    -- For now, just return the intended backup file name
    result_message := 'Backup would be created at: ' || backup_file;

    -- Log backup activity
    INSERT INTO memory_consolidation_log (
        source_type, source_id, target_memory_type, target_id,
        consolidation_confidence, consolidation_metadata
    ) VALUES (
        'system', 'backup_function', 'system', 'backup_created',
        1.0, ('{"backup_file": "' || backup_file || '"}')::jsonb
    );

    RETURN result_message;
END;
$$ LANGUAGE plpgsql;

-- ================================================
-- MONITORING AND ANALYTICS
-- ================================================

-- View for system health monitoring
CREATE OR REPLACE VIEW system_health_monitor AS
SELECT
    'code_embeddings' as table_name,
    COUNT(*) as record_count,
    MAX(created_at) as latest_record,
    AVG(access_count) as avg_access_count
FROM code_embeddings
UNION ALL
SELECT
    'query_embeddings' as table_name,
    COUNT(*) as record_count,
    MAX(timestamp) as latest_record,
    NULL as avg_access_count
FROM query_embeddings
UNION ALL
SELECT
    'knowledge_embeddings' as table_name,
    COUNT(*) as record_count,
    MAX(created_at) as latest_record,
    AVG(access_frequency) as avg_access_count
FROM knowledge_embeddings;

-- Function to get comprehensive system statistics
CREATE OR REPLACE FUNCTION get_system_statistics()
RETURNS jsonb AS $$
DECLARE
    stats jsonb;
BEGIN
    SELECT jsonb_build_object(
        'total_code_embeddings', (SELECT COUNT(*) FROM code_embeddings),
        'total_query_embeddings', (SELECT COUNT(*) FROM query_embeddings),
        'total_knowledge_embeddings', (SELECT COUNT(*) FROM knowledge_embeddings),
        'unique_repositories', (SELECT COUNT(DISTINCT repo_id) FROM code_embeddings),
        'high_confidence_knowledge', (SELECT COUNT(*) FROM knowledge_embeddings WHERE confidence > 0.8),
        'recent_activity_7d', (SELECT COUNT(*) FROM query_embeddings WHERE timestamp >= CURRENT_TIMESTAMP - INTERVAL '7 days'),
        'average_code_quality', (SELECT AVG(quality_score) FROM repository_metadata),
        'last_updated', CURRENT_TIMESTAMP
    ) INTO stats;

    RETURN stats;
END;
$$ LANGUAGE plpgsql;

-- ================================================
-- COMPLETION MESSAGE
-- ================================================

DO $$
BEGIN
    RAISE NOTICE '================================================';
    RAISE NOTICE 'Repository Analyzer Memory System Database Setup Complete';
    RAISE NOTICE '================================================';
    RAISE NOTICE 'Tables created: code_embeddings, query_embeddings, knowledge_embeddings';
    RAISE NOTICE 'Indexes created: Vector similarity indexes with IVFFlat';
    RAISE NOTICE 'Functions created: Similarity search, cleanup, statistics';
    RAISE NOTICE 'Materialized views created: High confidence knowledge, repository summary';
    RAISE NOTICE 'Triggers created: Auto-update repository stats and access patterns';
    RAISE NOTICE '================================================';
    RAISE NOTICE 'Ready for AI-enhanced repository analysis with persistent memory';
    RAISE NOTICE '================================================';
END
$$;
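
As a quick smoke test after the migration, the helper functions defined in this schema can be called directly; a hedged sketch, with the connection details assumed from the compose environment above:

```bash
# Call two of the helper functions created by 001-schema.sql (assumed credentials)
PGPASSWORD=secure_pipeline_2024 psql -h localhost -U pipeline_admin -d dev_pipeline \
  -c "SELECT get_system_statistics();" \
  -c "SELECT * FROM get_knowledge_by_category('best_practice', 0.5, 5);"
```
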

services/ai-analysis-service/Dockerfile (new file, 37 lines)
@@ -0,0 +1,37 @@
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    git \
    postgresql-client \
    curl \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the service code
COPY . .

# Create necessary directories
RUN mkdir -p /app/logs /app/temp /app/reports

# Set environment variables
ENV PYTHONPATH=/app
ENV PYTHONUNBUFFERED=1
ENV PORT=8022

# Expose port
EXPOSE 8022

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:8022/health || exit 1

# Run migration and then start the service
CMD ["sh", "-c", "python run_migration.py && python server.py"]
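
Outside of Compose, the image can also be built and run on its own; a minimal sketch with an illustrative tag and placeholder environment values (the container still expects reachable Postgres, Redis, and MongoDB instances):

```bash
# Build the image from the service directory and run it standalone
docker build -t ai-analysis-service ./services/ai-analysis-service
docker run --rm -p 8022:8022 \
  -e ANTHROPIC_API_KEY=your_api_key_here \
  -e POSTGRES_HOST=host.docker.internal \
  ai-analysis-service
```
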

services/ai-analysis-service/README.md (new file, 202 lines)
@@ -0,0 +1,202 @@
# Complete AI Repository Analysis Service

A comprehensive AI-powered repository analysis tool that automatically analyzes **ALL files** in a repository without any limits or user queries required.

## 🚀 Features

- **Complete Analysis**: Analyzes ALL files in the repository (no max-files limit)
- **Fully Automated**: No user query required - runs completely automatically
- **Memory-Enhanced**: Learns from previous analyses using advanced memory systems
- **Comprehensive Reports**: Generates detailed PDF reports with executive summaries
- **Multi-Database Support**: Uses PostgreSQL, MongoDB, and Redis for optimal performance
- **Security Focus**: Identifies security vulnerabilities and code quality issues
- **Architecture Assessment**: Provides architectural insights and recommendations

## 📋 Requirements

### System Dependencies
- Python 3.8+
- PostgreSQL with pgvector extension
- MongoDB
- Redis

### Python Dependencies
```bash
pip install anthropic python-dotenv git redis pymongo psycopg2-binary numpy reportlab
```

## 🛠️ Setup

1. **Install Dependencies**:
   ```bash
   pip install -r requirements.txt
   ```

2. **Database Setup**:
   ```bash
   # Run the database migration
   psql -U postgres -d repo_vectors -f 001-schema.sql
   ```

3. **Environment Variables**:
   Create a `.env` file with:
   ```env
   ANTHROPIC_API_KEY=your_api_key_here
   REDIS_HOST=localhost
   REDIS_PORT=6379
   REDIS_DB=0
   MONGODB_URL=mongodb://localhost:27017/
   MONGODB_DB=repo_analyzer
   POSTGRES_HOST=localhost
   POSTGRES_PORT=5432
   POSTGRES_DB=repo_vectors
   POSTGRES_USER=postgres
   POSTGRES_PASSWORD=your_password
   ```

## 🎯 Usage

### Basic Usage
```bash
python ai-analyze.py /path/to/repository
```

### With Custom Output
```bash
python ai-analyze.py /path/to/repository --output my_analysis.pdf
```

### With API Key Override
```bash
python ai-analyze.py /path/to/repository --api-key your_api_key
```

## 📊 What It Analyzes

### File Types Supported
- **Programming Languages**: Python, JavaScript, TypeScript, Java, C++, C#, Go, Rust, PHP, Ruby, Swift, Kotlin
- **Web Technologies**: HTML, CSS, SCSS, SASS
- **Configuration Files**: JSON, YAML, XML, SQL
- **Build Files**: Dockerfile, Makefile, CMake, package.json, requirements.txt, Cargo.toml, pom.xml, build.gradle
- **Documentation**: README.md, Markdown files

### Analysis Coverage
- **Code Quality**: Complexity, maintainability, best practices
- **Security**: Vulnerabilities, injection attacks, authentication issues
- **Architecture**: Project structure, scalability, design patterns
- **Performance**: Optimization opportunities, bottlenecks
- **Documentation**: Completeness and quality

## 📈 Output

### Console Output
- Real-time analysis progress
- Repository statistics
- Quality breakdown by file
- Language distribution
- Memory system statistics

### PDF Report
- Executive summary for leadership
- Repository overview with metrics
- Detailed file-by-file analysis
- Security assessment
- Architecture evaluation
- Recommendations and next steps

## 🧠 Memory System

The tool uses a sophisticated three-tier memory system:

1. **Working Memory (Redis)**: Temporary, fast access for current analysis
2. **Episodic Memory (MongoDB)**: User interactions and analysis sessions
3. **Persistent Memory (PostgreSQL)**: Long-term knowledge and best practices

This allows the tool to learn from previous analyses and provide increasingly accurate insights.

## 🔧 Configuration

### File Size Limits
- Default: 2MB per file (configurable in code)
- Large files are skipped with notification

### Excluded Directories
- `.git`, `node_modules`, `__pycache__`, `build`, `dist`, `target`
- `venv`, `env`, `.next`, `coverage`, `vendor`
- `bower_components`, `.gradle`, `.m2`, `.cargo`

### Rate Limiting
- 0.1 second delay between file analyses to avoid API rate limits
- Configurable in the code

## 📝 Example Output

```
🚀 Starting Complete AI Repository Analysis
============================================================
Repository: /path/to/my-project
Output: complete_repository_analysis.pdf
Mode: Complete automated analysis of ALL files
============================================================

Scanning repository: /path/to/my-project
Found 127 files to analyze
Starting comprehensive analysis of 127 files...
Analyzing file 1/127: main.py
Analyzing file 2/127: config.js
...

🎯 COMPLETE ANALYSIS FINISHED
============================================================
📊 Repository Statistics:
   • Files Analyzed: 127
   • Lines of Code: 15,432
   • Languages: 8
   • Code Quality: 7.2/10

📈 Quality Breakdown:
   • High Quality Files (8-10): 45
   • Medium Quality Files (5-7): 67
   • Low Quality Files (1-4): 15
   • Total Issues Found: 89

🔤 Language Distribution:
   • Python: 45 files
   • JavaScript: 32 files
   • TypeScript: 28 files
   • HTML: 12 files
   • CSS: 10 files

📄 Complete PDF Report: complete_repository_analysis.pdf
✅ Complete analysis finished successfully!
```

## 🚨 Troubleshooting

### Common Issues

1. **Database Connection Errors**:
   - Ensure PostgreSQL, MongoDB, and Redis are running
   - Check connection credentials in `.env` file

2. **API Key Issues**:
   - Verify Anthropic API key is valid and has sufficient credits
   - Check rate limits if analysis fails

3. **Memory Issues**:
   - Large repositories may require more RAM
   - Consider increasing system memory or processing in batches

4. **File Permission Errors**:
   - Ensure read access to repository files
   - Check write permissions for output directory

## 🤝 Contributing

This is a complete automated analysis system. The tool will:
- Analyze every file in the repository
- Generate comprehensive reports
- Learn from previous analyses
- Provide actionable insights

No user interaction required - just run and get results!
710
services/ai-analysis-service/ai-analysis/adv_git_analyzer.py
Normal file
710
services/ai-analysis-service/ai-analysis/adv_git_analyzer.py
Normal file
@ -0,0 +1,710 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Robust GitHub Repository AI Analysis Tool
|
||||||
|
Simplified version with better error handling and JSON parsing.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import asyncio
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, List, Optional, Tuple
|
||||||
|
from datetime import datetime
|
||||||
|
import argparse
|
||||||
|
from dataclasses import dataclass
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
|
# Core packages
|
||||||
|
import anthropic
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
import git
|
||||||
|
|
||||||
|
# PDF generation
|
||||||
|
from reportlab.lib.pagesizes import A4
|
||||||
|
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
||||||
|
from reportlab.lib.enums import TA_CENTER, TA_LEFT
|
||||||
|
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Table, TableStyle
|
||||||
|
from reportlab.lib import colors
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class FileAnalysis:
|
||||||
|
path: str
|
||||||
|
language: str
|
||||||
|
lines_of_code: int
|
||||||
|
complexity_score: float
|
||||||
|
issues_found: List[str]
|
||||||
|
recommendations: List[str]
|
||||||
|
detailed_analysis: str
|
||||||
|
severity_score: float
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class RepositoryAnalysis:
|
||||||
|
repo_path: str
|
||||||
|
total_files: int
|
||||||
|
total_lines: int
|
||||||
|
languages: Dict[str, int]
|
||||||
|
architecture_assessment: str
|
||||||
|
security_assessment: str
|
||||||
|
code_quality_score: float
|
||||||
|
file_analyses: List[FileAnalysis]
|
||||||
|
executive_summary: str
|
||||||
|
|
||||||
|
class RobustGitHubAnalyzer:
|
||||||
|
def __init__(self, api_key: str):
|
||||||
|
self.client = anthropic.Anthropic(api_key=api_key)
|
||||||
|
self.temp_dir = None
|
||||||
|
|
||||||
|
# Language mapping for file detection
|
||||||
|
self.language_map = {
|
||||||
|
'.py': 'Python', '.js': 'JavaScript', '.ts': 'TypeScript',
|
||||||
|
'.tsx': 'TypeScript', '.jsx': 'JavaScript', '.java': 'Java',
|
||||||
|
'.cpp': 'C++', '.c': 'C', '.cs': 'C#', '.go': 'Go', '.rs': 'Rust',
|
||||||
|
'.php': 'PHP', '.rb': 'Ruby', '.swift': 'Swift', '.kt': 'Kotlin',
|
||||||
|
'.html': 'HTML', '.css': 'CSS', '.scss': 'SCSS', '.sass': 'SASS',
|
||||||
|
'.sql': 'SQL', '.yaml': 'YAML', '.yml': 'YAML', '.json': 'JSON',
|
||||||
|
'.xml': 'XML', '.sh': 'Shell', '.dockerfile': 'Docker',
|
||||||
|
'.md': 'Markdown', '.txt': 'Text'
|
||||||
|
}
|
||||||
|
|
||||||
|
# Code file extensions to analyze
|
||||||
|
self.code_extensions = set(self.language_map.keys())
|
||||||
|
|
||||||
|
def clone_repository(self, repo_path: str) -> str:
|
||||||
|
"""Clone repository or use existing path."""
|
||||||
|
if os.path.exists(repo_path):
|
||||||
|
print(f"Using existing repository: {repo_path}")
|
||||||
|
return repo_path
|
||||||
|
else:
|
||||||
|
print(f"Cloning repository: {repo_path}")
|
||||||
|
self.temp_dir = tempfile.mkdtemp(prefix="repo_analysis_")
|
||||||
|
try:
|
||||||
|
git.Repo.clone_from(repo_path, self.temp_dir)
|
||||||
|
return self.temp_dir
|
||||||
|
except Exception as e:
|
||||||
|
raise Exception(f"Failed to clone repository: {e}")
|
||||||
|
|
||||||
|
def get_file_language(self, file_path: Path) -> str:
|
||||||
|
"""Get programming language from file extension."""
|
||||||
|
return self.language_map.get(file_path.suffix.lower(), 'Unknown')
|
||||||
|
|
||||||
|
def calculate_complexity_score(self, content: str) -> float:
|
||||||
|
"""Calculate basic complexity score based on code patterns."""
|
||||||
|
lines = content.split('\n')
|
||||||
|
complexity_indicators = ['if', 'else', 'elif', 'for', 'while', 'try', 'except', 'catch', 'switch']
|
||||||
|
|
||||||
|
complexity = 1
|
||||||
|
for line in lines:
|
||||||
|
line_lower = line.lower().strip()
|
||||||
|
for indicator in complexity_indicators:
|
||||||
|
if indicator in line_lower:
|
||||||
|
complexity += 1
|
||||||
|
|
||||||
|
# Normalize to 1-10 scale
|
||||||
|
return min(complexity / max(len(lines), 1) * 100, 10.0)
|
||||||
|
|
||||||
|
async def analyze_file_comprehensive(self, file_path: Path, content: str) -> FileAnalysis:
|
||||||
|
"""Perform comprehensive file analysis using a single, robust prompt."""
|
||||||
|
language = self.get_file_language(file_path)
|
||||||
|
lines_of_code = len([line for line in content.split('\n') if line.strip()])
|
||||||
|
complexity_score = self.calculate_complexity_score(content)
|
||||||
|
|
||||||
|
# Truncate content if too long
|
||||||
|
if len(content) > 4000:
|
||||||
|
content = content[:4000] + "\n... [truncated for analysis]"
|
||||||
|
|
||||||
|
print(f" Analyzing {file_path.name} ({language}, {lines_of_code} lines)")
|
||||||
|
|
||||||
|
# Create comprehensive analysis prompt
|
||||||
|
prompt = f"""
|
||||||
|
You are a senior software engineer with 25 years of experience. Analyze this {language} code file:
|
||||||
|
|
||||||
|
FILENAME: {file_path.name}
|
||||||
|
LANGUAGE: {language}
|
||||||
|
LINES OF CODE: {lines_of_code}
|
||||||
|
|
||||||
|
CODE:
|
||||||
|
```{language.lower()}
|
||||||
|
{content}
|
||||||
|
```
|
||||||
|
|
||||||
|
Provide a comprehensive analysis covering:
|
||||||
|
|
||||||
|
1. ISSUES FOUND: List specific problems, bugs, security vulnerabilities, or code smells
|
||||||
|
2. RECOMMENDATIONS: Actionable suggestions for improvement
|
||||||
|
3. CODE QUALITY: Overall assessment of code quality and maintainability
|
||||||
|
4. SECURITY: Any security concerns or vulnerabilities
|
||||||
|
5. PERFORMANCE: Potential performance issues or optimizations
|
||||||
|
6. BEST PRACTICES: Adherence to coding standards and best practices
|
||||||
|
|
||||||
|
Provide your analysis in clear, structured text (not JSON). Be specific and actionable.
|
||||||
|
Rate the overall code quality from 1-10 where 10 is excellent.
|
||||||
|
|
||||||
|
ANALYSIS:
|
||||||
|
"""
|
||||||
|
|
||||||
|
try:
|
||||||
|
message = self.client.messages.create(
|
||||||
|
model="claude-3-5-sonnet-20241022",
|
||||||
|
max_tokens=3000,
|
||||||
|
temperature=0.1,
|
||||||
|
messages=[{"role": "user", "content": prompt}]
|
||||||
|
)
|
||||||
|
|
||||||
|
analysis_text = message.content[0].text.strip()
|
||||||
|
|
||||||
|
# Extract severity score from analysis
|
||||||
|
severity_match = re.search(r'(\d+(?:\.\d+)?)/10', analysis_text)
|
||||||
|
severity_score = float(severity_match.group(1)) if severity_match else 5.0
|
||||||
|
|
||||||
|
# Parse issues and recommendations from the text
|
||||||
|
issues = self.extract_issues_from_analysis(analysis_text)
|
||||||
|
recommendations = self.extract_recommendations_from_analysis(analysis_text)
|
||||||
|
|
||||||
|
return FileAnalysis(
|
||||||
|
path=str(file_path.relative_to(Path(self.temp_dir or '.'))),
|
||||||
|
language=language,
|
||||||
|
lines_of_code=lines_of_code,
|
||||||
|
complexity_score=complexity_score,
|
||||||
|
issues_found=issues,
|
||||||
|
recommendations=recommendations,
|
||||||
|
detailed_analysis=analysis_text,
|
||||||
|
severity_score=severity_score
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f" Error analyzing {file_path.name}: {e}")
|
||||||
|
return FileAnalysis(
|
||||||
|
path=str(file_path),
|
||||||
|
language=language,
|
||||||
|
lines_of_code=lines_of_code,
|
||||||
|
complexity_score=complexity_score,
|
||||||
|
issues_found=[f"Analysis failed: {str(e)}"],
|
||||||
|
recommendations=["Review file manually due to analysis error"],
|
||||||
|
detailed_analysis=f"Analysis failed due to error: {str(e)}",
|
||||||
|
severity_score=5.0
|
||||||
|
)
|
||||||
|
|
||||||
|
def extract_issues_from_analysis(self, analysis_text: str) -> List[str]:
|
||||||
|
"""Extract issues from analysis text."""
|
||||||
|
issues = []
|
||||||
|
lines = analysis_text.split('\n')
|
||||||
|
|
||||||
|
# Look for common issue indicators
|
||||||
|
issue_keywords = ['issue', 'problem', 'bug', 'vulnerability', 'error', 'warning', 'concern']
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
line_lower = line.lower().strip()
|
||||||
|
if any(keyword in line_lower for keyword in issue_keywords):
|
||||||
|
if line.strip() and not line.strip().startswith('#'):
|
||||||
|
issues.append(line.strip())
|
||||||
|
|
||||||
|
return issues[:10] # Limit to top 10 issues
|
||||||
|
|
||||||
|
def extract_recommendations_from_analysis(self, analysis_text: str) -> List[str]:
|
||||||
|
"""Extract recommendations from analysis text."""
|
||||||
|
recommendations = []
|
||||||
|
lines = analysis_text.split('\n')
|
||||||
|
|
||||||
|
# Look for recommendation indicators
|
||||||
|
rec_keywords = ['recommend', 'suggest', 'should', 'consider', 'improve']
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
line_lower = line.lower().strip()
|
||||||
|
if any(keyword in line_lower for keyword in rec_keywords):
|
||||||
|
if line.strip() and not line.strip().startswith('#'):
|
||||||
|
recommendations.append(line.strip())
|
||||||
|
|
||||||
|
return recommendations[:10] # Limit to top 10 recommendations
|
||||||
|
|
||||||
|
    def scan_repository(self, repo_path: str, max_files: int = 50) -> List[Tuple[Path, str]]:
        """Scan repository and collect files for analysis."""
        print(f"Scanning repository: {repo_path}")

        files_to_analyze = []

        # Important files to always include
        important_files = {
            'README.md', 'package.json', 'requirements.txt', 'Dockerfile',
            'docker-compose.yml', 'tsconfig.json', 'next.config.js',
            'tailwind.config.js', 'webpack.config.js', '.env.example'
        }

        for root, dirs, files in os.walk(repo_path):
            # Skip common build/cache directories
            dirs[:] = [d for d in dirs if not d.startswith('.') and
                       d not in {'node_modules', '__pycache__', 'build', 'dist', 'target',
                                 'venv', 'env', '.git', '.next', 'coverage'}]

            for file in files:
                if len(files_to_analyze) >= max_files:
                    break

                file_path = Path(root) / file

                # Skip large files
                try:
                    if file_path.stat().st_size > 1000000:  # 1MB limit
                        continue
                except:
                    continue

                # Include important files or files with code extensions
                should_include = (
                    file.lower() in important_files or
                    file_path.suffix.lower() in self.code_extensions or
                    file.lower().startswith('dockerfile')
                )

                if should_include:
                    try:
                        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                            content = f.read()
                            if content.strip():  # Only non-empty files
                                files_to_analyze.append((file_path, content))
                    except Exception as e:
                        print(f"Could not read {file_path}: {e}")

        print(f"Found {len(files_to_analyze)} files to analyze")
        return files_to_analyze

    async def analyze_repository_overview(self, repo_path: str, file_analyses: List[FileAnalysis]) -> Tuple[str, str]:
        """Analyze repository architecture and security."""
        print("Analyzing repository overview...")

        # Prepare summary data
        languages = dict(Counter(fa.language for fa in file_analyses))
        total_lines = sum(fa.lines_of_code for fa in file_analyses)
        avg_quality = sum(fa.severity_score for fa in file_analyses) / len(file_analyses) if file_analyses else 5.0

        # Get repository structure
        structure_lines = []
        try:
            for root, dirs, files in os.walk(repo_path):
                dirs[:] = [d for d in dirs if not d.startswith('.') and d not in {'node_modules', '__pycache__'}]
                level = root.replace(repo_path, '').count(os.sep)
                indent = ' ' * level
                structure_lines.append(f"{indent}{os.path.basename(root)}/")
                for file in files[:3]:  # Limit files shown per directory
                    structure_lines.append(f"{indent} {file}")
                if len(structure_lines) > 50:  # Limit total structure size
                    break
        except Exception as e:
            structure_lines = [f"Error reading structure: {e}"]

        # Architecture analysis
        arch_prompt = f"""
        You are a Senior Software Architect with 25 years of experience.

        Analyze this repository:

        REPOSITORY STRUCTURE:
        {chr(10).join(structure_lines[:30])}

        STATISTICS:
        - Total files analyzed: {len(file_analyses)}
        - Total lines of code: {total_lines:,}
        - Languages: {languages}
        - Average code quality: {avg_quality:.1f}/10

        TOP FILE ISSUES:
        {chr(10).join([f"- {fa.path}: {len(fa.issues_found)} issues" for fa in file_analyses[:10]])}

        Provide an architectural assessment covering:
        1. Project type and purpose
        2. Technology stack evaluation
        3. Code organization and structure
        4. Scalability and maintainability concerns
        5. Key recommendations for improvement

        Keep response under 1500 words and focus on actionable insights.
        """

        # Security analysis
        security_issues = []
        for fa in file_analyses:
            security_issues.extend([issue for issue in fa.issues_found if
                                    any(keyword in issue.lower() for keyword in
                                        ['security', 'vulnerability', 'injection', 'xss', 'auth', 'password'])])

        sec_prompt = f"""
        You are a Senior Security Engineer with 20+ years of experience.

        Security Analysis for repository with {len(file_analyses)} files:

        SECURITY ISSUES FOUND:
        {chr(10).join(security_issues[:20]) if security_issues else "No obvious security issues detected"}

        HIGH-RISK FILE TYPES PRESENT:
        {[lang for lang, count in languages.items() if lang in ['JavaScript', 'TypeScript', 'Python', 'PHP', 'SQL']]}

        Provide security assessment covering:
        1. Overall security posture
        2. Main security risks and vulnerabilities
        3. Authentication and authorization concerns
        4. Data protection and privacy issues
        5. Immediate security priorities

        Keep response under 1000 words and focus on actionable security recommendations.
        """

        try:
            # Run both analyses
            arch_task = self.client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=2000,
                temperature=0.1,
                messages=[{"role": "user", "content": arch_prompt}]
            )

            sec_task = self.client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=1500,
                temperature=0.1,
                messages=[{"role": "user", "content": sec_prompt}]
            )

            architecture_assessment = arch_task.content[0].text
            security_assessment = sec_task.content[0].text

            return architecture_assessment, security_assessment

        except Exception as e:
            return f"Architecture analysis failed: {e}", f"Security analysis failed: {e}"

    async def generate_executive_summary(self, analysis: RepositoryAnalysis) -> str:
        """Generate executive summary for leadership."""
        print("Generating executive summary...")

        prompt = f"""
        You are presenting to C-level executives. Create an executive summary of this technical analysis:

        REPOSITORY METRICS:
        - Total Files: {analysis.total_files}
        - Lines of Code: {analysis.total_lines:,}
        - Languages: {analysis.languages}
        - Code Quality Score: {analysis.code_quality_score:.1f}/10

        KEY FINDINGS:
        - Total issues identified: {sum(len(fa.issues_found) for fa in analysis.file_analyses)}
        - Files needing attention: {len([fa for fa in analysis.file_analyses if fa.severity_score < 7])}
        - High-quality files: {len([fa for fa in analysis.file_analyses if fa.severity_score >= 8])}

        Create an executive summary for non-technical leadership covering:
        1. Business impact of code quality findings
        2. Risk assessment and implications
        3. Investment priorities and recommendations
        4. Expected ROI from addressing technical debt
        5. Competitive implications

        Focus on business outcomes, not technical details. Keep under 800 words.
        """

        try:
            message = self.client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=1200,
                temperature=0.1,
                messages=[{"role": "user", "content": prompt}]
            )
            return message.content[0].text
        except Exception as e:
            return f"Executive summary generation failed: {e}"

    def create_pdf_report(self, analysis: RepositoryAnalysis, output_path: str):
        """Generate comprehensive PDF report."""
        print(f"Generating PDF report: {output_path}")

        doc = SimpleDocTemplate(output_path, pagesize=A4,
                                leftMargin=72, rightMargin=72,
                                topMargin=72, bottomMargin=72)
        styles = getSampleStyleSheet()
        story = []

        # Custom styles
        title_style = ParagraphStyle(
            'CustomTitle',
            parent=styles['Heading1'],
            fontSize=24,
            textColor=colors.darkblue,
            spaceAfter=30,
            alignment=TA_CENTER
        )

        heading_style = ParagraphStyle(
            'CustomHeading',
            parent=styles['Heading2'],
            fontSize=16,
            textColor=colors.darkblue,
            spaceBefore=20,
            spaceAfter=10
        )

        # Title Page
        story.append(Paragraph("Repository Analysis Report", title_style))
        story.append(Spacer(1, 20))
        story.append(Paragraph(f"<b>Repository:</b> {analysis.repo_path}", styles['Normal']))
        story.append(Paragraph(f"<b>Analysis Date:</b> {datetime.now().strftime('%B %d, %Y at %H:%M')}", styles['Normal']))
        story.append(Paragraph("<b>Generated by:</b> AI Senior Engineering Team", styles['Normal']))
        story.append(PageBreak())

        # Executive Summary
        story.append(Paragraph("Executive Summary", heading_style))
        story.append(Paragraph(analysis.executive_summary, styles['Normal']))
        story.append(PageBreak())

        # Repository Overview
        story.append(Paragraph("Repository Overview", heading_style))

        overview_data = [
            ['Metric', 'Value'],
            ['Total Files Analyzed', str(analysis.total_files)],
            ['Total Lines of Code', f"{analysis.total_lines:,}"],
            ['Primary Languages', ', '.join(list(analysis.languages.keys())[:5])],
            ['Overall Code Quality', f"{analysis.code_quality_score:.1f}/10"],
        ]

        overview_table = Table(overview_data, colWidths=[200, 300])
        overview_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, 0), 12),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
            ('GRID', (0, 0), (-1, -1), 1, colors.black)
        ]))

        story.append(overview_table)
        story.append(Spacer(1, 20))

        # Languages Distribution
        if analysis.languages:
            story.append(Paragraph("Language Distribution", heading_style))
            lang_data = [['Language', 'Files']]
            for lang, count in sorted(analysis.languages.items(), key=lambda x: x[1], reverse=True):
                lang_data.append([lang, str(count)])

            lang_table = Table(lang_data, colWidths=[200, 100])
            lang_table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('GRID', (0, 0), (-1, -1), 1, colors.black)
            ]))
            story.append(lang_table)
            story.append(PageBreak())

        # Architecture Assessment
        story.append(Paragraph("Architecture Assessment", heading_style))
        # Split long text into paragraphs
        arch_paragraphs = analysis.architecture_assessment.split('\n\n')
        for para in arch_paragraphs[:10]:  # Limit paragraphs
            if para.strip():
                story.append(Paragraph(para.strip(), styles['Normal']))
                story.append(Spacer(1, 10))
        story.append(PageBreak())

        # Security Assessment
        story.append(Paragraph("Security Assessment", heading_style))
        sec_paragraphs = analysis.security_assessment.split('\n\n')
        for para in sec_paragraphs[:10]:  # Limit paragraphs
            if para.strip():
                story.append(Paragraph(para.strip(), styles['Normal']))
                story.append(Spacer(1, 10))
        story.append(PageBreak())

        # File Analysis Summary
        story.append(Paragraph("File Analysis Summary", heading_style))

        # Summary statistics
        high_quality_files = [fa for fa in analysis.file_analyses if fa.severity_score >= 8]
        medium_quality_files = [fa for fa in analysis.file_analyses if 5 <= fa.severity_score < 8]
        low_quality_files = [fa for fa in analysis.file_analyses if fa.severity_score < 5]

        quality_data = [
            ['Quality Level', 'Files', 'Percentage'],
            ['High Quality (8-10)', str(len(high_quality_files)), f"{len(high_quality_files)/len(analysis.file_analyses)*100:.1f}%"],
            ['Medium Quality (5-7)', str(len(medium_quality_files)), f"{len(medium_quality_files)/len(analysis.file_analyses)*100:.1f}%"],
            ['Low Quality (1-4)', str(len(low_quality_files)), f"{len(low_quality_files)/len(analysis.file_analyses)*100:.1f}%"]
        ]

        quality_table = Table(quality_data)
        quality_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
            ('BACKGROUND', (0, 1), (-1, 1), colors.lightgreen),
            ('BACKGROUND', (0, 2), (-1, 2), colors.lightyellow),
            ('BACKGROUND', (0, 3), (-1, 3), colors.lightcoral)
        ]))

        story.append(quality_table)
        story.append(Spacer(1, 20))

        # Top Issues Found
        story.append(Paragraph("Files Requiring Attention", heading_style))

        # Sort files by severity (lowest scores first - need most attention)
        files_by_priority = sorted(analysis.file_analyses, key=lambda x: x.severity_score)

        for i, file_analysis in enumerate(files_by_priority[:15]):  # Top 15 files needing attention
            story.append(Paragraph(f"<b>{i+1}. {file_analysis.path}</b>", styles['Heading4']))
            story.append(Paragraph(f"Language: {file_analysis.language} | Quality Score: {file_analysis.severity_score:.1f}/10 | Lines: {file_analysis.lines_of_code}", styles['Normal']))

            # Show top issues
            if file_analysis.issues_found:
                story.append(Paragraph("Key Issues:", styles['Heading5']))
                for issue in file_analysis.issues_found[:3]:  # Top 3 issues
                    story.append(Paragraph(f"• {issue}", styles['Normal']))

            # Show top recommendations
            if file_analysis.recommendations:
                story.append(Paragraph("Recommendations:", styles['Heading5']))
                for rec in file_analysis.recommendations[:2]:  # Top 2 recommendations
                    story.append(Paragraph(f"• {rec}", styles['Normal']))

            story.append(Spacer(1, 15))

        # Build PDF
        try:
            doc.build(story)
            print(f"✅ PDF report generated successfully: {output_path}")
        except Exception as e:
            print(f"❌ Error generating PDF: {e}")

    async def analyze_repository(self, repo_path: str, max_files: int = 50) -> RepositoryAnalysis:
        """Main analysis function."""
        try:
            # Clone/access repository
            actual_repo_path = self.clone_repository(repo_path)

            # Scan files
            files_to_analyze = self.scan_repository(actual_repo_path, max_files)

            if not files_to_analyze:
                raise Exception("No files found to analyze")

            # Analyze each file
            print(f"Starting analysis of {len(files_to_analyze)} files...")
            file_analyses = []

            for i, (file_path, content) in enumerate(files_to_analyze):
                print(f"Analyzing file {i+1}/{len(files_to_analyze)}: {file_path.name}")
                analysis = await self.analyze_file_comprehensive(file_path, content)
                file_analyses.append(analysis)

                # Small delay to avoid rate limiting
                await asyncio.sleep(0.2)

            # Repository-level analyses
            print("Performing repository-level analysis...")
            architecture_assessment, security_assessment = await self.analyze_repository_overview(
                actual_repo_path, file_analyses)

            # Calculate overall quality score
            avg_quality = sum(fa.severity_score for fa in file_analyses) / len(file_analyses)

            # Generate statistics
            languages = dict(Counter(fa.language for fa in file_analyses))
            total_lines = sum(fa.lines_of_code for fa in file_analyses)

            # Create repository analysis
            repo_analysis = RepositoryAnalysis(
                repo_path=repo_path,
                total_files=len(file_analyses),
                total_lines=total_lines,
                languages=languages,
                architecture_assessment=architecture_assessment,
                security_assessment=security_assessment,
                code_quality_score=avg_quality,
                file_analyses=file_analyses,
                executive_summary=""
            )

            # Generate executive summary
            print("Generating executive summary...")
            repo_analysis.executive_summary = await self.generate_executive_summary(repo_analysis)

            return repo_analysis

        finally:
            # Cleanup
            if self.temp_dir and os.path.exists(self.temp_dir):
                shutil.rmtree(self.temp_dir)
                print("Temporary files cleaned up")

async def main():
    # Load environment variables
    load_dotenv()

    parser = argparse.ArgumentParser(description="Robust GitHub Repository AI Analysis")
    parser.add_argument("repo_path", help="Repository path (local directory or Git URL)")
    parser.add_argument("--output", "-o", default="repository_analysis.pdf",
                        help="Output PDF file path")
    parser.add_argument("--max-files", type=int, default=50,
                        help="Maximum files to analyze")
    parser.add_argument("--api-key", help="Anthropic API key (overrides .env)")

    args = parser.parse_args()

    # Get API key
    api_key = args.api_key or os.getenv('ANTHROPIC_API_KEY')
    if not api_key:
        print("❌ Error: ANTHROPIC_API_KEY not found in .env file or command line")
        print("Please create a .env file with: ANTHROPIC_API_KEY=your_key_here")
        return 1

    try:
        print("🚀 Starting Repository Analysis")
        print("=" * 60)
        print(f"Repository: {args.repo_path}")
        print(f"Max files: {args.max_files}")
        print(f"Output: {args.output}")
        print("=" * 60)

        # Initialize analyzer
        analyzer = RobustGitHubAnalyzer(api_key)

        # Perform analysis
        analysis = await analyzer.analyze_repository(args.repo_path, args.max_files)

        # Generate PDF report
        analyzer.create_pdf_report(analysis, args.output)

        # Print summary to console
        print("\n" + "=" * 60)
        print("🎯 ANALYSIS COMPLETE")
        print("=" * 60)
        print(f"📊 Repository Statistics:")
        print(f"   • Files Analyzed: {analysis.total_files}")
        print(f"   • Lines of Code: {analysis.total_lines:,}")
        print(f"   • Languages: {len(analysis.languages)}")
        print(f"   • Code Quality: {analysis.code_quality_score:.1f}/10")

        # Quality breakdown
        high_quality = len([fa for fa in analysis.file_analyses if fa.severity_score >= 8])
        low_quality = len([fa for fa in analysis.file_analyses if fa.severity_score < 5])

        print(f"\n📈 Quality Breakdown:")
        print(f"   • High Quality Files: {high_quality}")
        print(f"   • Files Needing Attention: {low_quality}")
        print(f"   • Total Issues Found: {sum(len(fa.issues_found) for fa in analysis.file_analyses)}")

        print(f"\n📄 Detailed PDF Report: {args.output}")
        print("\n✅ Analysis completed successfully!")

        return 0

    except Exception as e:
        print(f"❌ Error during analysis: {e}")
        return 1

if __name__ == "__main__":
    exit(asyncio.run(main()))
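# Example invocation (illustrative; the script name, repository URL, output name and
# API key are placeholders assumed for this sketch, not taken from this commit):
#   export ANTHROPIC_API_KEY=sk-ant-...            # or place it in a .env file
#   python ai_analyze.py https://github.com/org/repo --output repo_report.pdf --max-files 30
#   python ai_analyze.py ./local-checkout -o local_report.pdf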
232
services/ai-analysis-service/ai-analysis/ai_blog_analysis.pdf
Normal file
@ -0,0 +1,232 @@
[Binary file: ReportLab-generated PDF report; raw PDF stream data (232 lines) omitted]
363
services/ai-analysis-service/ai-analysis/analysis_report.md
Normal file
@ -0,0 +1,363 @@

# GitHub Repository Analysis Report

**Repository:** https://github.com/TejasTeju-dev/AI-Blog
**Analysis Date:** 2025-09-19 11:09:14
**Analyzed by:** Claude AI Assistant

---

## Executive Summary

Let me provide a comprehensive analysis:

1. **Project Type & Purpose**:
This appears to be a modern web application built with Next.js, likely a blog or content platform with articles and topics sections. The extensive UI component library suggests it's a full-featured web application with a sophisticated user interface.

2. **Technology Stack**:
- Frontend Framework: Next.js (React)
- Language: TypeScript
- Styling: Tailwind CSS
- Package Manager: pnpm
- UI Components: Extensive component library (possibly using shadcn/ui)
- State Management: Custom hooks
- Animations: Multiple background animation components

3. **Architecture Overview**:
The project follows Next.js 13+ App Router structure:
```
app/          # Main application routes
components/   # Reusable UI components
hooks/        # Custom React hooks
lib/          # Utility functions
public/       # Static assets
styles/       # Global styles
```

4. **Key Components**:
- **UI Components**: Comprehensive set of 40+ UI components including:
  - Basic elements (Button, Input, Form)
  - Navigation (Navbar, Menu, Breadcrumb)
  - Feedback (Toast, Alert, Dialog)
  - Data display (Table, Chart, Card)
  - Layout (Grid, Sidebar)
- **Background Components**:
  - AnimatedGrid
  - FloatingElements
  - ParticleField
  - 3DBackground
- **Core Pages**:
  - Home (page.tsx)
  - Articles
  - Blog
  - Topics
  - About

5. **Development Setup**:
Required setup likely includes:
```bash
# Install dependencies
pnpm install

# Development server
pnpm dev

# Build
pnpm build
```

Requirements:
- Node.js
- pnpm
- TypeScript knowledge
- Understanding of Next.js and React

6. **Code Quality Assessment**:
Strengths:
- Well-organized directory structure
- Consistent use of TypeScript
- Modular component architecture
- Separation of concerns (UI components, hooks, pages)
- Comprehensive UI component library
- Modern development practices (App Router, TypeScript)

Areas for consideration:
- Large number of UI components might indicate need for documentation
- Multiple background components might need performance optimization
- Could benefit from API documentation
- Might need testing infrastructure (not visible in structure)

Additional Observations:
- The project uses modern React patterns (hooks)
- Strong focus on UI/UX with multiple animation options
- Built with scalability in mind (modular structure)
- Follows Next.js best practices
- Uses modern tooling (pnpm, TypeScript, Tailwind)

This appears to be a well-structured, modern web application with a strong focus on UI components and user experience. The architecture suggests it's built for scalability and maintainability.

---

## Detailed Code Analysis

I'll analyze each aspect of this Next.js project:

1. **Code Quality**
- Strong TypeScript usage with proper type definitions and configurations
- Consistent code formatting and organization following Next.js 13+ conventions
- Clean project structure with clear separation of concerns
- Good use of modern React patterns and Next.js features
- Well-structured configuration files (next.config.js, tailwind.config.js, etc.)
- Follows React best practices with components organization

2. **Design Patterns**
- Component-based architecture following React principles
- Server-side rendering approach using Next.js App Router
- Atomic design pattern evident in UI components organization
- Utility-first CSS approach with Tailwind
- Singleton pattern for configuration management
- Dependency injection through React context (seen in theme implementation)

3. **Key Dependencies**
- Core: Next.js 14.2, React 19, TypeScript
- UI: Radix UI components, Tailwind CSS, shadcn/ui
- 3D: Three.js, React Three Fiber
- Forms: React Hook Form, Zod validation
- Utilities: clsx, tailwind-merge
- Development: PostCSS, TypeScript, ESLint

4. **Potential Issues**
- Build errors being ignored (typescript.ignoreBuildErrors, eslint.ignoreDuringBuilds)
- Unoptimized images configuration could impact performance
- Missing error boundaries and proper error handling
- Security considerations for client-side rendering of 3D content
- No explicit API rate limiting or security headers
- Missing proper environment variable handling

5. **Testing Strategy**
- No visible testing setup (Jest, React Testing Library, etc.)
- Missing unit tests, integration tests, and e2e tests
- Should add testing framework and implement test coverage
- Consider adding Cypress or Playwright for e2e testing

6. **Documentation**
- Good README with clear project structure and setup instructions
- Missing JSDoc comments for components and functions
- Could benefit from more inline documentation
- API documentation could be improved
- Missing contribution guidelines and deployment docs

7. **Maintainability**
Strengths:
- Clear project structure
- Modern tooling and frameworks
- Type safety with TypeScript
- Component modularity
- Consistent coding style

Areas for Improvement:
- Add comprehensive testing
- Improve error handling
- Better documentation
- Implement proper CI/CD
- Add proper logging system
- Consider performance monitoring

Additional Recommendations:

1. Security:
```typescript
// Add security headers
const securityHeaders = [
  { key: 'X-XSS-Protection', value: '1; mode=block' },
  { key: 'X-Frame-Options', value: 'SAMEORIGIN' },
  { key: 'X-Content-Type-Options', value: 'nosniff' },
]
```

2. Error Handling:
```typescript
// Add error boundary component
class ErrorBoundary extends React.Component {
  static getDerivedStateFromError(error) {
    return { hasError: true };
  }

  componentDidCatch(error, errorInfo) {
    // Log error to service
  }
}
```

3. Testing Setup:
```json
// Add to package.json
{
  "jest": {
    "setupFilesAfterEnv": ["<rootDir>/jest.setup.js"],
    "testEnvironment": "jsdom"
  },
  "scripts": {
    "test": "jest",
    "test:watch": "jest --watch",
    "test:coverage": "jest --coverage"
  }
}
```

4. Performance Monitoring:
```typescript
// Add performance monitoring
export function reportWebVitals(metric) {
  if (metric.label === 'web-vital') {
    console.log(metric); // Send to analytics
  }
}
```

The project has a solid foundation but would benefit from these improvements for production readiness.

---

## Security & Best Practices Analysis

I'll analyze the repository based on the provided files and structure:

1. **Security Issues**:
- ⚠️ ESLint and TypeScript build errors are being ignored (`ignoreDuringBuilds: true` and `ignoreBuildErrors: true`), which could mask security-related issues
- ⚠️ Image optimization is disabled (`unoptimized: true`), which could lead to performance and security concerns
- ✅ Remote image patterns are properly restricted to specific domains (unsplash.com)
- ⚠️ No explicit CSP (Content Security Policy) configuration visible

2. **Secret Management**:
- ✅ Uses environment variables (process.env)
- ⚠️ No visible secret management solution or environment validation
- 🔍 Recommend implementing a secret management solution (e.g., Vault, AWS Secrets Manager)

3. **Dependencies**:
- Cannot fully assess without package.json
- Using Next.js and Tailwind CSS which are generally well-maintained
- 🔍 Recommend implementing dependency scanning (e.g., Snyk, OWASP Dependency-Check)

4. **Best Practices**:
✅ Good:
- TypeScript implementation with strict mode enabled
- Proper module resolution and ES6 target
- Well-organized file structure
- Using modern module systems
- Proper tailwind configuration

⚠️ Concerns:
- Disabling TypeScript and ESLint checks in production
- Multiple next.config files (both .js and .mjs)
- No visible testing configuration

5. **Configuration**:
✅ Good:
- Environment-based configuration for basePath
- Proper TypeScript configuration
- Well-structured Tailwind configuration

⚠️ Concerns:
- Duplicate next.config files might cause confusion
- Some hardcoded values could be externalized
- No visible staging/production environment separation

6. **Error Handling**:
- Cannot fully assess without application code
- ⚠️ Disabling TypeScript and ESLint checks could mask error handling issues
- 🔍 Recommend implementing proper error boundaries and logging

7. **Recommendations**:

Security:
```typescript
// Enable TypeScript and ESLint checks
const nextConfig = {
  eslint: {
    ignoreDuringBuilds: false,
  },
  typescript: {
    ignoreBuildErrors: false,
  }
}
```

Configuration:
```javascript
// Consolidate next.config files
// Add proper environment validation
const validateEnv = () => {
  const required = ['API_KEY', 'DATABASE_URL'];
  required.forEach(key => {
    if (!process.env[key]) throw new Error(`Missing ${key}`);
  });
}
```

Best Practices:
1. Implement proper CSP:
```javascript
// next.config.js
{
  async headers() {
    return [
      {
        source: '/:path*',
        headers: [
          {
            key: 'Content-Security-Policy',
            value: "default-src 'self';"
          }
        ]
      }
    ]
  }
}
```

2. Enable image optimization:
```javascript
images: {
  unoptimized: false,
  domains: ['images.unsplash.com'],
}
```

Additional Recommendations:
1. Implement security headers
2. Add input validation
3. Set up proper error boundaries
4. Add proper testing configuration
5. Implement API rate limiting
6. Add security scanning in CI/CD
7. Implement proper logging
8. Add environment validation
9. Consider implementing authentication/authorization
10. Add proper CORS configuration

Environment Setup:
```bash
# .env.example
NODE_ENV=development
API_KEY=
DATABASE_URL=
```

This analysis is based on the configuration files provided. For a more comprehensive security assessment, access to the actual application code, API endpoints, and authentication mechanisms would be needed.

---

## Recommendations Summary

Based on the analysis, here are the key recommendations for this repository:

1. **Immediate Actions**: Critical issues that should be addressed promptly
2. **Code Quality Improvements**: Suggestions for better maintainability
3. **Security Enhancements**: Steps to improve security posture
4. **Documentation**: Areas where documentation could be enhanced
5. **Architecture**: Potential architectural improvements

---

*This analysis was generated using AI and should be reviewed by human developers for accuracy and context.*
391
services/ai-analysis-service/ai-analysis/app.py
Normal file
@ -0,0 +1,391 @@

#!/usr/bin/env python3
"""
GitHub Repository AI Analysis Tool
Analyzes GitHub repositories using Claude API for comprehensive code insights.
"""

import os
import git
import json
import requests
import tempfile
import shutil
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import argparse
from datetime import datetime
import mimetypes
import base64

class GitHubRepoAnalyzer:
    def __init__(self, anthropic_api_key: str):
        self.api_key = anthropic_api_key
        self.api_url = "https://api.anthropic.com/v1/messages"
        self.temp_dir = None

        # File extensions to analyze
        self.code_extensions = {
            '.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.cpp', '.c', '.h',
            '.cs', '.php', '.rb', '.go', '.rs', '.swift', '.kt', '.scala',
            '.html', '.css', '.scss', '.sass', '.less', '.vue', '.svelte',
            '.sql', '.sh', '.bash', '.yml', '.yaml', '.json', '.xml',
            '.dockerfile', '.md', '.rst', '.txt'
        }

        # Files to always include in analysis
        self.important_files = {
            'README.md', 'readme.md', 'README.txt', 'readme.txt',
            'package.json', 'requirements.txt', 'Cargo.toml', 'pom.xml',
            'build.gradle', 'Makefile', 'dockerfile', 'Dockerfile',
            'docker-compose.yml', '.gitignore', 'setup.py', 'pyproject.toml'
        }

    def clone_repository(self, repo_url: str) -> str:
        """Clone GitHub repository to temporary directory."""
        print(f"Cloning repository: {repo_url}")

        self.temp_dir = tempfile.mkdtemp(prefix="github_analysis_")

        try:
            git.Repo.clone_from(repo_url, self.temp_dir)
            print(f"Repository cloned to: {self.temp_dir}")
            return self.temp_dir
        except git.exc.GitCommandError as e:
            raise Exception(f"Failed to clone repository: {e}")

    def get_file_info(self, file_path: Path) -> Dict:
        """Get file information and content."""
        try:
            # Check file size (skip files larger than 1MB)
            if file_path.stat().st_size > 1024 * 1024:
                return {
                    'path': str(file_path.relative_to(self.temp_dir)),
                    'size': file_path.stat().st_size,
                    'content': '[File too large to analyze]',
                    'encoding': 'skipped'
                }

            # Try to read as text
            try:
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read()
                encoding = 'utf-8'
            except UnicodeDecodeError:
                # If text fails, try binary for certain file types
                with open(file_path, 'rb') as f:
                    raw_content = f.read()
                    if len(raw_content) < 10000:  # Only encode small binary files
                        content = base64.b64encode(raw_content).decode('ascii')
                        encoding = 'base64'
                    else:
                        content = '[Binary file - content not included]'
                        encoding = 'binary'

            return {
                'path': str(file_path.relative_to(self.temp_dir)),
                'size': file_path.stat().st_size,
                'content': content,
                'encoding': encoding,
                'mime_type': mimetypes.guess_type(str(file_path))[0]
            }
        except Exception as e:
            return {
                'path': str(file_path.relative_to(self.temp_dir)),
                'error': str(e),
                'content': '[Error reading file]'
            }

    def scan_repository(self, max_files: int = 50) -> Dict:
        """Scan repository and collect file information."""
        print("Scanning repository structure...")

        repo_data = {
            'structure': [],
            'files': [],
            'stats': {
                'total_files': 0,
                'analyzed_files': 0,
                'total_size': 0,
                'languages': {}
            }
        }

        # Get directory structure
        for root, dirs, files in os.walk(self.temp_dir):
            # Skip hidden directories and common build/cache directories
            dirs[:] = [d for d in dirs if not d.startswith('.') and
                       d not in {'node_modules', '__pycache__', 'build', 'dist', 'target', 'venv', 'env'}]

            level = root.replace(self.temp_dir, '').count(os.sep)
            indent = ' ' * level
            folder_name = os.path.basename(root) if root != self.temp_dir else '.'
            repo_data['structure'].append(f"{indent}{folder_name}/")

            # Process files
            for file in files:
                if file.startswith('.'):
                    continue

                file_path = Path(root) / file
                repo_data['stats']['total_files'] += 1
                repo_data['stats']['total_size'] += file_path.stat().st_size

                # Track languages
                ext = file_path.suffix.lower()
                if ext:
                    repo_data['stats']['languages'][ext] = repo_data['stats']['languages'].get(ext, 0) + 1

                # Add to structure
                repo_data['structure'].append(f"{indent} {file}")

                # Decide if we should analyze this file
                should_analyze = (
                    file.lower() in self.important_files or
                    ext in self.code_extensions or
                    repo_data['stats']['analyzed_files'] < max_files
                )

                if should_analyze and repo_data['stats']['analyzed_files'] < max_files:
                    file_info = self.get_file_info(file_path)
                    repo_data['files'].append(file_info)
                    repo_data['stats']['analyzed_files'] += 1

        return repo_data

def call_claude_api(self, prompt: str, max_tokens: int = 4000) -> str:
|
||||||
|
"""Make API call to Claude."""
|
||||||
|
headers = {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"x-api-key": self.api_key,
|
||||||
|
"anthropic-version": "2023-06-01"
|
||||||
|
}
|
||||||
|
|
||||||
|
data = {
|
||||||
|
"model": "claude-3-5-sonnet-20241022",
|
||||||
|
"max_tokens": max_tokens,
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": prompt}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = requests.post(self.api_url, headers=headers, json=data)
|
||||||
|
response.raise_for_status()
|
||||||
|
|
||||||
|
result = response.json()
|
||||||
|
return result['content'][0]['text']
|
||||||
|
except requests.exceptions.RequestException as e:
|
||||||
|
raise Exception(f"API request failed: {e}")
|
||||||
|
|
||||||
|
def analyze_repository_overview(self, repo_data: Dict) -> str:
|
||||||
|
"""Get high-level repository analysis."""
|
||||||
|
print("Analyzing repository overview...")
|
||||||
|
|
||||||
|
structure_summary = "\n".join(repo_data['structure'][:100]) # Limit structure size
|
||||||
|
|
||||||
|
prompt = f"""
|
||||||
|
Analyze this GitHub repository and provide a comprehensive overview:
|
||||||
|
|
||||||
|
REPOSITORY STRUCTURE:
|
||||||
|
{structure_summary}
|
||||||
|
|
||||||
|
STATISTICS:
|
||||||
|
- Total files: {repo_data['stats']['total_files']}
|
||||||
|
- Files analyzed: {repo_data['stats']['analyzed_files']}
|
||||||
|
- Total size: {repo_data['stats']['total_size']} bytes
|
||||||
|
- Languages found: {dict(list(repo_data['stats']['languages'].items())[:10])}
|
||||||
|
|
||||||
|
Please provide:
|
||||||
|
1. **Project Type & Purpose**: What kind of project is this?
|
||||||
|
2. **Technology Stack**: What technologies, frameworks, and languages are used?
|
||||||
|
3. **Architecture Overview**: How is the project structured?
|
||||||
|
4. **Key Components**: What are the main modules/components?
|
||||||
|
5. **Development Setup**: What's needed to run this project?
|
||||||
|
6. **Code Quality Assessment**: Initial observations about code organization
|
||||||
|
"""
|
||||||
|
|
||||||
|
return self.call_claude_api(prompt)
|
||||||
|
|
||||||
|
def analyze_code_files(self, repo_data: Dict) -> str:
|
||||||
|
"""Analyze individual code files."""
|
||||||
|
print("Analyzing code files...")
|
||||||
|
|
||||||
|
# Prepare file contents for analysis
|
||||||
|
files_content = []
|
||||||
|
for file_info in repo_data['files'][:20]: # Limit to first 20 files
|
||||||
|
if file_info.get('encoding') == 'utf-8' and len(file_info.get('content', '')) < 5000:
|
||||||
|
files_content.append(f"=== {file_info['path']} ===\n{file_info['content']}\n")
|
||||||
|
|
||||||
|
files_text = "\n".join(files_content)
|
||||||
|
|
||||||
|
prompt = f"""
|
||||||
|
Analyze these key files from the repository:
|
||||||
|
|
||||||
|
{files_text}
|
||||||
|
|
||||||
|
Please provide detailed analysis covering:
|
||||||
|
1. **Code Quality**: Code style, organization, and best practices
|
||||||
|
2. **Design Patterns**: What patterns and architectural approaches are used?
|
||||||
|
3. **Dependencies & Libraries**: Key external dependencies identified
|
||||||
|
4. **Potential Issues**: Any code smells, security concerns, or improvements needed
|
||||||
|
5. **Testing Strategy**: How is testing implemented (if at all)?
|
||||||
|
6. **Documentation**: Quality of inline documentation and comments
|
||||||
|
7. **Maintainability**: How maintainable and extensible is this code?
|
||||||
|
"""
|
||||||
|
|
||||||
|
return self.call_claude_api(prompt, max_tokens=6000)
|
||||||
|
|
||||||
|
def analyze_security_and_best_practices(self, repo_data: Dict) -> str:
|
||||||
|
"""Analyze security and best practices."""
|
||||||
|
print("Analyzing security and best practices...")
|
||||||
|
|
||||||
|
# Look for security-sensitive files
|
||||||
|
security_files = []
|
||||||
|
for file_info in repo_data['files']:
|
||||||
|
path_lower = file_info['path'].lower()
|
||||||
|
if any(term in path_lower for term in ['config', 'env', 'secret', 'key', 'auth', 'security']):
|
||||||
|
if file_info.get('encoding') == 'utf-8':
|
||||||
|
security_files.append(f"=== {file_info['path']} ===\n{file_info['content'][:2000]}\n")
|
||||||
|
|
||||||
|
security_content = "\n".join(security_files[:10])
|
||||||
|
|
||||||
|
prompt = f"""
|
||||||
|
Analyze this repository for security and best practices:
|
||||||
|
|
||||||
|
SECURITY-RELEVANT FILES:
|
||||||
|
{security_content}
|
||||||
|
|
||||||
|
FILE STRUCTURE ANALYSIS:
|
||||||
|
{json.dumps(repo_data['stats'], indent=2)}
|
||||||
|
|
||||||
|
Please analyze:
|
||||||
|
1. **Security Issues**: Potential security vulnerabilities or concerns
|
||||||
|
2. **Secret Management**: How are secrets/credentials handled?
|
||||||
|
3. **Dependencies**: Are there any vulnerable dependencies?
|
||||||
|
4. **Best Practices**: Adherence to language/framework best practices
|
||||||
|
5. **Configuration**: Are configurations properly externalized?
|
||||||
|
6. **Error Handling**: How are errors handled throughout the codebase?
|
||||||
|
7. **Recommendations**: Specific suggestions for improvement
|
||||||
|
"""
|
||||||
|
|
||||||
|
return self.call_claude_api(prompt, max_tokens=5000)
|
||||||
|
|
||||||
|
def generate_comprehensive_report(self, repo_url: str, overview: str, code_analysis: str, security_analysis: str) -> str:
|
||||||
|
"""Generate final comprehensive report."""
|
||||||
|
print("Generating comprehensive report...")
|
||||||
|
|
||||||
|
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
|
||||||
|
report = f"""
|
||||||
|
# GitHub Repository Analysis Report
|
||||||
|
|
||||||
|
**Repository:** {repo_url}
|
||||||
|
**Analysis Date:** {timestamp}
|
||||||
|
**Analyzed by:** Claude AI Assistant
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Executive Summary
|
||||||
|
|
||||||
|
{overview}
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Detailed Code Analysis
|
||||||
|
|
||||||
|
{code_analysis}
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Security & Best Practices Analysis
|
||||||
|
|
||||||
|
{security_analysis}
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Recommendations Summary
|
||||||
|
|
||||||
|
Based on the analysis, here are the key recommendations for this repository:
|
||||||
|
|
||||||
|
1. **Immediate Actions**: Critical issues that should be addressed promptly
|
||||||
|
2. **Code Quality Improvements**: Suggestions for better maintainability
|
||||||
|
3. **Security Enhancements**: Steps to improve security posture
|
||||||
|
4. **Documentation**: Areas where documentation could be enhanced
|
||||||
|
5. **Architecture**: Potential architectural improvements
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*This analysis was generated using AI and should be reviewed by human developers for accuracy and context.*
|
||||||
|
"""
|
||||||
|
|
||||||
|
return report
|
||||||
|
|
||||||
|
def analyze(self, repo_url: str, output_file: Optional[str] = None) -> str:
|
||||||
|
"""Main analysis function."""
|
||||||
|
try:
|
||||||
|
# Clone repository
|
||||||
|
self.clone_repository(repo_url)
|
||||||
|
|
||||||
|
# Scan repository structure and files
|
||||||
|
repo_data = self.scan_repository()
|
||||||
|
|
||||||
|
# Perform different types of analysis
|
||||||
|
overview = self.analyze_repository_overview(repo_data)
|
||||||
|
code_analysis = self.analyze_code_files(repo_data)
|
||||||
|
security_analysis = self.analyze_security_and_best_practices(repo_data)
|
||||||
|
|
||||||
|
# Generate comprehensive report
|
||||||
|
final_report = self.generate_comprehensive_report(
|
||||||
|
repo_url, overview, code_analysis, security_analysis
|
||||||
|
)
|
||||||
|
|
||||||
|
# Save report if output file specified
|
||||||
|
if output_file:
|
||||||
|
with open(output_file, 'w', encoding='utf-8') as f:
|
||||||
|
f.write(final_report)
|
||||||
|
print(f"Report saved to: {output_file}")
|
||||||
|
|
||||||
|
return final_report
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# Cleanup temporary directory
|
||||||
|
if self.temp_dir and os.path.exists(self.temp_dir):
|
||||||
|
shutil.rmtree(self.temp_dir)
|
||||||
|
print("Temporary files cleaned up")
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = argparse.ArgumentParser(description="Analyze GitHub repository using Claude AI")
|
||||||
|
parser.add_argument("repo_url", help="GitHub repository URL")
|
||||||
|
parser.add_argument("--api-key", required=True, help="Anthropic API key")
|
||||||
|
parser.add_argument("--output", "-o", help="Output file path (optional)")
|
||||||
|
parser.add_argument("--max-files", type=int, default=50, help="Maximum files to analyze")
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
# Initialize analyzer
|
||||||
|
analyzer = GitHubRepoAnalyzer(args.api_key)
|
||||||
|
|
||||||
|
try:
|
||||||
|
print("Starting GitHub repository analysis...")
|
||||||
|
print("=" * 50)
|
||||||
|
|
||||||
|
# Perform analysis
|
||||||
|
report = analyzer.analyze(args.repo_url, args.output)
|
||||||
|
|
||||||
|
# Print report if no output file specified
|
||||||
|
if not args.output:
|
||||||
|
print("\n" + "=" * 50)
|
||||||
|
print("ANALYSIS REPORT")
|
||||||
|
print("=" * 50)
|
||||||
|
print(report)
|
||||||
|
|
||||||
|
print("\nAnalysis completed successfully!")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error during analysis: {e}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
exit(main())
|
||||||
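For context, a minimal usage sketch of the script above when driven from Python rather than the CLI. The repository URL and output path are placeholders, and it assumes app.py is importable on PYTHONPATH and ANTHROPIC_API_KEY is set in the environment; it is not part of the commit.

# Hypothetical usage sketch for app.py above (placeholder repo URL and output file).
import os
from app import GitHubRepoAnalyzer  # assumes app.py is on PYTHONPATH

analyzer = GitHubRepoAnalyzer(os.environ["ANTHROPIC_API_KEY"])
report = analyzer.analyze("https://github.com/example/example-repo", output_file="report.md")
print(report[:500])  # preview the generated Markdown report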
391
services/ai-analysis-service/ai-analysis/github_analyzer.py
Normal file
@ -0,0 +1,391 @@
#!/usr/bin/env python3
"""
GitHub Repository AI Analysis Tool
Analyzes GitHub repositories using Claude API for comprehensive code insights.
"""

import os
import git
import json
import tempfile
import shutil
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import argparse
from datetime import datetime
import mimetypes
import base64
from dotenv import load_dotenv
import anthropic

class GitHubRepoAnalyzer:
    def __init__(self, anthropic_api_key: str = None):
        # Load environment variables
        load_dotenv()

        # Get API key from parameter or environment
        self.api_key = anthropic_api_key or os.getenv('ANTHROPIC_API_KEY')
        if not self.api_key:
            raise ValueError("Anthropic API key not found. Please set ANTHROPIC_API_KEY in .env file or pass as parameter.")

        # Initialize Anthropic client
        self.client = anthropic.Anthropic(api_key=self.api_key)
        self.temp_dir = None

        # File extensions to analyze
        self.code_extensions = {
            '.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.cpp', '.c', '.h',
            '.cs', '.php', '.rb', '.go', '.rs', '.swift', '.kt', '.scala',
            '.html', '.css', '.scss', '.sass', '.less', '.vue', '.svelte',
            '.sql', '.sh', '.bash', '.yml', '.yaml', '.json', '.xml',
            '.dockerfile', '.md', '.rst', '.txt'
        }

        # Files to always include in analysis
        self.important_files = {
            'README.md', 'readme.md', 'README.txt', 'readme.txt',
            'package.json', 'requirements.txt', 'Cargo.toml', 'pom.xml',
            'build.gradle', 'Makefile', 'dockerfile', 'Dockerfile',
            'docker-compose.yml', '.gitignore', 'setup.py', 'pyproject.toml'
        }

    def clone_repository(self, repo_url: str) -> str:
        """Clone GitHub repository to temporary directory."""
        print(f"Cloning repository: {repo_url}")

        self.temp_dir = tempfile.mkdtemp(prefix="github_analysis_")

        try:
            git.Repo.clone_from(repo_url, self.temp_dir)
            print(f"Repository cloned to: {self.temp_dir}")
            return self.temp_dir
        except git.exc.GitCommandError as e:
            raise Exception(f"Failed to clone repository: {e}")

    def get_file_info(self, file_path: Path) -> Dict:
        """Get file information and content."""
        try:
            # Check file size (skip files larger than 1MB)
            if file_path.stat().st_size > 1024 * 1024:
                return {
                    'path': str(file_path.relative_to(self.temp_dir)),
                    'size': file_path.stat().st_size,
                    'content': '[File too large to analyze]',
                    'encoding': 'skipped'
                }

            # Try to read as text
            try:
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read()
                encoding = 'utf-8'
            except UnicodeDecodeError:
                # If text fails, try binary for certain file types
                with open(file_path, 'rb') as f:
                    raw_content = f.read()
                    if len(raw_content) < 10000:  # Only encode small binary files
                        content = base64.b64encode(raw_content).decode('ascii')
                        encoding = 'base64'
                    else:
                        content = '[Binary file - content not included]'
                        encoding = 'binary'

            return {
                'path': str(file_path.relative_to(self.temp_dir)),
                'size': file_path.stat().st_size,
                'content': content,
                'encoding': encoding,
                'mime_type': mimetypes.guess_type(str(file_path))[0]
            }
        except Exception as e:
            return {
                'path': str(file_path.relative_to(self.temp_dir)),
                'error': str(e),
                'content': '[Error reading file]'
            }

    def scan_repository(self, max_files: int = 50) -> Dict:
        """Scan repository and collect file information."""
        print("Scanning repository structure...")

        repo_data = {
            'structure': [],
            'files': [],
            'stats': {
                'total_files': 0,
                'analyzed_files': 0,
                'total_size': 0,
                'languages': {}
            }
        }

        # Get directory structure
        for root, dirs, files in os.walk(self.temp_dir):
            # Skip hidden directories and common build/cache directories
            dirs[:] = [d for d in dirs if not d.startswith('.') and
                       d not in {'node_modules', '__pycache__', 'build', 'dist', 'target', 'venv', 'env'}]

            level = root.replace(self.temp_dir, '').count(os.sep)
            indent = '  ' * level
            folder_name = os.path.basename(root) if root != self.temp_dir else '.'
            repo_data['structure'].append(f"{indent}{folder_name}/")

            # Process files
            for file in files:
                if file.startswith('.'):
                    continue

                file_path = Path(root) / file
                repo_data['stats']['total_files'] += 1
                repo_data['stats']['total_size'] += file_path.stat().st_size

                # Track languages
                ext = file_path.suffix.lower()
                if ext:
                    repo_data['stats']['languages'][ext] = repo_data['stats']['languages'].get(ext, 0) + 1

                # Add to structure
                repo_data['structure'].append(f"{indent}  {file}")

                # Decide if we should analyze this file
                should_analyze = (
                    file.lower() in self.important_files or
                    ext in self.code_extensions or
                    repo_data['stats']['analyzed_files'] < max_files
                )

                if should_analyze and repo_data['stats']['analyzed_files'] < max_files:
                    file_info = self.get_file_info(file_path)
                    repo_data['files'].append(file_info)
                    repo_data['stats']['analyzed_files'] += 1

        return repo_data

    def call_claude_api(self, prompt: str, max_tokens: int = 4000) -> str:
        """Make API call to Claude using official Anthropic client."""
        try:
            message = self.client.messages.create(
                model="claude-3-sonnet-20240229",
                max_tokens=max_tokens,
                messages=[
                    {"role": "user", "content": prompt}
                ]
            )

            return message.content[0].text

        except Exception as e:
            raise Exception(f"Claude API call failed: {e}")

    def analyze_repository_overview(self, repo_data: Dict) -> str:
        """Get high-level repository analysis."""
        print("Analyzing repository overview...")

        structure_summary = "\n".join(repo_data['structure'][:100])  # Limit structure size

        prompt = f"""
Analyze this GitHub repository and provide a comprehensive overview:

REPOSITORY STRUCTURE:
{structure_summary}

STATISTICS:
- Total files: {repo_data['stats']['total_files']}
- Files analyzed: {repo_data['stats']['analyzed_files']}
- Total size: {repo_data['stats']['total_size']} bytes
- Languages found: {dict(list(repo_data['stats']['languages'].items())[:10])}

Please provide:
1. **Project Type & Purpose**: What kind of project is this?
2. **Technology Stack**: What technologies, frameworks, and languages are used?
3. **Architecture Overview**: How is the project structured?
4. **Key Components**: What are the main modules/components?
5. **Development Setup**: What's needed to run this project?
6. **Code Quality Assessment**: Initial observations about code organization
"""

        return self.call_claude_api(prompt)

    def analyze_code_files(self, repo_data: Dict) -> str:
        """Analyze individual code files."""
        print("Analyzing code files...")

        # Prepare file contents for analysis
        files_content = []
        for file_info in repo_data['files'][:20]:  # Limit to first 20 files
            if file_info.get('encoding') == 'utf-8' and len(file_info.get('content', '')) < 5000:
                files_content.append(f"=== {file_info['path']} ===\n{file_info['content']}\n")

        files_text = "\n".join(files_content)

        prompt = f"""
Analyze these key files from the repository:

{files_text}

Please provide detailed analysis covering:
1. **Code Quality**: Code style, organization, and best practices
2. **Design Patterns**: What patterns and architectural approaches are used?
3. **Dependencies & Libraries**: Key external dependencies identified
4. **Potential Issues**: Any code smells, security concerns, or improvements needed
5. **Testing Strategy**: How is testing implemented (if at all)?
6. **Documentation**: Quality of inline documentation and comments
7. **Maintainability**: How maintainable and extensible is this code?
"""

        return self.call_claude_api(prompt, max_tokens=6000)

    def analyze_security_and_best_practices(self, repo_data: Dict) -> str:
        """Analyze security and best practices."""
        print("Analyzing security and best practices...")

        # Look for security-sensitive files
        security_files = []
        for file_info in repo_data['files']:
            path_lower = file_info['path'].lower()
            if any(term in path_lower for term in ['config', 'env', 'secret', 'key', 'auth', 'security']):
                if file_info.get('encoding') == 'utf-8':
                    security_files.append(f"=== {file_info['path']} ===\n{file_info['content'][:2000]}\n")

        security_content = "\n".join(security_files[:10])

        prompt = f"""
Analyze this repository for security and best practices:

SECURITY-RELEVANT FILES:
{security_content}

FILE STRUCTURE ANALYSIS:
{json.dumps(repo_data['stats'], indent=2)}

Please analyze:
1. **Security Issues**: Potential security vulnerabilities or concerns
2. **Secret Management**: How are secrets/credentials handled?
3. **Dependencies**: Are there any vulnerable dependencies?
4. **Best Practices**: Adherence to language/framework best practices
5. **Configuration**: Are configurations properly externalized?
6. **Error Handling**: How are errors handled throughout the codebase?
7. **Recommendations**: Specific suggestions for improvement
"""

        return self.call_claude_api(prompt, max_tokens=5000)

    def generate_comprehensive_report(self, repo_url: str, overview: str, code_analysis: str, security_analysis: str) -> str:
        """Generate final comprehensive report."""
        print("Generating comprehensive report...")

        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        report = f"""
# GitHub Repository Analysis Report

**Repository:** {repo_url}
**Analysis Date:** {timestamp}
**Analyzed by:** Claude AI Assistant

---

## Executive Summary

{overview}

---

## Detailed Code Analysis

{code_analysis}

---

## Security & Best Practices Analysis

{security_analysis}

---

## Recommendations Summary

Based on the analysis, here are the key recommendations for this repository:

1. **Immediate Actions**: Critical issues that should be addressed promptly
2. **Code Quality Improvements**: Suggestions for better maintainability
3. **Security Enhancements**: Steps to improve security posture
4. **Documentation**: Areas where documentation could be enhanced
5. **Architecture**: Potential architectural improvements

---

*This analysis was generated using AI and should be reviewed by human developers for accuracy and context.*
"""

        return report

    def analyze(self, repo_url: str, output_file: Optional[str] = None) -> str:
        """Main analysis function."""
        try:
            # Clone repository
            self.clone_repository(repo_url)

            # Scan repository structure and files
            repo_data = self.scan_repository()

            # Perform different types of analysis
            overview = self.analyze_repository_overview(repo_data)
            code_analysis = self.analyze_code_files(repo_data)
            security_analysis = self.analyze_security_and_best_practices(repo_data)

            # Generate comprehensive report
            final_report = self.generate_comprehensive_report(
                repo_url, overview, code_analysis, security_analysis
            )

            # Save report if output file specified
            if output_file:
                with open(output_file, 'w', encoding='utf-8') as f:
                    f.write(final_report)
                print(f"Report saved to: {output_file}")

            return final_report

        finally:
            # Cleanup temporary directory
            if self.temp_dir and os.path.exists(self.temp_dir):
                shutil.rmtree(self.temp_dir)
                print("Temporary files cleaned up")

def main():
    parser = argparse.ArgumentParser(description="Analyze GitHub repository using Claude AI")
    parser.add_argument("repo_url", help="GitHub repository URL")
    parser.add_argument("--api-key", help="Anthropic API key (optional if set in .env)")
    parser.add_argument("--output", "-o", help="Output file path (optional)")
    parser.add_argument("--max-files", type=int, default=50, help="Maximum files to analyze")

    args = parser.parse_args()

    try:
        # Initialize analyzer
        analyzer = GitHubRepoAnalyzer(args.api_key)

        print("Starting GitHub repository analysis...")
        print("=" * 50)

        # Perform analysis
        report = analyzer.analyze(args.repo_url, args.output)

        # Print report if no output file specified
        if not args.output:
            print("\n" + "=" * 50)
            print("ANALYSIS REPORT")
            print("=" * 50)
            print(report)

        print("\nAnalysis completed successfully!")

    except Exception as e:
        print(f"Error during analysis: {e}")
        return 1

    return 0

if __name__ == "__main__":
    exit(main())
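The practical difference from app.py is that github_analyzer.py loads the key from .env and calls the official anthropic client instead of posting raw HTTP requests. A standalone sketch of that call pattern, with the model name copied from the file and a placeholder prompt:

# Sketch of the client call pattern used in github_analyzer.py above.
import os
import anthropic
from dotenv import load_dotenv

load_dotenv()  # pick up ANTHROPIC_API_KEY from .env, as the analyzer does
client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
message = client.messages.create(
    model="claude-3-sonnet-20240229",
    max_tokens=1000,
    messages=[{"role": "user", "content": "Summarize this repository structure: ..."}],
)
print(message.content[0].text)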
69
services/ai-analysis-service/ai-analysis/requirements.txt
Normal file
@ -0,0 +1,69 @@
# Core AI and API
anthropic>=0.7.0
openai>=1.0.0

# Environment management
python-dotenv>=1.0.0

# Git operations
GitPython>=3.1.0

# PDF generation
reportlab>=4.0.0
matplotlib>=3.7.0
pillow>=10.0.0

# Code analysis and parsing
ast-comments>=1.1.0
astroid>=3.0.0
pygments>=2.15.0
radon>=6.0.1
bandit>=1.7.5
flake8>=6.0.0
pylint>=3.0.0

# File operations and utilities
pathlib2>=2.3.7
chardet>=5.2.0
python-magic>=0.4.27

# Async operations
aiohttp>=3.8.0
aiofiles>=23.0.0
asyncio-throttle>=1.0.2

# Data processing
pandas>=2.0.0
numpy>=1.24.0
python-dateutil>=2.8.0

# Web scraping (for additional repo info)
requests>=2.31.0
beautifulsoup4>=4.12.0

# Testing and code quality
pytest>=7.4.0
pytest-asyncio>=0.21.0
coverage>=7.3.0

# Additional utilities for advanced analysis
networkx>=3.1.0   # For dependency graph analysis
graphviz>=0.20.0  # For visualization
jinja2>=3.1.0     # For report templating
markdown>=3.4.0   # For markdown processing
pyyaml>=6.0.0     # For YAML config files
toml>=0.10.2      # For TOML config files
xmltodict>=0.13.0 # For XML processing

# Performance monitoring
psutil>=5.9.0
memory-profiler>=0.61.0

# Progress bars and UI
tqdm>=4.65.0
rich>=13.5.0
click>=8.1.0

# Security scanning
safety>=2.3.0
pip-audit>=2.6.0
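As an optional sanity check (not part of the commit), a short sketch that confirms a few of the pinned packages resolve after running pip install against this requirements file; the package selection is illustrative.

# Verify a handful of the dependencies above are importable/installed.
from importlib.metadata import version, PackageNotFoundError

for pkg in ("anthropic", "GitPython", "reportlab", "radon", "bandit"):
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg} is missing")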
1570
services/ai-analysis-service/ai-analyze.py
Normal file
1570
services/ai-analysis-service/ai-analyze.py
Normal file
File diff suppressed because it is too large
Load Diff
46
services/ai-analysis-service/env.example
Normal file
46
services/ai-analysis-service/env.example
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
# AI Analysis Service Environment Configuration
|
||||||
|
|
||||||
|
# Service Configuration
|
||||||
|
PORT=8022
|
||||||
|
HOST=0.0.0.0
|
||||||
|
NODE_ENV=development
|
||||||
|
|
||||||
|
# AI API Keys
|
||||||
|
ANTHROPIC_API_KEY=your_anthropic_api_key_here
|
||||||
|
|
||||||
|
# Database Configuration
|
||||||
|
POSTGRES_HOST=localhost
|
||||||
|
POSTGRES_PORT=5432
|
||||||
|
POSTGRES_DB=dev_pipeline
|
||||||
|
POSTGRES_USER=pipeline_admin
|
||||||
|
POSTGRES_PASSWORD=secure_pipeline_2024
|
||||||
|
|
||||||
|
# Redis Configuration
|
||||||
|
REDIS_HOST=localhost
|
||||||
|
REDIS_PORT=6379
|
||||||
|
REDIS_PASSWORD=redis_secure_2024
|
||||||
|
REDIS_DB=0
|
||||||
|
|
||||||
|
# MongoDB Configuration
|
||||||
|
MONGODB_URL=mongodb://pipeline_admin:mongo_secure_2024@localhost:27017/
|
||||||
|
MONGODB_DB=repo_analyzer
|
||||||
|
|
||||||
|
# JWT Configuration
|
||||||
|
JWT_ACCESS_SECRET=access-secret-key-2024-tech4biz-secure_pipeline_2024
|
||||||
|
|
||||||
|
# Service URLs
|
||||||
|
USER_AUTH_SERVICE_URL=http://localhost:8011
|
||||||
|
|
||||||
|
# Analysis Configuration
|
||||||
|
MAX_FILES_PER_ANALYSIS=100
|
||||||
|
MAX_FILE_SIZE_MB=2
|
||||||
|
ANALYSIS_TIMEOUT_SECONDS=300
|
||||||
|
|
||||||
|
# Memory System Configuration
|
||||||
|
WORKING_MEMORY_TTL=3600
|
||||||
|
EPISODIC_RETENTION_DAYS=365
|
||||||
|
PERSISTENT_MEMORY_THRESHOLD=0.8
|
||||||
|
|
||||||
|
# Logging Configuration
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
LOG_FILE_PATH=/app/logs/ai-analysis.log
|
||||||
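A short sketch of how the Python services consume a .env created from env.example (the same load_dotenv/os.getenv pattern used in server.py and migrate_database.py); the DSN string below is illustrative only.

# Read configuration the way the services in this commit do.
import os
from dotenv import load_dotenv

load_dotenv()  # reads a .env file copied from env.example
pg_dsn = (
    f"host={os.getenv('POSTGRES_HOST', 'localhost')} "
    f"port={os.getenv('POSTGRES_PORT', '5432')} "
    f"dbname={os.getenv('POSTGRES_DB', 'dev_pipeline')} "
    f"user={os.getenv('POSTGRES_USER', 'pipeline_admin')}"
)
print("Analysis file cap:", os.getenv("MAX_FILES_PER_ANALYSIS", "100"))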
104
services/ai-analysis-service/migrate.sh
Executable file
@ -0,0 +1,104 @@
#!/bin/bash

# Database Migration Script using psql
# Executes the complete 001-schema.sql file

set -e  # Exit on any error

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Load environment variables
if [ -f .env ]; then
    export $(cat .env | grep -v '^#' | xargs)
fi

# Database connection parameters
DB_HOST=${POSTGRES_HOST:-localhost}
DB_PORT=${POSTGRES_PORT:-5432}
DB_NAME=${POSTGRES_DB:-dev_pipeline}
DB_USER=${POSTGRES_USER:-pipeline_admin}
DB_PASSWORD=${POSTGRES_PASSWORD:-secure_pipeline_2024}

# Schema file
SCHEMA_FILE="001-schema.sql"

echo -e "${BLUE}🔧 AI Repository Analysis Database Migration${NC}"
echo "=================================================="
echo -e "Database: ${YELLOW}${DB_NAME}@${DB_HOST}:${DB_PORT}${NC}"
echo -e "User: ${YELLOW}${DB_USER}${NC}"
echo -e "Schema file: ${YELLOW}${SCHEMA_FILE}${NC}"
echo ""

# Check if psql is available
if ! command -v psql &> /dev/null; then
    echo -e "${RED}❌ psql command not found!${NC}"
    echo "Please install PostgreSQL client tools:"
    echo "  Ubuntu/Debian: sudo apt-get install postgresql-client"
    echo "  CentOS/RHEL: sudo yum install postgresql"
    echo "  macOS: brew install postgresql"
    exit 1
fi

# Check if schema file exists
if [ ! -f "$SCHEMA_FILE" ]; then
    echo -e "${RED}❌ Schema file not found: ${SCHEMA_FILE}${NC}"
    exit 1
fi

echo -e "${BLUE}• Executing migration...${NC}"

# Set password for psql
export PGPASSWORD="$DB_PASSWORD"

# Run migration
if psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" \
    -f "$SCHEMA_FILE" \
    -v ON_ERROR_STOP=1 \
    --echo-errors \
    --echo-queries; then

    echo -e "${GREEN}✅ Migration completed successfully!${NC}"

    # Verify migration
    echo -e "${BLUE}• Verifying migration...${NC}"

    TABLES=$(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -t -c "
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = 'public'
        AND table_name IN ('code_embeddings', 'query_embeddings', 'knowledge_embeddings',
                           'repository_metadata', 'analysis_sessions', 'file_analysis_history')
        ORDER BY table_name;
    " | tr -d ' ')

    if [ -n "$TABLES" ]; then
        TABLE_COUNT=$(echo "$TABLES" | wc -l)
        echo -e "${GREEN}✓ Found ${TABLE_COUNT} core tables: ${TABLES}${NC}"
    else
        echo -e "${YELLOW}⚠ Could not verify table creation${NC}"
    fi

    # Check for pgvector extension
    VECTOR_AVAILABLE=$(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -t -c "
        SELECT EXISTS(SELECT 1 FROM pg_extension WHERE extname = 'vector');
    " | tr -d ' ')

    if [ "$VECTOR_AVAILABLE" = "t" ]; then
        echo -e "${GREEN}✓ pgvector extension is available${NC}"
    else
        echo -e "${YELLOW}⚠ pgvector extension not available - vector operations will be limited${NC}"
    fi

    echo ""
    echo -e "${GREEN}🚀 Database migration completed successfully!${NC}"
    echo -e "${GREEN}📊 Production-level database ready for AI repository analysis${NC}"

else
    echo -e "${RED}❌ Migration failed!${NC}"
    exit 1
fi
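The same pgvector check that migrate.sh runs through psql can be done from Python with psycopg2 (already a service dependency); a minimal sketch, assuming the connection variables from env.example are set:

# Mirror the pgvector verification performed by migrate.sh.
import os
import psycopg2

conn = psycopg2.connect(
    host=os.getenv("POSTGRES_HOST", "localhost"),
    port=os.getenv("POSTGRES_PORT", "5432"),
    dbname=os.getenv("POSTGRES_DB", "dev_pipeline"),
    user=os.getenv("POSTGRES_USER", "pipeline_admin"),
    password=os.getenv("POSTGRES_PASSWORD", ""),
)
with conn, conn.cursor() as cur:
    cur.execute("SELECT EXISTS(SELECT 1 FROM pg_extension WHERE extname = 'vector');")
    print("pgvector available:", cur.fetchone()[0])
conn.close()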
203
services/ai-analysis-service/migrate_database.py
Normal file
@ -0,0 +1,203 @@
#!/usr/bin/env python3
"""
Database Migration Script using psql command
Executes the complete 001-schema.sql file using PostgreSQL's psql command
"""

import os
import subprocess
import sys
from dotenv import load_dotenv
import logging

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
logger = logging.getLogger(__name__)

def run_migration():
    """Run the database migration using psql command."""
    load_dotenv()

    # Database connection parameters
    db_config = {
        'host': os.getenv('POSTGRES_HOST', 'localhost'),
        'port': os.getenv('POSTGRES_PORT', 5432),
        'database': os.getenv('POSTGRES_DB', 'dev_pipeline'),
        'user': os.getenv('POSTGRES_USER', 'pipeline_admin'),
        'password': os.getenv('POSTGRES_PASSWORD', 'secure_pipeline_2024')
    }

    # Schema file path
    schema_file = os.path.join(os.path.dirname(__file__), '001-schema.sql')

    if not os.path.exists(schema_file):
        logger.error(f"❌ Schema file not found: {schema_file}")
        return False

    try:
        logger.info("🔧 Starting database migration with psql...")
        logger.info(f"   • Database: {db_config['database']}@{db_config['host']}:{db_config['port']}")
        logger.info(f"   • User: {db_config['user']}")
        logger.info(f"   • Schema file: {schema_file}")

        # Set PGPASSWORD environment variable for psql
        env = os.environ.copy()
        env['PGPASSWORD'] = db_config['password']

        # Build psql command
        psql_cmd = [
            'psql',
            '-h', db_config['host'],
            '-p', str(db_config['port']),
            '-U', db_config['user'],
            '-d', db_config['database'],
            '-f', schema_file,
            '-v', 'ON_ERROR_STOP=1',  # Stop on first error
            '--echo-errors',          # Show errors
            '--echo-queries'          # Show queries being executed
        ]

        logger.info("   • Executing migration...")
        logger.info(f"   • Command: {' '.join(psql_cmd)}")

        # Run psql command
        result = subprocess.run(
            psql_cmd,
            env=env,
            capture_output=True,
            text=True,
            timeout=300  # 5 minute timeout
        )

        # Check if psql command exists
        if result.returncode == 127:
            logger.error("❌ psql command not found. Please install PostgreSQL client tools.")
            logger.error("   On Ubuntu/Debian: sudo apt-get install postgresql-client")
            logger.error("   On CentOS/RHEL: sudo yum install postgresql")
            return False

        # Check for errors
        if result.returncode != 0:
            logger.error(f"❌ Migration failed with return code: {result.returncode}")
            if result.stderr:
                logger.error("STDERR:")
                logger.error(result.stderr)
            if result.stdout:
                logger.error("STDOUT:")
                logger.error(result.stdout)
            return False

        # Log success
        logger.info("✅ Migration completed successfully!")

        if result.stdout:
            logger.info("Migration output:")
            # Filter out common psql output noise
            lines = result.stdout.split('\n')
            for line in lines:
                if line.strip() and not line.startswith('SET') and not line.startswith('NOTICE'):
                    logger.info(f"   {line}")

        # Verify migration by checking if key tables exist
        logger.info("   • Verifying migration...")

        verify_cmd = [
            'psql',
            '-h', db_config['host'],
            '-p', str(db_config['port']),
            '-U', db_config['user'],
            '-d', db_config['database'],
            '-t',  # tuples only
            '-c', """
                SELECT table_name
                FROM information_schema.tables
                WHERE table_schema = 'public'
                AND table_name IN ('code_embeddings', 'query_embeddings', 'knowledge_embeddings',
                                   'repository_metadata', 'analysis_sessions', 'file_analysis_history')
                ORDER BY table_name;
            """
        ]

        verify_result = subprocess.run(
            verify_cmd,
            env=env,
            capture_output=True,
            text=True,
            timeout=30
        )

        if verify_result.returncode == 0:
            tables = [line.strip() for line in verify_result.stdout.split('\n') if line.strip()]
            logger.info(f"   ✓ Found {len(tables)} core tables: {', '.join(tables)}")
        else:
            logger.warning("   ⚠ Could not verify table creation")

        # Check for pgvector extension
        vector_cmd = [
            'psql',
            '-h', db_config['host'],
            '-p', str(db_config['port']),
            '-U', db_config['user'],
            '-d', db_config['database'],
            '-t',
            '-c', "SELECT EXISTS(SELECT 1 FROM pg_extension WHERE extname = 'vector');"
        ]

        vector_result = subprocess.run(
            vector_cmd,
            env=env,
            capture_output=True,
            text=True,
            timeout=30
        )

        if vector_result.returncode == 0:
            has_vector = vector_result.stdout.strip() == 't'
            if has_vector:
                logger.info("   ✓ pgvector extension is available")
            else:
                logger.warning("   ⚠ pgvector extension not available - vector operations will be limited")

        logger.info("🚀 Database migration completed successfully!")
        logger.info("📊 Production-level database ready for AI repository analysis")

        return True

    except subprocess.TimeoutExpired:
        logger.error("❌ Migration timed out after 5 minutes")
        return False
    except FileNotFoundError:
        logger.error("❌ psql command not found. Please install PostgreSQL client tools.")
        return False
    except Exception as e:
        logger.error(f"❌ Migration failed: {e}")
        return False

def check_psql_available():
    """Check if psql command is available."""
    try:
        result = subprocess.run(['psql', '--version'], capture_output=True, text=True)
        if result.returncode == 0:
            logger.info(f"✓ Found psql: {result.stdout.strip()}")
            return True
        else:
            return False
    except FileNotFoundError:
        return False

if __name__ == "__main__":
    logger.info("🔧 AI Repository Analysis Database Migration")
    logger.info("=" * 50)

    # Check if psql is available
    if not check_psql_available():
        logger.error("❌ psql command not found!")
        logger.error("Please install PostgreSQL client tools:")
        logger.error("  Ubuntu/Debian: sudo apt-get install postgresql-client")
        logger.error("  CentOS/RHEL: sudo yum install postgresql")
        logger.error("  macOS: brew install postgresql")
        sys.exit(1)

    # Run migration
    success = run_migration()
    sys.exit(0 if success else 1)
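Because the module exposes run_migration() and check_psql_available() as plain functions, it can also be reused from other startup code instead of going through migrate.sh; a minimal sketch:

# Reuse the migration functions above from another Python entrypoint.
from migrate_database import run_migration, check_psql_available

if check_psql_available() and run_migration():
    print("schema applied")
else:
    raise SystemExit("migration failed")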
25
services/ai-analysis-service/requirements.txt
Normal file
@ -0,0 +1,25 @@
# Core AI and API dependencies
anthropic>=0.7.0
python-dotenv>=1.0.0

# Web framework
fastapi>=0.104.1
uvicorn>=0.24.0
pydantic>=2.5.0

# Git operations
GitPython>=3.1.40

# Database dependencies
redis>=4.5.0
pymongo>=4.5.0
psycopg2-binary>=2.9.7

# Data processing
numpy>=1.24.0

# PDF generation
reportlab>=4.0.0

# Optional: For better performance (if needed)
# sentence-transformers>=2.2.2  # Commented out - using Claude API instead
94
services/ai-analysis-service/run_migration.py
Normal file
@ -0,0 +1,94 @@
#!/usr/bin/env python3
"""
AI Analysis Service Database Migration Runner
Runs the database migration for AI Analysis Service during container startup.
"""

import os
import sys
import subprocess
import time
from pathlib import Path

def log(message):
    """Log with timestamp."""
    print(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] {message}")

def check_database_connection():
    """Check if database is available."""
    try:
        import psycopg2
        from dotenv import load_dotenv

        load_dotenv()

        conn = psycopg2.connect(
            host=os.getenv('POSTGRES_HOST', 'localhost'),
            port=os.getenv('POSTGRES_PORT', 5432),
            database=os.getenv('POSTGRES_DB', 'dev_pipeline'),
            user=os.getenv('POSTGRES_USER', 'pipeline_admin'),
            password=os.getenv('POSTGRES_PASSWORD', 'secure_pipeline_2024')
        )
        conn.close()
        return True
    except Exception as e:
        log(f"Database connection failed: {e}")
        return False

def run_migration():
    """Run the database migration."""
    try:
        log("Starting AI Analysis Service database migration...")

        # Check if database is available
        max_retries = 30
        retry_count = 0

        while retry_count < max_retries:
            if check_database_connection():
                log("Database connection successful")
                break
            else:
                retry_count += 1
                log(f"Database not ready, retrying in 2 seconds... ({retry_count}/{max_retries})")
                time.sleep(2)
        else:
            log("ERROR: Could not connect to database after 60 seconds")
            return False

        # Run the migration script
        schema_file = Path(__file__).parent / "001-schema.sql"
        if not schema_file.exists():
            log("ERROR: Schema file not found")
            return False

        log(f"Running migration from {schema_file}")

        # Use psql to run the migration
        env = os.environ.copy()
        env['PGPASSWORD'] = os.getenv('POSTGRES_PASSWORD', 'secure_pipeline_2024')

        result = subprocess.run([
            'psql',
            '-h', os.getenv('POSTGRES_HOST', 'localhost'),
            '-p', os.getenv('POSTGRES_PORT', '5432'),
            '-U', os.getenv('POSTGRES_USER', 'pipeline_admin'),
            '-d', os.getenv('POSTGRES_DB', 'dev_pipeline'),
            '-f', str(schema_file),
            '-v', 'ON_ERROR_STOP=1'
        ], env=env, capture_output=True, text=True)

        if result.returncode == 0:
            log("✅ AI Analysis Service database migration completed successfully")
            return True
        else:
            log(f"❌ Migration failed: {result.stderr}")
            return False

    except Exception as e:
        log(f"❌ Migration error: {e}")
        return False

if __name__ == "__main__":
    success = run_migration()
    sys.exit(0 if success else 1)
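The commit does not show the container entrypoint, but a typical way to wire this runner in front of the HTTP server would be along these lines; purely a hypothetical sketch, the actual Dockerfile/entrypoint may differ.

# Hypothetical entrypoint sketch: apply the schema, then start the API server.
import subprocess
import sys

if subprocess.run([sys.executable, "run_migration.py"]).returncode != 0:
    sys.exit(1)  # fail fast if the schema could not be applied
subprocess.run([sys.executable, "server.py"], check=True)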
230
services/ai-analysis-service/server.py
Normal file
230
services/ai-analysis-service/server.py
Normal file
@ -0,0 +1,230 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
AI Analysis Service HTTP Server
|
||||||
|
Provides REST API endpoints for repository analysis.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, Any
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from fastapi import FastAPI, HTTPException, BackgroundTasks
|
||||||
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
from fastapi.responses import FileResponse
|
||||||
|
from pydantic import BaseModel
|
||||||
|
import uvicorn
|
||||||
|
|
||||||
|
# Import the AI analysis components
|
||||||
|
# Note: ai-analyze.py has a hyphen, so we need to handle the import specially
|
||||||
|
import sys
|
||||||
|
import importlib.util
|
||||||
|
|
||||||
|
# Load the ai-analyze.py module
|
||||||
|
spec = importlib.util.spec_from_file_location("ai_analyze", "/app/ai-analyze.py")
|
||||||
|
ai_analyze_module = importlib.util.module_from_spec(spec)
|
||||||
|
sys.modules["ai_analyze"] = ai_analyze_module
|
||||||
|
spec.loader.exec_module(ai_analyze_module)
|
||||||
|
|
||||||
|
# Now import the classes
|
||||||
|
from ai_analyze import EnhancedGitHubAnalyzer, get_memory_config
|
||||||
|
|
||||||
|
app = FastAPI(
|
||||||
|
title="AI Analysis Service",
|
||||||
|
description="AI-powered repository analysis with memory system",
|
||||||
|
version="1.0.0"
|
||||||
|
)
|
||||||
|
|
||||||
|
# CORS middleware
|
||||||
|
app.add_middleware(
|
||||||
|
CORSMiddleware,
|
||||||
|
allow_origins=["*"],
|
||||||
|
allow_credentials=True,
|
||||||
|
allow_methods=["*"],
|
||||||
|
allow_headers=["*"],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Global analyzer instance
|
||||||
|
analyzer = None
|
||||||
|
|
||||||
|
class AnalysisRequest(BaseModel):
|
||||||
|
repo_path: str
|
||||||
|
output_format: str = "pdf" # pdf, json
|
||||||
|
max_files: int = 50
|
||||||
|
|
||||||
|
class AnalysisResponse(BaseModel):
|
||||||
|
success: bool
|
||||||
|
message: str
|
||||||
|
analysis_id: str = None
|
||||||
|
report_path: str = None
|
||||||
|
stats: Dict[str, Any] = None
|
||||||
|
|
||||||
|
@app.on_event("startup")
|
||||||
|
async def startup_event():
|
||||||
|
"""Initialize the analyzer on startup."""
|
||||||
|
global analyzer
|
||||||
|
try:
|
||||||
|
# Load environment variables
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# Get API key
|
||||||
|
api_key = os.getenv('ANTHROPIC_API_KEY')
|
||||||
|
if not api_key:
|
||||||
|
raise Exception("ANTHROPIC_API_KEY not found in environment")
|
||||||
|
|
||||||
|
# Initialize analyzer
|
||||||
|
config = get_memory_config()
|
||||||
|
analyzer = EnhancedGitHubAnalyzer(api_key, config)
|
||||||
|
|
||||||
|
print("✅ AI Analysis Service initialized successfully")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"❌ Failed to initialize AI Analysis Service: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
@app.get("/health")
|
||||||
|
async def health_check():
|
||||||
|
"""Health check endpoint."""
|
||||||
|
return {
|
||||||
|
"status": "healthy",
|
||||||
|
"service": "ai-analysis-service",
|
||||||
|
"timestamp": datetime.now().isoformat(),
|
||||||
|
"version": "1.0.0"
|
||||||
|
}
|
||||||
|
|
||||||
|
@app.post("/analyze", response_model=AnalysisResponse)
|
||||||
|
async def analyze_repository(request: AnalysisRequest, background_tasks: BackgroundTasks):
|
||||||
|
"""Analyze a repository."""
|
||||||
|
try:
|
||||||
|
if not analyzer:
|
||||||
|
raise HTTPException(status_code=500, detail="Analyzer not initialized")
|
||||||
|
|
||||||
|
# Generate unique analysis ID
|
||||||
|
analysis_id = f"analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
||||||
|
|
||||||
|
# Create temporary directory for this analysis
|
||||||
|
temp_dir = tempfile.mkdtemp(prefix=f"ai_analysis_{analysis_id}_")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Run analysis
|
||||||
|
analysis = await analyzer.analyze_repository_with_memory(
|
||||||
|
request.repo_path,
|
||||||
|
max_files=request.max_files
|
||||||
|
)
|
||||||
|
|
||||||
|
# Generate report
|
||||||
|
if request.output_format == "pdf":
|
||||||
|
report_path = f"/app/reports/{analysis_id}_analysis.pdf"
|
||||||
|
analyzer.create_pdf_report(analysis, report_path)
|
||||||
|
else:
|
||||||
|
report_path = f"/app/reports/{analysis_id}_analysis.json"
|
||||||
|
with open(report_path, 'w') as f:
|
||||||
|
json.dump({
|
||||||
|
"repo_path": analysis.repo_path,
|
||||||
|
"total_files": analysis.total_files,
|
||||||
|
"total_lines": analysis.total_lines,
|
||||||
|
"languages": analysis.languages,
|
||||||
|
"code_quality_score": analysis.code_quality_score,
|
||||||
|
"architecture_assessment": analysis.architecture_assessment,
|
||||||
|
"security_assessment": analysis.security_assessment,
|
||||||
|
"executive_summary": analysis.executive_summary,
|
||||||
|
"file_analyses": [
|
||||||
|
{
|
||||||
|
"path": fa.path,
|
||||||
|
"language": fa.language,
|
||||||
|
"lines_of_code": fa.lines_of_code,
|
||||||
|
"severity_score": fa.severity_score,
|
||||||
|
"issues_found": fa.issues_found,
|
||||||
|
"recommendations": fa.recommendations
|
||||||
|
} for fa in analysis.file_analyses
|
||||||
|
]
|
||||||
|
}, f, indent=2)
|
||||||
|
|
||||||
|
# Calculate stats
|
||||||
|
stats = {
|
||||||
|
"total_files": analysis.total_files,
|
||||||
|
"total_lines": analysis.total_lines,
|
||||||
|
"languages": analysis.languages,
|
||||||
|
"code_quality_score": analysis.code_quality_score,
|
||||||
|
"high_quality_files": len([fa for fa in analysis.file_analyses if fa.severity_score >= 8]),
|
||||||
|
"medium_quality_files": len([fa for fa in analysis.file_analyses if 5 <= fa.severity_score < 8]),
|
||||||
|
"low_quality_files": len([fa for fa in analysis.file_analyses if fa.severity_score < 5]),
|
||||||
|
"total_issues": sum(len(fa.issues_found) for fa in analysis.file_analyses)
|
||||||
|
}
|
||||||
|
|
||||||
|
return AnalysisResponse(
|
||||||
|
success=True,
|
||||||
|
message="Analysis completed successfully",
|
||||||
|
analysis_id=analysis_id,
|
||||||
|
report_path=report_path,
|
||||||
|
stats=stats
|
||||||
|
)
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# Cleanup temporary directory
|
||||||
|
if os.path.exists(temp_dir):
|
||||||
|
shutil.rmtree(temp_dir)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
return AnalysisResponse(
|
||||||
|
success=False,
|
||||||
|
message=f"Analysis failed: {str(e)}",
|
||||||
|
analysis_id=None,
|
||||||
|
report_path=None,
|
||||||
|
stats=None
|
||||||
|
)
|
||||||
|
|
||||||
|
@app.get("/reports/{filename}")
|
||||||
|
async def download_report(filename: str):
|
||||||
|
"""Download analysis report."""
|
||||||
|
report_path = f"/app/reports/{filename}"
|
||||||
|
if not os.path.exists(report_path):
|
||||||
|
raise HTTPException(status_code=404, detail="Report not found")
|
||||||
|
|
||||||
|
return FileResponse(
|
||||||
|
report_path,
|
||||||
|
media_type='application/octet-stream',
|
||||||
|
filename=filename
|
||||||
|
)
|
||||||
|
|
||||||
|
@app.get("/memory/stats")
|
||||||
|
async def get_memory_stats():
|
||||||
|
"""Get memory system statistics."""
|
||||||
|
try:
|
||||||
|
if not analyzer:
|
||||||
|
raise HTTPException(status_code=500, detail="Analyzer not initialized")
|
||||||
|
|
||||||
|
stats = await analyzer.memory_manager.get_memory_stats()
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"memory_stats": stats
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=f"Failed to get memory stats: {str(e)}")
|
||||||
|
|
||||||
|
@app.post("/memory/query")
|
||||||
|
async def query_memory(query: str, repo_context: str = ""):
|
||||||
|
"""Query the memory system."""
|
||||||
|
try:
|
||||||
|
if not analyzer:
|
||||||
|
raise HTTPException(status_code=500, detail="Analyzer not initialized")
|
||||||
|
|
||||||
|
result = await analyzer.query_memory(query, repo_context)
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"query": query,
|
||||||
|
"result": result
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=f"Memory query failed: {str(e)}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
port = int(os.getenv('PORT', 8022))
|
||||||
|
host = os.getenv('HOST', '0.0.0.0')
|
||||||
|
|
||||||
|
print(f"🚀 Starting AI Analysis Service on {host}:{port}")
|
||||||
|
uvicorn.run(app, host=host, port=port)
|
||||||
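
For reference, a minimal client sketch against the endpoints added above. Assumptions: the service is reachable directly at localhost:8022, the requests package is installed, and the report filename used below is purely illustrative.

import requests

BASE = "http://localhost:8022"  # assumption: calling the service directly, not via the gateway

# Memory system statistics
print(requests.get(f"{BASE}/memory/stats", timeout=30).json())

# Query the memory system. FastAPI exposes `query` and `repo_context` here as
# query parameters (plain str arguments), so they go in params, not the JSON body.
resp = requests.post(
    f"{BASE}/memory/query",
    params={"query": "What recurring security issues were found?", "repo_context": ""},
    timeout=60,
)
print(resp.json())

# Download a previously generated report (filename is hypothetical)
report = requests.get(f"{BASE}/reports/example_analysis.json", timeout=60)
with open("example_analysis.json", "wb") as f:
    f.write(report.content)
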
@ -68,6 +68,7 @@ const serviceTargets = {
  DASHBOARD_URL: process.env.DASHBOARD_URL || 'http://localhost:8008',
  SELF_IMPROVING_GENERATOR_URL: process.env.SELF_IMPROVING_GENERATOR_URL || 'http://localhost:8007',
  AI_MOCKUP_URL: process.env.AI_MOCKUP_URL || 'http://localhost:8021',
  AI_ANALYSIS_URL: process.env.AI_ANALYSIS_URL || 'http://localhost:8022',
};

// Log service targets for debugging
@ -1984,6 +1985,76 @@ app.use('/api/mockup',
  }
);

// AI Analysis Service - Direct HTTP forwarding
console.log('🔧 Registering /api/ai-analysis proxy route...');
app.use('/api/ai-analysis',
  createServiceLimiter(200),
  // Allow unauthenticated access for AI analysis (public feature)
  (req, res, next) => {
    console.log(`🤖 [AI ANALYSIS PROXY] ${req.method} ${req.originalUrl}`);
    return next();
  },
  (req, res, next) => {
    const aiAnalysisServiceUrl = serviceTargets.AI_ANALYSIS_URL;
    // Strip the /api/ai-analysis prefix so /api/ai-analysis/analyze -> /analyze at target
    const rewrittenPath = (req.originalUrl || '').replace(/^\/api\/ai-analysis/, '');
    const targetUrl = `${aiAnalysisServiceUrl}${rewrittenPath}`;
    console.log(`🔥 [AI ANALYSIS PROXY] ${req.method} ${req.originalUrl} → ${targetUrl}`);

    res.setTimeout(300000, () => { // 5 minutes timeout for analysis
      console.error('❌ [AI ANALYSIS PROXY] Response timeout');
      if (!res.headersSent) {
        res.status(504).json({ error: 'Gateway timeout', service: 'ai-analysis' });
      }
    });

    const options = {
      method: req.method,
      url: targetUrl,
      headers: {
        'Content-Type': 'application/json',
        'User-Agent': 'API-Gateway/1.0',
        'Connection': 'keep-alive',
        'Authorization': req.headers.authorization,
        'X-User-ID': req.user?.id || req.user?.userId,
        ...(req.user?.role && { 'X-User-Role': req.user.role })
      },
      timeout: 240000, // 4 minutes timeout
      validateStatus: () => true,
      maxRedirects: 0,
      maxContentLength: 100 * 1024 * 1024, // 100MB max content length
      maxBodyLength: 100 * 1024 * 1024 // 100MB max body length
    };

    if (req.method === 'POST' || req.method === 'PUT' || req.method === 'PATCH') {
      options.data = req.body || {};
      console.log(`📦 [AI ANALYSIS PROXY] Request body:`, JSON.stringify(req.body));
    }

    axios(options)
      .then(response => {
        console.log(`✅ [AI ANALYSIS PROXY] Response: ${response.status} for ${req.method} ${req.originalUrl}`);
        if (!res.headersSent) {
          res.status(response.status).json(response.data);
        }
      })
      .catch(error => {
        console.error(`❌ [AI ANALYSIS PROXY ERROR]:`, error.message);
        if (!res.headersSent) {
          if (error.response) {
            res.status(error.response.status).json(error.response.data);
          } else {
            res.status(502).json({
              error: 'AI Analysis service unavailable',
              message: error.code || error.message,
              service: 'ai-analysis'
            });
          }
        }
      });
  }
);

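Because of the prefix strip above, gateway paths map one-to-one onto the service's own routes: /api/ai-analysis/memory/stats is forwarded to /memory/stats on the AI analysis service. A minimal sketch of exercising the route through the gateway follows; the gateway port and the bearer token are assumptions (neither appears in this diff), and per the comment above the route also accepts unauthenticated requests.

import requests

GATEWAY = "http://localhost:8000"   # assumption: the gateway's own port is not shown in this diff
TOKEN = None                        # optionally a JWT access token string

headers = {"Authorization": f"Bearer {TOKEN}"} if TOKEN else {}
resp = requests.get(
    f"{GATEWAY}/api/ai-analysis/memory/stats",  # gateway rewrites this to GET /memory/stats
    headers=headers,
    timeout=60,
)
print(resp.status_code, resp.json())
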
// Gateway management endpoints
app.get('/api/gateway/info', authMiddleware.verifyToken, (req, res) => {
  res.json({
@ -2041,9 +2112,10 @@ app.get('/', (req, res) => {
      deploy: '/api/deploy',
      dashboard: '/api/dashboard',
      self_improving: '/api/self-improving',
      mockup: '/api/mockup',
      ai_analysis: '/api/ai-analysis',
      unison: '/api/unison',
      unified: '/api/recommendations'
    },
    websocket: {
      endpoint: '/socket.io/',