-- ================================================ -- Hierarchical Data Structure Schema (Problem 4 Solution) -- Version: 1.0 -- Description: PostgreSQL tables for structured findings, metrics, and report sections -- ================================================ -- Findings table for structured issue/finding storage CREATE TABLE IF NOT EXISTS findings ( id SERIAL PRIMARY KEY, run_id VARCHAR(255) NOT NULL, module_name VARCHAR(255) NOT NULL, module_id VARCHAR(255), -- Link to MongoDB module_id -- Issue details severity VARCHAR(20) NOT NULL, -- critical, high, medium, low category VARCHAR(50) NOT NULL, -- security, performance, code_quality, architecture title VARCHAR(500) NOT NULL, description TEXT, file_path TEXT NOT NULL, line_number INTEGER, -- Impact and remediation impact TEXT, recommendation TEXT, effort_estimate VARCHAR(20), -- low, medium, high -- Timestamps created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, -- Constraints CONSTRAINT valid_severity CHECK (severity IN ('critical', 'high', 'medium', 'low')), CONSTRAINT valid_category CHECK (category IN ('security', 'performance', 'code_quality', 'architecture')), CONSTRAINT valid_effort CHECK (effort_estimate IN ('low', 'medium', 'high') OR effort_estimate IS NULL) ); -- Metrics table for code metrics aggregation CREATE TABLE IF NOT EXISTS metrics ( id SERIAL PRIMARY KEY, run_id VARCHAR(255) NOT NULL, module_name VARCHAR(255) NOT NULL, module_id VARCHAR(255), -- Link to MongoDB module_id -- Code metrics lines_of_code INTEGER DEFAULT 0, cyclomatic_complexity FLOAT DEFAULT 0.0, test_coverage FLOAT, maintainability_index FLOAT, -- Quality scores (1-5 scale) architecture_rating INTEGER CHECK (architecture_rating >= 1 AND architecture_rating <= 5), security_rating INTEGER CHECK (security_rating >= 1 AND security_rating <= 5), code_quality_rating INTEGER CHECK (code_quality_rating >= 1 AND code_quality_rating <= 5), performance_rating INTEGER CHECK (performance_rating >= 1 AND performance_rating <= 5), -- Aggregates total_issues INTEGER DEFAULT 0, critical_issues INTEGER DEFAULT 0, high_issues INTEGER DEFAULT 0, medium_issues INTEGER DEFAULT 0, low_issues INTEGER DEFAULT 0, -- Timestamps created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP ); -- Report sections table for multi-audience content CREATE TABLE IF NOT EXISTS report_sections ( id SERIAL PRIMARY KEY, run_id VARCHAR(255) NOT NULL, section_name VARCHAR(255) NOT NULL, -- 'overview', 'backend_analysis', 'security_analysis' -- Multi-audience content nontechnical_content TEXT, -- For executives manager_content TEXT, -- For managers technical_content TEXT, -- For developers -- Metadata generated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, model_version VARCHAR(50), -- Claude model version used -- Constraints CONSTRAINT valid_section_name CHECK (section_name IN ('overview', 'backend_analysis', 'frontend_analysis', 'security_analysis', 'architecture_analysis', 'performance_analysis', 'code_quality_analysis')) ); -- Analysis runs table for tracking analysis sessions CREATE TABLE IF NOT EXISTS analysis_runs ( id SERIAL PRIMARY KEY, run_id VARCHAR(255) UNIQUE NOT NULL, repository_id VARCHAR(255) NOT NULL, session_id VARCHAR(255) NOT NULL, -- Status status VARCHAR(50) NOT NULL, -- 'in_progress', 'completed', 'failed' started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, completed_at TIMESTAMP WITH TIME ZONE, -- Summary total_modules INTEGER DEFAULT 0, total_files INTEGER DEFAULT 0, total_findings INTEGER DEFAULT 0, -- Metadata analysis_type VARCHAR(50), -- 'full', 'fast', 'basic' metadata JSONB DEFAULT '{}', -- Constraints CONSTRAINT valid_status CHECK (status IN ('in_progress', 'completed', 'failed')), CONSTRAINT valid_analysis_type CHECK (analysis_type IN ('full', 'fast', 'basic') OR analysis_type IS NULL) ); -- ================================================ -- PERFORMANCE INDEXES -- ================================================ -- Findings indexes CREATE INDEX IF NOT EXISTS idx_findings_run_module ON findings(run_id, module_name); CREATE INDEX IF NOT EXISTS idx_findings_severity ON findings(severity); CREATE INDEX IF NOT EXISTS idx_findings_category ON findings(category); CREATE INDEX IF NOT EXISTS idx_findings_file ON findings(file_path); CREATE INDEX IF NOT EXISTS idx_findings_created ON findings(created_at DESC); -- Metrics indexes CREATE INDEX IF NOT EXISTS idx_metrics_run_module ON metrics(run_id, module_name); CREATE INDEX IF NOT EXISTS idx_metrics_quality ON metrics(code_quality_rating); CREATE INDEX IF NOT EXISTS idx_metrics_security ON metrics(security_rating); CREATE INDEX IF NOT EXISTS idx_metrics_architecture ON metrics(architecture_rating); -- Report sections indexes CREATE INDEX IF NOT EXISTS idx_report_sections_run ON report_sections(run_id); CREATE INDEX IF NOT EXISTS idx_report_sections_name ON report_sections(section_name); CREATE INDEX IF NOT EXISTS idx_report_sections_generated ON report_sections(generated_at DESC); -- Analysis runs indexes CREATE INDEX IF NOT EXISTS idx_runs_repo ON analysis_runs(repository_id); CREATE INDEX IF NOT EXISTS idx_runs_status ON analysis_runs(status); CREATE INDEX IF NOT EXISTS idx_runs_started ON analysis_runs(started_at DESC); -- ================================================ -- HELPER FUNCTIONS -- ================================================ -- Function to get findings by module CREATE OR REPLACE FUNCTION get_findings_by_module( p_run_id VARCHAR(255), p_module_name VARCHAR(255) DEFAULT NULL ) RETURNS TABLE ( id INTEGER, module_name VARCHAR(255), severity VARCHAR(20), category VARCHAR(50), title VARCHAR(500), file_path TEXT, recommendation TEXT ) AS $$ BEGIN RETURN QUERY SELECT f.id, f.module_name, f.severity, f.category, f.title, f.file_path, f.recommendation FROM findings f WHERE f.run_id = p_run_id AND (p_module_name IS NULL OR f.module_name = p_module_name) ORDER BY CASE f.severity WHEN 'critical' THEN 1 WHEN 'high' THEN 2 WHEN 'medium' THEN 3 WHEN 'low' THEN 4 END, f.module_name; END; $$ LANGUAGE plpgsql; -- Function to aggregate metrics by module CREATE OR REPLACE FUNCTION aggregate_module_metrics( p_run_id VARCHAR(255) ) RETURNS TABLE ( module_name VARCHAR(255), avg_architecture_rating NUMERIC, avg_security_rating NUMERIC, avg_code_quality_rating NUMERIC, total_issues INTEGER, total_critical INTEGER, total_high INTEGER ) AS $$ BEGIN RETURN QUERY SELECT m.module_name, AVG(m.architecture_rating)::NUMERIC(5,2) as avg_architecture_rating, AVG(m.security_rating)::NUMERIC(5,2) as avg_security_rating, AVG(m.code_quality_rating)::NUMERIC(5,2) as avg_code_quality_rating, SUM(m.total_issues)::INTEGER as total_issues, SUM(m.critical_issues)::INTEGER as total_critical, SUM(m.high_issues)::INTEGER as total_high FROM metrics m WHERE m.run_id = p_run_id GROUP BY m.module_name ORDER BY m.module_name; END; $$ LANGUAGE plpgsql; -- Function to get security findings CREATE OR REPLACE FUNCTION get_security_findings( p_run_id VARCHAR(255), p_severity_filter VARCHAR(20) DEFAULT NULL ) RETURNS TABLE ( id INTEGER, module_name VARCHAR(255), severity VARCHAR(20), title VARCHAR(500), description TEXT, file_path TEXT, recommendation TEXT ) AS $$ BEGIN RETURN QUERY SELECT f.id, f.module_name, f.severity, f.title, f.description, f.file_path, f.recommendation FROM findings f WHERE f.run_id = p_run_id AND f.category = 'security' AND (p_severity_filter IS NULL OR f.severity = p_severity_filter) ORDER BY CASE f.severity WHEN 'critical' THEN 1 WHEN 'high' THEN 2 WHEN 'medium' THEN 3 WHEN 'low' THEN 4 END, f.module_name; END; $$ LANGUAGE plpgsql; -- ================================================ -- COMPLETION MESSAGE -- ================================================ DO $$ BEGIN RAISE NOTICE '================================================'; RAISE NOTICE 'Hierarchical Data Structure Schema Setup Complete'; RAISE NOTICE '================================================'; RAISE NOTICE 'Tables created: findings, metrics, report_sections, analysis_runs'; RAISE NOTICE 'Indexes created: Performance indexes for efficient querying'; RAISE NOTICE 'Functions created: get_findings_by_module, aggregate_module_metrics, get_security_findings'; RAISE NOTICE '================================================'; RAISE NOTICE 'Ready for hierarchical data structure storage'; RAISE NOTICE '================================================'; END $$;