codenuk_backend_mine/services/ai-analysis-service/002-hierarchical-schema.sql
2025-11-07 08:54:52 +05:30

267 lines
9.2 KiB
PL/PgSQL

-- ================================================
-- Hierarchical Data Structure Schema (Problem 4 Solution)
-- Version: 1.0
-- Description: PostgreSQL tables for structured findings, metrics, and report sections
-- ================================================
-- Findings table for structured issue/finding storage
CREATE TABLE IF NOT EXISTS findings (
id SERIAL PRIMARY KEY,
run_id VARCHAR(255) NOT NULL,
module_name VARCHAR(255) NOT NULL,
module_id VARCHAR(255), -- Link to MongoDB module_id
-- Issue details
severity VARCHAR(20) NOT NULL, -- critical, high, medium, low
category VARCHAR(50) NOT NULL, -- security, performance, code_quality, architecture
title VARCHAR(500) NOT NULL,
description TEXT,
file_path TEXT NOT NULL,
line_number INTEGER,
-- Impact and remediation
impact TEXT,
recommendation TEXT,
effort_estimate VARCHAR(20), -- low, medium, high
-- Timestamps
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
-- Constraints
CONSTRAINT valid_severity CHECK (severity IN ('critical', 'high', 'medium', 'low')),
CONSTRAINT valid_category CHECK (category IN ('security', 'performance', 'code_quality', 'architecture')),
CONSTRAINT valid_effort CHECK (effort_estimate IN ('low', 'medium', 'high') OR effort_estimate IS NULL)
);
-- Metrics table for code metrics aggregation
CREATE TABLE IF NOT EXISTS metrics (
id SERIAL PRIMARY KEY,
run_id VARCHAR(255) NOT NULL,
module_name VARCHAR(255) NOT NULL,
module_id VARCHAR(255), -- Link to MongoDB module_id
-- Code metrics
lines_of_code INTEGER DEFAULT 0,
cyclomatic_complexity FLOAT DEFAULT 0.0,
test_coverage FLOAT,
maintainability_index FLOAT,
-- Quality scores (1-5 scale)
architecture_rating INTEGER CHECK (architecture_rating >= 1 AND architecture_rating <= 5),
security_rating INTEGER CHECK (security_rating >= 1 AND security_rating <= 5),
code_quality_rating INTEGER CHECK (code_quality_rating >= 1 AND code_quality_rating <= 5),
performance_rating INTEGER CHECK (performance_rating >= 1 AND performance_rating <= 5),
-- Aggregates
total_issues INTEGER DEFAULT 0,
critical_issues INTEGER DEFAULT 0,
high_issues INTEGER DEFAULT 0,
medium_issues INTEGER DEFAULT 0,
low_issues INTEGER DEFAULT 0,
-- Timestamps
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Report sections table for multi-audience content
CREATE TABLE IF NOT EXISTS report_sections (
id SERIAL PRIMARY KEY,
run_id VARCHAR(255) NOT NULL,
section_name VARCHAR(255) NOT NULL, -- 'overview', 'backend_analysis', 'security_analysis'
-- Multi-audience content
nontechnical_content TEXT, -- For executives
manager_content TEXT, -- For managers
technical_content TEXT, -- For developers
-- Metadata
generated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
model_version VARCHAR(50), -- Claude model version used
-- Constraints
CONSTRAINT valid_section_name CHECK (section_name IN ('overview', 'backend_analysis', 'frontend_analysis', 'security_analysis', 'architecture_analysis', 'performance_analysis', 'code_quality_analysis'))
);
-- Analysis runs table for tracking analysis sessions
CREATE TABLE IF NOT EXISTS analysis_runs (
id SERIAL PRIMARY KEY,
run_id VARCHAR(255) UNIQUE NOT NULL,
repository_id VARCHAR(255) NOT NULL,
session_id VARCHAR(255) NOT NULL,
-- Status
status VARCHAR(50) NOT NULL, -- 'in_progress', 'completed', 'failed'
started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
completed_at TIMESTAMP WITH TIME ZONE,
-- Summary
total_modules INTEGER DEFAULT 0,
total_files INTEGER DEFAULT 0,
total_findings INTEGER DEFAULT 0,
-- Metadata
analysis_type VARCHAR(50), -- 'full', 'fast', 'basic'
metadata JSONB DEFAULT '{}',
-- Constraints
CONSTRAINT valid_status CHECK (status IN ('in_progress', 'completed', 'failed')),
CONSTRAINT valid_analysis_type CHECK (analysis_type IN ('full', 'fast', 'basic') OR analysis_type IS NULL)
);
-- ================================================
-- PERFORMANCE INDEXES
-- ================================================
-- Findings indexes
CREATE INDEX IF NOT EXISTS idx_findings_run_module ON findings(run_id, module_name);
CREATE INDEX IF NOT EXISTS idx_findings_severity ON findings(severity);
CREATE INDEX IF NOT EXISTS idx_findings_category ON findings(category);
CREATE INDEX IF NOT EXISTS idx_findings_file ON findings(file_path);
CREATE INDEX IF NOT EXISTS idx_findings_created ON findings(created_at DESC);
-- Metrics indexes
CREATE INDEX IF NOT EXISTS idx_metrics_run_module ON metrics(run_id, module_name);
CREATE INDEX IF NOT EXISTS idx_metrics_quality ON metrics(code_quality_rating);
CREATE INDEX IF NOT EXISTS idx_metrics_security ON metrics(security_rating);
CREATE INDEX IF NOT EXISTS idx_metrics_architecture ON metrics(architecture_rating);
-- Report sections indexes
CREATE INDEX IF NOT EXISTS idx_report_sections_run ON report_sections(run_id);
CREATE INDEX IF NOT EXISTS idx_report_sections_name ON report_sections(section_name);
CREATE INDEX IF NOT EXISTS idx_report_sections_generated ON report_sections(generated_at DESC);
-- Analysis runs indexes
CREATE INDEX IF NOT EXISTS idx_runs_repo ON analysis_runs(repository_id);
CREATE INDEX IF NOT EXISTS idx_runs_status ON analysis_runs(status);
CREATE INDEX IF NOT EXISTS idx_runs_started ON analysis_runs(started_at DESC);
-- ================================================
-- HELPER FUNCTIONS
-- ================================================
-- Function to get findings by module
CREATE OR REPLACE FUNCTION get_findings_by_module(
p_run_id VARCHAR(255),
p_module_name VARCHAR(255) DEFAULT NULL
)
RETURNS TABLE (
id INTEGER,
module_name VARCHAR(255),
severity VARCHAR(20),
category VARCHAR(50),
title VARCHAR(500),
file_path TEXT,
recommendation TEXT
) AS $$
BEGIN
RETURN QUERY
SELECT
f.id,
f.module_name,
f.severity,
f.category,
f.title,
f.file_path,
f.recommendation
FROM findings f
WHERE f.run_id = p_run_id
AND (p_module_name IS NULL OR f.module_name = p_module_name)
ORDER BY
CASE f.severity
WHEN 'critical' THEN 1
WHEN 'high' THEN 2
WHEN 'medium' THEN 3
WHEN 'low' THEN 4
END,
f.module_name;
END;
$$ LANGUAGE plpgsql;
-- Function to aggregate metrics by module
CREATE OR REPLACE FUNCTION aggregate_module_metrics(
p_run_id VARCHAR(255)
)
RETURNS TABLE (
module_name VARCHAR(255),
avg_architecture_rating NUMERIC,
avg_security_rating NUMERIC,
avg_code_quality_rating NUMERIC,
total_issues INTEGER,
total_critical INTEGER,
total_high INTEGER
) AS $$
BEGIN
RETURN QUERY
SELECT
m.module_name,
AVG(m.architecture_rating)::NUMERIC(5,2) as avg_architecture_rating,
AVG(m.security_rating)::NUMERIC(5,2) as avg_security_rating,
AVG(m.code_quality_rating)::NUMERIC(5,2) as avg_code_quality_rating,
SUM(m.total_issues)::INTEGER as total_issues,
SUM(m.critical_issues)::INTEGER as total_critical,
SUM(m.high_issues)::INTEGER as total_high
FROM metrics m
WHERE m.run_id = p_run_id
GROUP BY m.module_name
ORDER BY m.module_name;
END;
$$ LANGUAGE plpgsql;
-- Function to get security findings
CREATE OR REPLACE FUNCTION get_security_findings(
p_run_id VARCHAR(255),
p_severity_filter VARCHAR(20) DEFAULT NULL
)
RETURNS TABLE (
id INTEGER,
module_name VARCHAR(255),
severity VARCHAR(20),
title VARCHAR(500),
description TEXT,
file_path TEXT,
recommendation TEXT
) AS $$
BEGIN
RETURN QUERY
SELECT
f.id,
f.module_name,
f.severity,
f.title,
f.description,
f.file_path,
f.recommendation
FROM findings f
WHERE f.run_id = p_run_id
AND f.category = 'security'
AND (p_severity_filter IS NULL OR f.severity = p_severity_filter)
ORDER BY
CASE f.severity
WHEN 'critical' THEN 1
WHEN 'high' THEN 2
WHEN 'medium' THEN 3
WHEN 'low' THEN 4
END,
f.module_name;
END;
$$ LANGUAGE plpgsql;
-- ================================================
-- COMPLETION MESSAGE
-- ================================================
DO $$
BEGIN
RAISE NOTICE '================================================';
RAISE NOTICE 'Hierarchical Data Structure Schema Setup Complete';
RAISE NOTICE '================================================';
RAISE NOTICE 'Tables created: findings, metrics, report_sections, analysis_runs';
RAISE NOTICE 'Indexes created: Performance indexes for efficient querying';
RAISE NOTICE 'Functions created: get_findings_by_module, aggregate_module_metrics, get_security_findings';
RAISE NOTICE '================================================';
RAISE NOTICE 'Ready for hierarchical data structure storage';
RAISE NOTICE '================================================';
END
$$;