613 lines
22 KiB
PL/PgSQL
613 lines
22 KiB
PL/PgSQL
-- ================================================
|
|
-- Repository Analyzer Memory System Database Migration
|
|
-- Version: 1.0
|
|
-- Description: Complete database setup for AI memory system
|
|
-- ================================================
|
|
|
|
-- Enable required extensions
|
|
CREATE EXTENSION IF NOT EXISTS vector;
|
|
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
|
|
|
-- ================================================
|
|
-- CORE TABLES
|
|
-- ================================================
|
|
|
|
-- Code embeddings table for semantic search of analyzed code
|
|
CREATE TABLE IF NOT EXISTS code_embeddings (
|
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
repo_id VARCHAR(255) NOT NULL,
|
|
file_path TEXT NOT NULL,
|
|
content_hash VARCHAR(64) NOT NULL,
|
|
embedding vector(384) NOT NULL,
|
|
metadata JSONB DEFAULT '{}',
|
|
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
|
last_accessed TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
|
access_count INTEGER DEFAULT 0,
|
|
|
|
-- Ensure uniqueness per repo/file/hash combination
|
|
CONSTRAINT unique_code_analysis UNIQUE(repo_id, file_path, content_hash)
|
|
);
|
|
|
|
-- Query embeddings for episodic memory (user interactions)
|
|
CREATE TABLE IF NOT EXISTS query_embeddings (
|
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
session_id VARCHAR(255) NOT NULL,
|
|
query_text TEXT NOT NULL,
|
|
query_embedding vector(384) NOT NULL,
|
|
response_embedding vector(384),
|
|
repo_context VARCHAR(255),
|
|
timestamp TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
|
metadata JSONB DEFAULT '{}',
|
|
|
|
-- Index for session-based queries
|
|
CONSTRAINT valid_session_id CHECK (LENGTH(session_id) > 0)
|
|
);
|
|
|
|
-- Persistent knowledge embeddings for long-term learning
|
|
CREATE TABLE IF NOT EXISTS knowledge_embeddings (
|
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
fact_id VARCHAR(255) UNIQUE NOT NULL,
|
|
content TEXT NOT NULL,
|
|
category VARCHAR(100) NOT NULL,
|
|
embedding vector(384) NOT NULL,
|
|
confidence REAL DEFAULT 1.0 CHECK (confidence >= 0.0 AND confidence <= 1.0),
|
|
source_repos TEXT[] DEFAULT '{}',
|
|
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
|
last_accessed TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
|
access_frequency INTEGER DEFAULT 0,
|
|
|
|
-- Ensure valid categories
|
|
CONSTRAINT valid_category CHECK (category IN ('code_pattern', 'best_practice', 'vulnerability', 'architecture', 'security_vulnerability', 'performance'))
|
|
);
|
|
|
|
-- Repository metadata for tracking analyzed repositories
|
|
CREATE TABLE IF NOT EXISTS repository_metadata (
|
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
repo_id VARCHAR(255) UNIQUE NOT NULL,
|
|
repo_path TEXT NOT NULL,
|
|
repo_name VARCHAR(500),
|
|
primary_language VARCHAR(100),
|
|
total_files INTEGER DEFAULT 0,
|
|
total_lines INTEGER DEFAULT 0,
|
|
last_analyzed TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
|
analysis_count INTEGER DEFAULT 0,
|
|
quality_score REAL DEFAULT 5.0 CHECK (quality_score >= 0.0 AND quality_score <= 10.0),
|
|
metadata JSONB DEFAULT '{}'
|
|
);
|
|
|
|
-- Session tracking for episodic memory correlation
|
|
CREATE TABLE IF NOT EXISTS analysis_sessions (
|
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
session_id VARCHAR(255) UNIQUE NOT NULL,
|
|
user_identifier VARCHAR(255),
|
|
start_time TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
|
end_time TIMESTAMP WITH TIME ZONE,
|
|
total_queries INTEGER DEFAULT 0,
|
|
repositories_analyzed TEXT[] DEFAULT '{}',
|
|
session_metadata JSONB DEFAULT '{}'
|
|
);
|
|
|
|
-- File analysis history for change tracking
|
|
CREATE TABLE IF NOT EXISTS file_analysis_history (
|
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
repo_id VARCHAR(255) NOT NULL,
|
|
file_path TEXT NOT NULL,
|
|
content_hash VARCHAR(64) NOT NULL,
|
|
language VARCHAR(100),
|
|
lines_of_code INTEGER DEFAULT 0,
|
|
complexity_score REAL DEFAULT 0.0,
|
|
severity_score REAL DEFAULT 5.0 CHECK (severity_score >= 0.0 AND severity_score <= 10.0),
|
|
issues_count INTEGER DEFAULT 0,
|
|
analyzed_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
|
analysis_version VARCHAR(50) DEFAULT '1.0'
|
|
);
|
|
|
|
-- Memory consolidation log for tracking knowledge extraction
|
|
CREATE TABLE IF NOT EXISTS memory_consolidation_log (
|
|
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
|
|
source_type VARCHAR(50) NOT NULL, -- 'episodic', 'code_analysis', 'manual'
|
|
source_id VARCHAR(255) NOT NULL,
|
|
target_memory_type VARCHAR(50) NOT NULL, -- 'persistent', 'working'
|
|
target_id VARCHAR(255),
|
|
consolidation_confidence REAL DEFAULT 0.5,
|
|
consolidation_timestamp TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
|
consolidation_metadata JSONB DEFAULT '{}'
|
|
);
|
|
|
|
-- ================================================
|
|
-- PERFORMANCE INDEXES
|
|
-- ================================================
|
|
|
|
-- Code embeddings indexes
|
|
CREATE INDEX IF NOT EXISTS idx_code_embeddings_repo_id ON code_embeddings(repo_id);
|
|
CREATE INDEX IF NOT EXISTS idx_code_embeddings_file_path ON code_embeddings(file_path);
|
|
CREATE INDEX IF NOT EXISTS idx_code_embeddings_accessed ON code_embeddings(last_accessed DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_code_embeddings_metadata ON code_embeddings USING gin(metadata);
|
|
|
|
-- Vector similarity indexes (using IVFFlat for better performance)
|
|
CREATE INDEX IF NOT EXISTS idx_code_embeddings_vector
|
|
ON code_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
|
|
|
|
-- Query embeddings indexes
|
|
CREATE INDEX IF NOT EXISTS idx_query_embeddings_session ON query_embeddings(session_id);
|
|
CREATE INDEX IF NOT EXISTS idx_query_embeddings_timestamp ON query_embeddings(timestamp DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_query_embeddings_repo_context ON query_embeddings(repo_context);
|
|
CREATE INDEX IF NOT EXISTS idx_query_embeddings_vector
|
|
ON query_embeddings USING ivfflat (query_embedding vector_cosine_ops) WITH (lists = 100);
|
|
|
|
-- Knowledge embeddings indexes
|
|
CREATE INDEX IF NOT EXISTS idx_knowledge_embeddings_category ON knowledge_embeddings(category);
|
|
CREATE INDEX IF NOT EXISTS idx_knowledge_embeddings_confidence ON knowledge_embeddings(confidence DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_knowledge_embeddings_access_freq ON knowledge_embeddings(access_frequency DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_knowledge_embeddings_vector
|
|
ON knowledge_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
|
|
CREATE INDEX IF NOT EXISTS idx_knowledge_source_repos ON knowledge_embeddings USING gin(source_repos);
|
|
|
|
-- Repository metadata indexes
|
|
CREATE INDEX IF NOT EXISTS idx_repository_metadata_repo_id ON repository_metadata(repo_id);
|
|
CREATE INDEX IF NOT EXISTS idx_repository_metadata_analyzed ON repository_metadata(last_analyzed DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_repository_metadata_language ON repository_metadata(primary_language);
|
|
|
|
-- File history indexes
|
|
CREATE INDEX IF NOT EXISTS idx_file_history_repo_file ON file_analysis_history(repo_id, file_path);
|
|
CREATE INDEX IF NOT EXISTS idx_file_history_analyzed ON file_analysis_history(analyzed_at DESC);
|
|
CREATE INDEX IF NOT EXISTS idx_file_history_severity ON file_analysis_history(severity_score);
|
|
|
|
-- ================================================
|
|
-- MATERIALIZED VIEWS FOR COMMON QUERIES
|
|
-- ================================================
|
|
|
|
-- High confidence knowledge view
|
|
CREATE MATERIALIZED VIEW IF NOT EXISTS high_confidence_knowledge AS
|
|
SELECT
|
|
fact_id,
|
|
content,
|
|
category,
|
|
confidence,
|
|
source_repos,
|
|
created_at,
|
|
last_accessed,
|
|
access_frequency
|
|
FROM knowledge_embeddings
|
|
WHERE confidence > 0.8
|
|
ORDER BY confidence DESC, access_frequency DESC;
|
|
|
|
CREATE INDEX ON high_confidence_knowledge (category);
|
|
CREATE INDEX ON high_confidence_knowledge (confidence DESC);
|
|
|
|
-- Repository quality summary view
|
|
CREATE MATERIALIZED VIEW IF NOT EXISTS repository_quality_summary AS
|
|
SELECT
|
|
rm.repo_id,
|
|
rm.repo_path,
|
|
rm.repo_name,
|
|
rm.primary_language,
|
|
rm.total_files,
|
|
rm.total_lines,
|
|
rm.quality_score,
|
|
rm.last_analyzed,
|
|
COUNT(ce.id) as total_embeddings,
|
|
AVG(fah.severity_score) as avg_file_quality,
|
|
COUNT(DISTINCT fah.file_path) as analyzed_files_count
|
|
FROM repository_metadata rm
|
|
LEFT JOIN code_embeddings ce ON rm.repo_id = ce.repo_id
|
|
LEFT JOIN file_analysis_history fah ON rm.repo_id = fah.repo_id
|
|
GROUP BY rm.repo_id, rm.repo_path, rm.repo_name, rm.primary_language,
|
|
rm.total_files, rm.total_lines, rm.quality_score, rm.last_analyzed;
|
|
|
|
CREATE INDEX ON repository_quality_summary (quality_score DESC);
|
|
CREATE INDEX ON repository_quality_summary (last_analyzed DESC);
|
|
|
|
-- Recent activity view
|
|
CREATE MATERIALIZED VIEW IF NOT EXISTS recent_activity AS
|
|
SELECT
|
|
'query' as activity_type,
|
|
session_id as identifier,
|
|
query_text as description,
|
|
timestamp as activity_time,
|
|
repo_context
|
|
FROM query_embeddings
|
|
WHERE timestamp >= CURRENT_TIMESTAMP - INTERVAL '7 days'
|
|
UNION ALL
|
|
SELECT
|
|
'analysis' as activity_type,
|
|
repo_id as identifier,
|
|
file_path as description,
|
|
analyzed_at as activity_time,
|
|
repo_id as repo_context
|
|
FROM file_analysis_history
|
|
WHERE analyzed_at >= CURRENT_TIMESTAMP - INTERVAL '7 days'
|
|
ORDER BY activity_time DESC;
|
|
|
|
CREATE INDEX ON recent_activity (activity_time DESC);
|
|
CREATE INDEX ON recent_activity (activity_type);
|
|
|
|
-- ================================================
|
|
-- STORED FUNCTIONS AND PROCEDURES
|
|
-- ================================================
|
|
|
|
-- Function to refresh all materialized views
|
|
CREATE OR REPLACE FUNCTION refresh_memory_views()
|
|
RETURNS void AS $$
|
|
BEGIN
|
|
REFRESH MATERIALIZED VIEW CONCURRENTLY high_confidence_knowledge;
|
|
REFRESH MATERIALIZED VIEW CONCURRENTLY repository_quality_summary;
|
|
REFRESH MATERIALIZED VIEW CONCURRENTLY recent_activity;
|
|
|
|
-- Log the refresh
|
|
INSERT INTO memory_consolidation_log (
|
|
source_type, source_id, target_memory_type, target_id,
|
|
consolidation_confidence, consolidation_metadata
|
|
) VALUES (
|
|
'system', 'materialized_views', 'system', 'view_refresh',
|
|
1.0, '{"refresh_time": "' || CURRENT_TIMESTAMP || '"}'::jsonb
|
|
);
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
-- Function to calculate semantic similarity between texts
|
|
CREATE OR REPLACE FUNCTION calculate_similarity(embedding1 vector(384), embedding2 vector(384))
|
|
RETURNS real AS $$
|
|
BEGIN
|
|
RETURN 1 - (embedding1 <=> embedding2);
|
|
END;
|
|
$$ LANGUAGE plpgsql IMMUTABLE STRICT;
|
|
|
|
-- Function to update access patterns
|
|
CREATE OR REPLACE FUNCTION update_access_pattern(table_name text, id_column text, id_value text)
|
|
RETURNS void AS $$
|
|
BEGIN
|
|
CASE table_name
|
|
WHEN 'knowledge_embeddings' THEN
|
|
EXECUTE 'UPDATE knowledge_embeddings SET last_accessed = CURRENT_TIMESTAMP, access_frequency = access_frequency + 1 WHERE fact_id = $1'
|
|
USING id_value;
|
|
WHEN 'code_embeddings' THEN
|
|
EXECUTE 'UPDATE code_embeddings SET last_accessed = CURRENT_TIMESTAMP, access_count = access_count + 1 WHERE id = $1::uuid'
|
|
USING id_value;
|
|
ELSE
|
|
RAISE EXCEPTION 'Unsupported table: %', table_name;
|
|
END CASE;
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
-- Function to cleanup old memories
|
|
CREATE OR REPLACE FUNCTION cleanup_old_memories(retention_days integer DEFAULT 365)
|
|
RETURNS integer AS $$
|
|
DECLARE
|
|
deleted_count integer := 0;
|
|
cutoff_date timestamp;
|
|
BEGIN
|
|
cutoff_date := CURRENT_TIMESTAMP - (retention_days || ' days')::interval;
|
|
|
|
-- Delete old query embeddings (episodic memories)
|
|
DELETE FROM query_embeddings WHERE timestamp < cutoff_date;
|
|
GET DIAGNOSTICS deleted_count = ROW_COUNT;
|
|
|
|
-- Update knowledge confidence based on access patterns
|
|
UPDATE knowledge_embeddings
|
|
SET confidence = LEAST(confidence * (
|
|
CASE
|
|
WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - last_accessed)) / 86400 < 30
|
|
THEN 1.05
|
|
ELSE 0.98
|
|
END *
|
|
(1.0 + LOG(access_frequency + 1) / 20.0)
|
|
), 1.0);
|
|
|
|
-- Log cleanup activity
|
|
INSERT INTO memory_consolidation_log (
|
|
source_type, source_id, target_memory_type, target_id,
|
|
consolidation_confidence, consolidation_metadata
|
|
) VALUES (
|
|
'system', 'cleanup_function', 'system', 'memory_cleanup',
|
|
1.0, ('{"deleted_records": ' || deleted_count || ', "cutoff_date": "' || cutoff_date || '"}')::jsonb
|
|
);
|
|
|
|
RETURN deleted_count;
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
-- Function to find similar code patterns
|
|
CREATE OR REPLACE FUNCTION find_similar_code(
|
|
query_embedding vector(384),
|
|
repo_filter text DEFAULT NULL,
|
|
similarity_threshold real DEFAULT 0.7,
|
|
max_results integer DEFAULT 10
|
|
)
|
|
RETURNS TABLE (
|
|
id uuid,
|
|
repo_id varchar(255),
|
|
file_path text,
|
|
similarity real,
|
|
metadata jsonb
|
|
) AS $$
|
|
BEGIN
|
|
RETURN QUERY
|
|
SELECT
|
|
ce.id,
|
|
ce.repo_id,
|
|
ce.file_path,
|
|
(1 - (ce.embedding <=> query_embedding))::real as similarity,
|
|
ce.metadata
|
|
FROM code_embeddings ce
|
|
WHERE (repo_filter IS NULL OR ce.repo_id = repo_filter)
|
|
AND (1 - (ce.embedding <=> query_embedding)) > similarity_threshold
|
|
ORDER BY similarity DESC
|
|
LIMIT max_results;
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
-- Function to get knowledge by category
|
|
CREATE OR REPLACE FUNCTION get_knowledge_by_category(
|
|
category_filter varchar(100),
|
|
min_confidence real DEFAULT 0.5,
|
|
max_results integer DEFAULT 20
|
|
)
|
|
RETURNS TABLE (
|
|
fact_id varchar(255),
|
|
content text,
|
|
confidence real,
|
|
access_frequency integer,
|
|
source_repos text[]
|
|
) AS $$
|
|
BEGIN
|
|
RETURN QUERY
|
|
SELECT
|
|
ke.fact_id,
|
|
ke.content,
|
|
ke.confidence,
|
|
ke.access_frequency,
|
|
ke.source_repos
|
|
FROM knowledge_embeddings ke
|
|
WHERE ke.category = category_filter
|
|
AND ke.confidence >= min_confidence
|
|
ORDER BY ke.confidence DESC, ke.access_frequency DESC
|
|
LIMIT max_results;
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
-- ================================================
|
|
-- TRIGGERS FOR AUTOMATIC MAINTENANCE
|
|
-- ================================================
|
|
|
|
-- Trigger function to update repository metadata when embeddings are added
|
|
CREATE OR REPLACE FUNCTION update_repository_stats()
|
|
RETURNS trigger AS $$
|
|
BEGIN
|
|
-- Update or insert repository metadata
|
|
INSERT INTO repository_metadata (repo_id, repo_path, analysis_count, last_analyzed)
|
|
VALUES (NEW.repo_id, NEW.repo_id, 1, CURRENT_TIMESTAMP)
|
|
ON CONFLICT (repo_id)
|
|
DO UPDATE SET
|
|
analysis_count = repository_metadata.analysis_count + 1,
|
|
last_analyzed = CURRENT_TIMESTAMP;
|
|
|
|
RETURN NEW;
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
-- Create triggers
|
|
DROP TRIGGER IF EXISTS trigger_update_repo_stats ON code_embeddings;
|
|
CREATE TRIGGER trigger_update_repo_stats
|
|
AFTER INSERT ON code_embeddings
|
|
FOR EACH ROW
|
|
EXECUTE FUNCTION update_repository_stats();
|
|
|
|
-- Trigger to automatically update access patterns
|
|
CREATE OR REPLACE FUNCTION auto_update_access()
|
|
RETURNS trigger AS $$
|
|
BEGIN
|
|
NEW.last_accessed = CURRENT_TIMESTAMP;
|
|
NEW.access_count = COALESCE(OLD.access_count, 0) + 1;
|
|
RETURN NEW;
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
DROP TRIGGER IF EXISTS trigger_auto_access_update ON code_embeddings;
|
|
CREATE TRIGGER trigger_auto_access_update
|
|
BEFORE UPDATE ON code_embeddings
|
|
FOR EACH ROW
|
|
EXECUTE FUNCTION auto_update_access();
|
|
|
|
-- ================================================
|
|
-- SECURITY AND PERMISSIONS
|
|
-- ================================================
|
|
|
|
-- Create roles for different access levels
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'repo_analyzer_read') THEN
|
|
CREATE ROLE repo_analyzer_read;
|
|
END IF;
|
|
|
|
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'repo_analyzer_write') THEN
|
|
CREATE ROLE repo_analyzer_write;
|
|
END IF;
|
|
|
|
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'repo_analyzer_admin') THEN
|
|
CREATE ROLE repo_analyzer_admin;
|
|
END IF;
|
|
END
|
|
$$;
|
|
|
|
-- Grant permissions
|
|
GRANT SELECT ON ALL TABLES IN SCHEMA public TO repo_analyzer_read;
|
|
GRANT SELECT ON high_confidence_knowledge TO repo_analyzer_read;
|
|
GRANT SELECT ON repository_quality_summary TO repo_analyzer_read;
|
|
GRANT SELECT ON recent_activity TO repo_analyzer_read;
|
|
|
|
GRANT SELECT, INSERT, UPDATE ON ALL TABLES IN SCHEMA public TO repo_analyzer_write;
|
|
GRANT SELECT ON high_confidence_knowledge TO repo_analyzer_write;
|
|
GRANT SELECT ON repository_quality_summary TO repo_analyzer_write;
|
|
GRANT SELECT ON recent_activity TO repo_analyzer_write;
|
|
GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO repo_analyzer_write;
|
|
|
|
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO repo_analyzer_admin;
|
|
GRANT ALL PRIVILEGES ON high_confidence_knowledge TO repo_analyzer_admin;
|
|
GRANT ALL PRIVILEGES ON repository_quality_summary TO repo_analyzer_admin;
|
|
GRANT ALL PRIVILEGES ON recent_activity TO repo_analyzer_admin;
|
|
GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO repo_analyzer_admin;
|
|
GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO repo_analyzer_admin;
|
|
|
|
-- ================================================
|
|
-- DATA VALIDATION AND CONSTRAINTS
|
|
-- ================================================
|
|
|
|
-- Add check constraints for data quality
|
|
-- Note: Vector dimensions are validated at insertion time, no need for runtime checks
|
|
|
|
-- Add constraints for reasonable data ranges
|
|
DO $$
|
|
BEGIN
|
|
IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'reasonable_lines_of_code') THEN
|
|
ALTER TABLE file_analysis_history ADD CONSTRAINT reasonable_lines_of_code
|
|
CHECK (lines_of_code >= 0 AND lines_of_code <= 1000000);
|
|
END IF;
|
|
|
|
IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'reasonable_complexity') THEN
|
|
ALTER TABLE file_analysis_history ADD CONSTRAINT reasonable_complexity
|
|
CHECK (complexity_score >= 0.0 AND complexity_score <= 100.0);
|
|
END IF;
|
|
END
|
|
$$;
|
|
|
|
-- ================================================
|
|
-- INITIAL DATA AND CONFIGURATION
|
|
-- ================================================
|
|
|
|
-- Insert initial system configuration
|
|
INSERT INTO memory_consolidation_log (
|
|
source_type, source_id, target_memory_type, target_id,
|
|
consolidation_confidence, consolidation_metadata
|
|
) VALUES (
|
|
'system', 'database_migration', 'system', 'initial_setup',
|
|
1.0, ('{"migration_version": "1.0", "setup_time": "' || CURRENT_TIMESTAMP || '"}')::jsonb
|
|
) ON CONFLICT DO NOTHING;
|
|
|
|
-- Create initial knowledge categories
|
|
INSERT INTO knowledge_embeddings (
|
|
fact_id, content, category, embedding, confidence, source_repos
|
|
) VALUES
|
|
(
|
|
'init_security_001',
|
|
'Always validate and sanitize user input to prevent injection attacks',
|
|
'security_vulnerability',
|
|
array_fill(0.0, ARRAY[384])::vector(384),
|
|
0.95,
|
|
ARRAY[]::text[]
|
|
),
|
|
(
|
|
'init_performance_001',
|
|
'Use appropriate data structures and algorithms for better performance',
|
|
'performance',
|
|
array_fill(0.0, ARRAY[384])::vector(384),
|
|
0.9,
|
|
ARRAY[]::text[]
|
|
),
|
|
(
|
|
'init_best_practice_001',
|
|
'Follow consistent naming conventions and code formatting standards',
|
|
'best_practice',
|
|
array_fill(0.0, ARRAY[384])::vector(384),
|
|
0.85,
|
|
ARRAY[]::text[]
|
|
)
|
|
ON CONFLICT (fact_id) DO NOTHING;
|
|
|
|
-- ================================================
|
|
-- BACKUP AND MAINTENANCE PROCEDURES
|
|
-- ================================================
|
|
|
|
-- Function to create backup of critical memory data
|
|
CREATE OR REPLACE FUNCTION backup_memory_data(backup_path text DEFAULT '/tmp/memory_backup')
|
|
RETURNS text AS $$
|
|
DECLARE
|
|
backup_file text;
|
|
result_message text;
|
|
BEGIN
|
|
backup_file := backup_path || '_' || to_char(CURRENT_TIMESTAMP, 'YYYY-MM-DD_HH24-MI-SS') || '.sql';
|
|
|
|
-- This would need to be implemented with actual backup logic
|
|
-- For now, just return the intended backup file name
|
|
result_message := 'Backup would be created at: ' || backup_file;
|
|
|
|
-- Log backup activity
|
|
INSERT INTO memory_consolidation_log (
|
|
source_type, source_id, target_memory_type, target_id,
|
|
consolidation_confidence, consolidation_metadata
|
|
) VALUES (
|
|
'system', 'backup_function', 'system', 'backup_created',
|
|
1.0, ('{"backup_file": "' || backup_file || '"}')::jsonb
|
|
);
|
|
|
|
RETURN result_message;
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
-- ================================================
|
|
-- MONITORING AND ANALYTICS
|
|
-- ================================================
|
|
|
|
-- View for system health monitoring
|
|
CREATE OR REPLACE VIEW system_health_monitor AS
|
|
SELECT
|
|
'code_embeddings' as table_name,
|
|
COUNT(*) as record_count,
|
|
MAX(created_at) as latest_record,
|
|
AVG(access_count) as avg_access_count
|
|
FROM code_embeddings
|
|
UNION ALL
|
|
SELECT
|
|
'query_embeddings' as table_name,
|
|
COUNT(*) as record_count,
|
|
MAX(timestamp) as latest_record,
|
|
NULL as avg_access_count
|
|
FROM query_embeddings
|
|
UNION ALL
|
|
SELECT
|
|
'knowledge_embeddings' as table_name,
|
|
COUNT(*) as record_count,
|
|
MAX(created_at) as latest_record,
|
|
AVG(access_frequency) as avg_access_count
|
|
FROM knowledge_embeddings;
|
|
|
|
-- Function to get comprehensive system statistics
|
|
CREATE OR REPLACE FUNCTION get_system_statistics()
|
|
RETURNS jsonb AS $$
|
|
DECLARE
|
|
stats jsonb;
|
|
BEGIN
|
|
SELECT jsonb_build_object(
|
|
'total_code_embeddings', (SELECT COUNT(*) FROM code_embeddings),
|
|
'total_query_embeddings', (SELECT COUNT(*) FROM query_embeddings),
|
|
'total_knowledge_embeddings', (SELECT COUNT(*) FROM knowledge_embeddings),
|
|
'unique_repositories', (SELECT COUNT(DISTINCT repo_id) FROM code_embeddings),
|
|
'high_confidence_knowledge', (SELECT COUNT(*) FROM knowledge_embeddings WHERE confidence > 0.8),
|
|
'recent_activity_7d', (SELECT COUNT(*) FROM query_embeddings WHERE timestamp >= CURRENT_TIMESTAMP - INTERVAL '7 days'),
|
|
'average_code_quality', (SELECT AVG(quality_score) FROM repository_metadata),
|
|
'last_updated', CURRENT_TIMESTAMP
|
|
) INTO stats;
|
|
|
|
RETURN stats;
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
-- ================================================
|
|
-- COMPLETION MESSAGE
|
|
-- ================================================
|
|
|
|
DO $$
|
|
BEGIN
|
|
RAISE NOTICE '================================================';
|
|
RAISE NOTICE 'Repository Analyzer Memory System Database Setup Complete';
|
|
RAISE NOTICE '================================================';
|
|
RAISE NOTICE 'Tables created: code_embeddings, query_embeddings, knowledge_embeddings';
|
|
RAISE NOTICE 'Indexes created: Vector similarity indexes with IVFFlat';
|
|
RAISE NOTICE 'Functions created: Similarity search, cleanup, statistics';
|
|
RAISE NOTICE 'Materialized views created: High confidence knowledge, repository summary';
|
|
RAISE NOTICE 'Triggers created: Auto-update repository stats and access patterns';
|
|
RAISE NOTICE '================================================';
|
|
RAISE NOTICE 'Ready for AI-enhanced repository analysis with persistent memory';
|
|
RAISE NOTICE '================================================';
|
|
END
|
|
$$; |