codenuk_backend_mine/services/ai-analysis-service/test_intelligent_chunking.py

#!/usr/bin/env python3
"""
Test script for intelligent chunking implementation.
Tests the logic without requiring actual API calls or database connections.
"""
import sys
from pathlib import Path

# Make server.py importable by adding this script's directory to sys.path
sys.path.insert(0, str(Path(__file__).parent))

# Import the functions under test
from server import (
    categorize_by_module,
    get_overview_files,
    estimate_tokens,
    split_by_token_limit,
    find_dependencies,
    create_intelligent_chunks,
)
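
# Rough signatures for the helpers under test, inferred from how the tests
# below call them (server.py is the source of truth, not these notes):
#   categorize_by_module(files)             -> dict[str, list[tuple[str, str]]]
#   get_overview_files(files)               -> list[tuple[str, str]]
#   estimate_tokens(files)                  -> int
#   split_by_token_limit(files, max_tokens) -> list[list[tuple[str, str]]]
#   create_intelligent_chunks(files)        -> list[dict]  (see EXAMPLE_CHUNK below)
# where `files` is a list of (file_path, content) tuples; find_dependencies is
# imported for completeness but not exercised directly in this suite.
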
def test_categorize_by_module():
    """Test module categorization."""
    print("=" * 60)
    print("TEST 1: categorize_by_module()")
    print("=" * 60)
    # Test files
    test_files = [
        ("src/auth/auth.controller.js", "export class AuthController {}"),
        ("src/auth/auth.service.js", "export class AuthService {}"),
        ("src/auth/auth.middleware.js", "export function authMiddleware() {}"),
        ("src/products/product.model.js", "export class Product {}"),
        ("src/products/product.service.js", "export class ProductService {}"),
        ("src/orders/order.controller.js", "export class OrderController {}"),
        ("README.md", "# Project Documentation"),
        ("package.json", '{"name": "test-project"}'),
        ("index.js", "const app = require('./app');"),
        ("src/utils/helper.js", "export function helper() {}"),
        ("src/config/settings.js", "export const config = {};"),
    ]
    result = categorize_by_module(test_files)
    print(f"\n✅ Categorized {len(test_files)} files into {len(result)} modules:")
    for module_name, files in result.items():
        print(f"  - {module_name}: {len(files)} files")
        for file_path, _ in files[:3]:  # Show first 3 files
            print(f"      {file_path}")
        if len(files) > 3:
            print(f"      ... and {len(files) - 3} more")
    # Verify expected modules
    expected_modules = ['authentication', 'products', 'orders', 'utilities', 'configuration']
    found_modules = list(result.keys())
    print("\n📊 Module Detection:")
    for expected in expected_modules:
        status = "✅" if expected in found_modules else "❌"
        print(f"  {status} {expected}: {'Found' if expected in found_modules else 'Not found'}")
    return result

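# For illustration, the categorization above is expected to group files by
# feature directory into named modules, along these lines (an assumed shape,
# not something this test asserts verbatim):
#   {
#       "authentication": [("src/auth/auth.controller.js", "..."), ...],
#       "products":       [("src/products/product.model.js", "..."), ...],
#       ...
#   }
# The directory-to-module naming (auth -> authentication, utils -> utilities,
# config -> configuration) is inferred from the expected_modules list.
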
def test_get_overview_files():
    """Test overview file detection."""
    print("\n" + "=" * 60)
    print("TEST 2: get_overview_files()")
    print("=" * 60)
    test_files = [
        ("README.md", "# Project"),
        ("package.json", '{"name": "test"}'),
        ("index.js", "console.log('hello');"),
        ("src/auth/controller.js", "export class Auth {}"),
        ("Dockerfile", "FROM node:18"),
        ("tsconfig.json", '{"compilerOptions": {}}'),
    ]
    result = get_overview_files(test_files)
    print(f"\n✅ Identified {len(result)} overview files:")
    for file_path, _ in result:
        print(f"      {file_path}")
    expected_overview = ['README.md', 'package.json', 'index.js', 'Dockerfile', 'tsconfig.json']
    found_overview = [f[0].split('/')[-1] for f in result]
    print("\n📊 Overview Detection:")
    for expected in expected_overview:
        status = "✅" if expected in found_overview else "❌"
        print(f"  {status} {expected}: {'Found' if expected in found_overview else 'Not found'}")
    return result

def test_estimate_tokens():
    """Test token estimation."""
    print("\n" + "=" * 60)
    print("TEST 3: estimate_tokens()")
    print("=" * 60)
    test_files = [
        ("file1.js", "a" * 4000),  # 4000 chars = ~1000 tokens
        ("file2.js", "b" * 8000),  # 8000 chars = ~2000 tokens
        ("file3.js", "c" * 2000),  # 2000 chars = ~500 tokens
    ]
    result = estimate_tokens(test_files)
    expected = (4000 + 8000 + 2000) // 4  # 3500 tokens
    print(f"\n✅ Estimated tokens: {result}")
    print(f"   Expected: ~{expected} tokens")
    print(f"   Status: {'✅ PASS' if abs(result - expected) < 100 else '❌ FAIL'}")
    return result

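# For reference, a minimal estimator consistent with the ~4 chars/token
# heuristic the test above relies on. This is an assumption about what
# server.estimate_tokens() does, documented as a sketch; it is not called
# by the suite and is not a copy of the real implementation.
def _reference_estimate_tokens(files):
    """Rough token estimate: total characters across all files // 4."""
    return sum(len(content) for _, content in files) // 4
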
def test_split_by_token_limit():
    """Test token-based splitting."""
    print("\n" + "=" * 60)
    print("TEST 4: split_by_token_limit()")
    print("=" * 60)
    # Create a batch of files to split
    large_files = [
        ("file1.js", "a" * 8000),  # ~2000 tokens
        ("file2.js", "b" * 8000),  # ~2000 tokens
        ("file3.js", "c" * 8000),  # ~2000 tokens
        ("file4.js", "d" * 8000),  # ~2000 tokens
        ("file5.js", "e" * 8000),  # ~2000 tokens
    ]
    # Total: ~10000 tokens, which fits under the 15000-token limit,
    # so all files should land in a single sub-chunk
    result = split_by_token_limit(large_files, max_tokens=15000)
    print(f"\n✅ Split {len(large_files)} files into {len(result)} sub-chunks:")
    for i, sub_chunk in enumerate(result, 1):
        tokens = estimate_tokens(sub_chunk)
        print(f"   Chunk {i}: {len(sub_chunk)} files, ~{tokens} tokens")
        for file_path, _ in sub_chunk:
            print(f"      {file_path}")
    return result

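# A minimal greedy splitter matching the contract exercised above: accumulate
# files into a sub-chunk until adding the next file would exceed max_tokens,
# then start a new sub-chunk. This is an assumed shape for
# server.split_by_token_limit(), shown only to document the expected
# behaviour; the suite does not call it.
def _reference_split_by_token_limit(files, max_tokens=15000):
    sub_chunks, current, current_tokens = [], [], 0
    for path, content in files:
        file_tokens = len(content) // 4  # same ~4 chars/token heuristic
        if current and current_tokens + file_tokens > max_tokens:
            sub_chunks.append(current)
            current, current_tokens = [], 0
        current.append((path, content))
        current_tokens += file_tokens
    if current:
        sub_chunks.append(current)
    return sub_chunks
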
def test_create_intelligent_chunks():
    """Test complete intelligent chunking."""
    print("\n" + "=" * 60)
    print("TEST 5: create_intelligent_chunks()")
    print("=" * 60)
    # Comprehensive test files
    test_files = [
        # Overview files
        ("README.md", "# Project Documentation\n\nThis is a test project."),
        ("package.json", '{"name": "test-project", "version": "1.0.0"}'),
        ("index.js", "const app = require('./app');\napp.listen(3000);"),
        # Authentication module
        ("src/auth/auth.controller.js", "export class AuthController {\n  async login() {}\n}"),
        ("src/auth/auth.service.js", "export class AuthService {\n  async validateUser() {}\n}"),
        ("src/auth/auth.middleware.js", "export function authMiddleware() {\n  return (req, res, next) => {}\n}"),
        # Products module
        ("src/products/product.model.js", "export class Product {\n  constructor() {}\n}"),
        ("src/products/product.service.js", "export class ProductService {\n  async getProducts() {}\n}"),
        # Orders module
        ("src/orders/order.controller.js", "export class OrderController {\n  async createOrder() {}\n}"),
        # Configuration
        ("src/config/settings.js", "export const config = {\n  port: 3000\n};"),
        # Utils
        ("src/utils/helper.js", "export function helper() {\n  return true;\n}"),
    ]
    chunks = create_intelligent_chunks(test_files)
    print(f"\n✅ Created {len(chunks)} intelligent chunks from {len(test_files)} files:")
    print()
    for chunk in chunks:
        chunk_id = chunk.get('id', 'unknown')
        chunk_name = chunk.get('name', 'unknown')
        chunk_type = chunk.get('chunk_type', 'unknown')
        chunk_priority = chunk.get('priority', 0)
        files = chunk.get('files', [])
        deps = chunk.get('context_dependencies', [])
        print(f"📦 {chunk_id}: {chunk_name} ({chunk_type}) [Priority: {chunk_priority}]")
        print(f"   Files: {len(files)}")
        print(f"   Dependencies: {len(deps)}")
        for file_path, _ in files:
            print(f"      {file_path}")
        print()
    # Verify structure
    print("📊 Structure Verification:")
    print(f"  ✅ Total chunks: {len(chunks)}")
    # Check for overview chunk
    overview_chunks = [c for c in chunks if c.get('chunk_type') == 'overview']
    print(f"  ✅ Overview chunks: {len(overview_chunks)} (expected: 1)")
    # Check for module chunks
    module_chunks = [c for c in chunks if c.get('chunk_type') == 'module']
    print(f"  ✅ Module chunks: {len(module_chunks)}")
    # Verify chunk IDs are sequential
    chunk_ids = [c.get('id') for c in chunks]
    print(f"  ✅ Chunk IDs: {chunk_ids}")
    # Verify no duplicate files across chunks
    all_files = []
    for chunk in chunks:
        for file_path, _ in chunk.get('files', []):
            all_files.append(file_path)
    duplicates = [f for f in all_files if all_files.count(f) > 1]
    if duplicates:
        print(f"  ❌ Duplicate files found: {duplicates}")
    else:
        print(f"  ✅ No duplicate files (all {len(all_files)} files unique)")
    return chunks

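# Illustrative chunk shape, assembled from the fields the tests read above
# (id, name, chunk_type, priority, files, context_dependencies). The values
# here are made up for documentation; server.create_intelligent_chunks()
# defines the real ones.
EXAMPLE_CHUNK = {
    "id": 1,
    "name": "overview",
    "chunk_type": "overview",
    "priority": 1,
    "files": [("README.md", "# Project Documentation")],
    "context_dependencies": [],
}
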
def test_chunk_structure():
    """Test that chunks have correct structure."""
    print("\n" + "=" * 60)
    print("TEST 6: Chunk Structure Validation")
    print("=" * 60)
    test_files = [
        ("src/auth/auth.controller.js", "export class AuthController {}"),
        ("src/auth/auth.service.js", "export class AuthService {}"),
        ("README.md", "# Project"),
        ("package.json", '{"name": "test"}'),
    ]
    chunks = create_intelligent_chunks(test_files)
    required_fields = ['id', 'name', 'priority', 'files', 'context_dependencies', 'chunk_type']
    print("\n✅ Validating chunk structure:")
    for i, chunk in enumerate(chunks, 1):
        print(f"\n  Chunk {i}:")
        for field in required_fields:
            status = "✅" if field in chunk else "❌"
            value = chunk.get(field, 'MISSING')
            print(f"    {status} {field}: {type(value).__name__} = {value}")
        # Verify files is a list of (file_path, content) tuples
        files = chunk.get('files', [])
        if files:
            first_file = files[0]
            if isinstance(first_file, tuple) and len(first_file) == 2:
                print("    ✅ files: List of (file_path, content) tuples")
            else:
                print(f"    ❌ files: Invalid format - {type(first_file)}")
    return chunks

def run_all_tests():
    """Run all tests."""
    print("\n" + "=" * 60)
    print("INTELLIGENT CHUNKING - COMPREHENSIVE TEST SUITE")
    print("=" * 60)
    try:
        # Test 1: Module categorization
        categorized = test_categorize_by_module()
        assert len(categorized) > 0, "Module categorization failed"
        # Test 2: Overview files
        overview = test_get_overview_files()
        assert len(overview) > 0, "Overview file detection failed"
        # Test 3: Token estimation
        tokens = test_estimate_tokens()
        assert tokens > 0, "Token estimation failed"
        # Test 4: Token-based splitting
        split_chunks = test_split_by_token_limit()
        assert len(split_chunks) > 0, "Token splitting failed"
        # Test 5: Complete chunking
        chunks = test_create_intelligent_chunks()
        assert len(chunks) > 0, "Intelligent chunking failed"
        # Test 6: Structure validation
        validated_chunks = test_chunk_structure()
        assert len(validated_chunks) > 0, "Structure validation failed"
        print("\n" + "=" * 60)
        print("✅ ALL TESTS PASSED!")
        print("=" * 60)
        print("\n📊 Summary:")
        print("  • Module categorization: ✅")
        print("  • Overview file detection: ✅")
        print("  • Token estimation: ✅")
        print("  • Token-based splitting: ✅")
        print("  • Intelligent chunking: ✅")
        print("  • Structure validation: ✅")
        print("\n🎉 Intelligent chunking implementation is working correctly!")
        return True
    except Exception as e:
        print("\n" + "=" * 60)
        print(f"❌ TEST FAILED: {e}")
        print("=" * 60)
        import traceback
        traceback.print_exc()
        return False

if __name__ == "__main__":
    success = run_all_tests()
    sys.exit(0 if success else 1)