#!/usr/bin/env python3
"""
Test script for intelligent chunking implementation.
Tests the logic without requiring actual API calls or database connections.
"""

import sys
from collections import Counter
from pathlib import Path

# Add current directory to path
sys.path.insert(0, str(Path(__file__).parent))

# Import the functions we need to test
# NOTE(review): find_dependencies is imported but never exercised below —
# either add a dedicated test or drop the import. Kept for now.
from server import (
    categorize_by_module,
    get_overview_files,
    estimate_tokens,
    split_by_token_limit,
    find_dependencies,
    create_intelligent_chunks
)


def test_categorize_by_module():
    """Test module categorization.

    Feeds a mixed set of (path, content) tuples to categorize_by_module()
    and prints which of the expected module buckets were detected.
    Returns the categorization dict so the caller can assert on it.
    """
    print("=" * 60)
    print("TEST 1: categorize_by_module()")
    print("=" * 60)

    # Test files
    test_files = [
        ("src/auth/auth.controller.js", "export class AuthController {}"),
        ("src/auth/auth.service.js", "export class AuthService {}"),
        ("src/auth/auth.middleware.js", "export function authMiddleware() {}"),
        ("src/products/product.model.js", "export class Product {}"),
        ("src/products/product.service.js", "export class ProductService {}"),
        ("src/orders/order.controller.js", "export class OrderController {}"),
        ("README.md", "# Project Documentation"),
        ("package.json", '{"name": "test-project"}'),
        ("index.js", "const app = require('./app');"),
        ("src/utils/helper.js", "export function helper() {}"),
        ("src/config/settings.js", "export const config = {};"),
    ]

    result = categorize_by_module(test_files)

    print(f"\nāœ… Categorized {len(test_files)} files into {len(result)} modules:")
    for module_name, files in result.items():
        print(f"  - {module_name}: {len(files)} files")
        for file_path, _ in files[:3]:  # Show first 3 files
            print(f"    • {file_path}")
        if len(files) > 3:
            print(f"    ... and {len(files) - 3} more")

    # Verify expected modules (informational only — see run_all_tests note)
    expected_modules = ['authentication', 'products', 'orders', 'utilities', 'configuration']
    found_modules = list(result.keys())

    print(f"\nšŸ“Š Module Detection:")
    for expected in expected_modules:
        status = "āœ…" if expected in found_modules else "āŒ"
        print(f"  {status} {expected}: {'Found' if expected in found_modules else 'Not found'}")

    return result


def test_get_overview_files():
    """Test overview file detection.

    Verifies that project-level files (README, manifests, entry points)
    are picked out of a mixed file list. Returns the detected overview files.
    """
    print("\n" + "=" * 60)
    print("TEST 2: get_overview_files()")
    print("=" * 60)

    test_files = [
        ("README.md", "# Project"),
        ("package.json", '{"name": "test"}'),
        ("index.js", "console.log('hello');"),
        ("src/auth/controller.js", "export class Auth {}"),
        ("Dockerfile", "FROM node:18"),
        ("tsconfig.json", '{"compilerOptions": {}}'),
    ]

    result = get_overview_files(test_files)

    print(f"\nāœ… Identified {len(result)} overview files:")
    for file_path, _ in result:
        print(f"  • {file_path}")

    expected_overview = ['README.md', 'package.json', 'index.js', 'Dockerfile', 'tsconfig.json']
    # Compare on basenames so nested overview files still match
    found_overview = [f[0].split('/')[-1] for f in result]

    print(f"\nšŸ“Š Overview Detection:")
    for expected in expected_overview:
        status = "āœ…" if expected in found_overview else "āŒ"
        print(f"  {status} {expected}: {'Found' if expected in found_overview else 'Not found'}")

    return result


def test_estimate_tokens():
    """Test token estimation.

    Uses synthetic contents of known length; expectation assumes the
    ~4 chars/token heuristic (14000 chars -> ~3500 tokens).
    """
    print("\n" + "=" * 60)
    print("TEST 3: estimate_tokens()")
    print("=" * 60)

    test_files = [
        ("file1.js", "a" * 4000),  # 4000 chars = ~1000 tokens
        ("file2.js", "b" * 8000),  # 8000 chars = ~2000 tokens
        ("file3.js", "c" * 2000),  # 2000 chars = ~500 tokens
    ]

    result = estimate_tokens(test_files)
    expected = (4000 + 8000 + 2000) // 4  # 3500 tokens

    print(f"\nāœ… Estimated tokens: {result}")
    print(f"  Expected: ~{expected} tokens")
    print(f"  Status: {'āœ… PASS' if abs(result - expected) < 100 else 'āŒ FAIL'}")

    return result


def test_split_by_token_limit():
    """Test token-based splitting.

    Five ~2000-token files against a 15000-token limit should fit in a
    single sub-chunk; the printout shows the actual split for inspection.
    """
    print("\n" + "=" * 60)
    print("TEST 4: split_by_token_limit()")
    print("=" * 60)

    # Create files that exceed token limit
    large_files = [
        ("file1.js", "a" * 8000),  # ~2000 tokens
        ("file2.js", "b" * 8000),  # ~2000 tokens
        ("file3.js", "c" * 8000),  # ~2000 tokens
        ("file4.js", "d" * 8000),  # ~2000 tokens
        ("file5.js", "e" * 8000),  # ~2000 tokens
    ]
    # Total: ~10000 tokens, should split at 15000 limit

    result = split_by_token_limit(large_files, max_tokens=15000)

    print(f"\nāœ… Split {len(large_files)} files into {len(result)} sub-chunks:")
    for i, sub_chunk in enumerate(result, 1):
        tokens = estimate_tokens(sub_chunk)
        print(f"  Chunk {i}: {len(sub_chunk)} files, ~{tokens} tokens")
        for file_path, _ in sub_chunk:
            print(f"    • {file_path}")

    return result


def test_create_intelligent_chunks():
    """Test complete intelligent chunking.

    Runs the full pipeline over a representative project layout, prints
    every produced chunk, and verifies basic structural invariants
    (overview chunk present, sequential IDs, no duplicate files).
    """
    print("\n" + "=" * 60)
    print("TEST 5: create_intelligent_chunks()")
    print("=" * 60)

    # Comprehensive test files
    test_files = [
        # Overview files
        ("README.md", "# Project Documentation\n\nThis is a test project."),
        ("package.json", '{"name": "test-project", "version": "1.0.0"}'),
        ("index.js", "const app = require('./app');\napp.listen(3000);"),
        # Authentication module
        ("src/auth/auth.controller.js", "export class AuthController {\n  async login() {}\n}"),
        ("src/auth/auth.service.js", "export class AuthService {\n  async validateUser() {}\n}"),
        ("src/auth/auth.middleware.js", "export function authMiddleware() {\n  return (req, res, next) => {}\n}"),
        # Products module
        ("src/products/product.model.js", "export class Product {\n  constructor() {}\n}"),
        ("src/products/product.service.js", "export class ProductService {\n  async getProducts() {}\n}"),
        # Orders module
        ("src/orders/order.controller.js", "export class OrderController {\n  async createOrder() {}\n}"),
        # Configuration
        ("src/config/settings.js", "export const config = {\n  port: 3000\n};"),
        # Utils
        ("src/utils/helper.js", "export function helper() {\n  return true;\n}"),
    ]

    chunks = create_intelligent_chunks(test_files)

    print(f"\nāœ… Created {len(chunks)} intelligent chunks from {len(test_files)} files:")
    print()

    for chunk in chunks:
        chunk_id = chunk.get('id', 'unknown')
        chunk_name = chunk.get('name', 'unknown')
        chunk_type = chunk.get('chunk_type', 'unknown')
        chunk_priority = chunk.get('priority', 0)
        files = chunk.get('files', [])
        deps = chunk.get('context_dependencies', [])

        print(f"šŸ“¦ {chunk_id}: {chunk_name} ({chunk_type}) [Priority: {chunk_priority}]")
        print(f"  Files: {len(files)}")
        print(f"  Dependencies: {len(deps)}")
        for file_path, _ in files:
            print(f"    • {file_path}")
        print()

    # Verify structure
    print("šŸ“Š Structure Verification:")
    print(f"  āœ… Total chunks: {len(chunks)}")

    # Check for overview chunk
    overview_chunks = [c for c in chunks if c.get('chunk_type') == 'overview']
    print(f"  āœ… Overview chunks: {len(overview_chunks)} (expected: 1)")

    # Check for module chunks
    module_chunks = [c for c in chunks if c.get('chunk_type') == 'module']
    print(f"  āœ… Module chunks: {len(module_chunks)}")

    # Verify chunk IDs are sequential
    chunk_ids = [c.get('id') for c in chunks]
    print(f"  āœ… Chunk IDs: {chunk_ids}")

    # Verify no duplicate files.
    # Counter gives an O(n) scan; the previous list.count()-per-element
    # approach was O(n^2) and reported each duplicate path multiple times.
    all_files = [
        file_path
        for chunk in chunks
        for file_path, _ in chunk.get('files', [])
    ]
    file_counts = Counter(all_files)
    duplicates = sorted(f for f, count in file_counts.items() if count > 1)
    if duplicates:
        print(f"  āŒ Duplicate files found: {duplicates}")
    else:
        print(f"  āœ… No duplicate files (all {len(all_files)} files unique)")

    return chunks


def test_chunk_structure():
    """Test that chunks have correct structure.

    Checks that every chunk exposes the required fields and that its
    'files' entries are (file_path, content) tuples.
    """
    print("\n" + "=" * 60)
    print("TEST 6: Chunk Structure Validation")
    print("=" * 60)

    test_files = [
        ("src/auth/auth.controller.js", "export class AuthController {}"),
        ("src/auth/auth.service.js", "export class AuthService {}"),
        ("README.md", "# Project"),
        ("package.json", '{"name": "test"}'),
    ]

    chunks = create_intelligent_chunks(test_files)

    required_fields = ['id', 'name', 'priority', 'files', 'context_dependencies', 'chunk_type']

    print("\nāœ… Validating chunk structure:")
    for i, chunk in enumerate(chunks, 1):
        print(f"\n  Chunk {i}:")
        for field in required_fields:
            status = "āœ…" if field in chunk else "āŒ"
            value = chunk.get(field, 'MISSING')
            print(f"    {status} {field}: {type(value).__name__} = {value}")

        # Verify files is a list of tuples
        files = chunk.get('files', [])
        if files:
            first_file = files[0]
            if isinstance(first_file, tuple) and len(first_file) == 2:
                print(f"    āœ… files: List of (file_path, content) tuples")
            else:
                print(f"    āŒ files: Invalid format - {type(first_file)}")

    return chunks


def run_all_tests():
    """Run all tests.

    Returns True when every test completes, False on any exception or
    failed assertion.

    NOTE(review): the per-test āŒ diagnostics above are informational only;
    the asserts here are smoke-level (non-empty results). Strengthening
    them would require pinning down server's exact chunking contract.
    """
    print("\n" + "=" * 60)
    print("INTELLIGENT CHUNKING - COMPREHENSIVE TEST SUITE")
    print("=" * 60)

    try:
        # Test 1: Module categorization
        categorized = test_categorize_by_module()
        assert len(categorized) > 0, "Module categorization failed"

        # Test 2: Overview files
        overview = test_get_overview_files()
        assert len(overview) > 0, "Overview file detection failed"

        # Test 3: Token estimation
        tokens = test_estimate_tokens()
        assert tokens > 0, "Token estimation failed"

        # Test 4: Token-based splitting
        split_chunks = test_split_by_token_limit()
        assert len(split_chunks) > 0, "Token splitting failed"

        # Test 5: Complete chunking
        chunks = test_create_intelligent_chunks()
        assert len(chunks) > 0, "Intelligent chunking failed"

        # Test 6: Structure validation
        validated_chunks = test_chunk_structure()
        assert len(validated_chunks) > 0, "Structure validation failed"

        print("\n" + "=" * 60)
        print("āœ… ALL TESTS PASSED!")
        print("=" * 60)
        print("\nšŸ“Š Summary:")
        print(f"  • Module categorization: āœ…")
        print(f"  • Overview file detection: āœ…")
        print(f"  • Token estimation: āœ…")
        print(f"  • Token-based splitting: āœ…")
        print(f"  • Intelligent chunking: āœ…")
        print(f"  • Structure validation: āœ…")
        print("\nšŸŽ‰ Intelligent chunking implementation is working correctly!")
        return True

    except Exception as e:
        print("\n" + "=" * 60)
        print(f"āŒ TEST FAILED: {e}")
        print("=" * 60)
        import traceback
        traceback.print_exc()
        return False


if __name__ == "__main__":
    success = run_all_tests()
    sys.exit(0 if success else 1)