#!/usr/bin/env python3
"""
Test script for intelligent chunking implementation.

Tests the logic without requiring actual API calls or database connections.
"""

import sys
from pathlib import Path

# Put the directory containing this script at the front of the import path so
# that the ``server`` module imported below is resolved from alongside it.
sys.path.insert(0, str(Path(__file__).parent))

# Import the functions we need to test
from server import (
    categorize_by_module,
    get_overview_files,
    estimate_tokens,
    split_by_token_limit,
    find_dependencies,
    create_intelligent_chunks
)
|
|
|
|
def test_categorize_by_module():
    """Run categorize_by_module() on a small sample project and report on it.

    Prints every module that came back (with at most three of its files) and
    then, for each expected module name, whether it appears among the result
    keys. Nothing is asserted here; the report is informational only.

    Returns:
        The value returned by categorize_by_module() for the sample files.
        This function treats it as a mapping from module name to a list of
        (file_path, content) pairs.
    """
    banner = "=" * 60
    print(banner)
    print("TEST 1: categorize_by_module()")
    print(banner)

    # Sample input: (file_path, content) pairs across several directories.
    test_files = [
        ("src/auth/auth.controller.js", "export class AuthController {}"),
        ("src/auth/auth.service.js", "export class AuthService {}"),
        ("src/auth/auth.middleware.js", "export function authMiddleware() {}"),
        ("src/products/product.model.js", "export class Product {}"),
        ("src/products/product.service.js", "export class ProductService {}"),
        ("src/orders/order.controller.js", "export class OrderController {}"),
        ("README.md", "# Project Documentation"),
        ("package.json", '{"name": "test-project"}'),
        ("index.js", "const app = require('./app');"),
        ("src/utils/helper.js", "export function helper() {}"),
        ("src/config/settings.js", "export const config = {};"),
    ]

    result = categorize_by_module(test_files)

    print(f"\n✅ Categorized {len(test_files)} files into {len(result)} modules:")
    preview_limit = 3  # only the first few files of each module are listed
    for module_name, module_files in result.items():
        file_count = len(module_files)
        print(f" - {module_name}: {file_count} files")
        for file_path, _ in module_files[:preview_limit]:
            print(f" • {file_path}")
        if file_count > preview_limit:
            print(f" ... and {file_count - preview_limit} more")

    # Report which of the expected module names are present in the result.
    expected_modules = ['authentication', 'products', 'orders', 'utilities', 'configuration']
    detected_modules = list(result.keys())

    print("\n📊 Module Detection:")
    for expected in expected_modules:
        if expected in detected_modules:
            status, verdict = "✅", "Found"
        else:
            status, verdict = "❌", "Not found"
        print(f" {status} {expected}: {verdict}")

    return result
|
|
|
|
def test_get_overview_files():
    """Run get_overview_files() on sample files and report what was selected.

    Prints each returned file path, then checks (for display only) whether the
    base name of each expected overview file is among the returned paths.
    Nothing is asserted here.

    Returns:
        The value returned by get_overview_files(); this function iterates it
        as a sequence of (file_path, content) pairs.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("TEST 2: get_overview_files()")
    print(banner)

    test_files = [
        ("README.md", "# Project"),
        ("package.json", '{"name": "test"}'),
        ("index.js", "console.log('hello');"),
        ("src/auth/controller.js", "export class Auth {}"),
        ("Dockerfile", "FROM node:18"),
        ("tsconfig.json", '{"compilerOptions": {}}'),
    ]

    result = get_overview_files(test_files)

    print(f"\n✅ Identified {len(result)} overview files:")
    for file_path, _ in result:
        print(f" • {file_path}")

    expected_overview = ['README.md', 'package.json', 'index.js', 'Dockerfile', 'tsconfig.json']
    # Compare on the final path component so nested locations still match.
    found_overview = [entry[0].rsplit('/', 1)[-1] for entry in result]

    print("\n📊 Overview Detection:")
    for expected in expected_overview:
        if expected in found_overview:
            status, verdict = "✅", "Found"
        else:
            status, verdict = "❌", "Not found"
        print(f" {status} {expected}: {verdict}")

    return result
|
|
|
|
def test_estimate_tokens():
    """Check estimate_tokens() against a characters-divided-by-four estimate.

    Three files of 4000, 8000 and 2000 characters are passed in. The expected
    value is the total character count integer-divided by 4 (3500), which is
    the ratio this test assumes. The estimate passes when it is within 100 of
    that value.

    Returns:
        The value returned by estimate_tokens() for the sample files.

    Raises:
        AssertionError: If the estimate is not within the tolerance, so the
            failure reaches the caller instead of only being printed.
    """
    print("\n" + "=" * 60)
    print("TEST 3: estimate_tokens()")
    print("=" * 60)

    test_files = [
        ("file1.js", "a" * 4000),  # 4000 chars = ~1000 tokens
        ("file2.js", "b" * 8000),  # 8000 chars = ~2000 tokens
        ("file3.js", "c" * 2000),  # 2000 chars = ~500 tokens
    ]

    result = estimate_tokens(test_files)

    # Derive the expectation from the sample data so the two cannot drift apart.
    expected = sum(len(content) for _, content in test_files) // 4  # 3500 tokens
    tolerance = 100
    passed = abs(result - expected) < tolerance

    print(f"\n✅ Estimated tokens: {result}")
    print(f" Expected: ~{expected} tokens")
    print(f" Status: {'✅ PASS' if passed else '❌ FAIL'}")

    # Enforce the verdict that was just printed; previously a FAIL was only
    # displayed and the suite still reported success.
    assert passed, (
        f"Token estimate {result} is not within {tolerance} of expected {expected}"
    )

    return result
|
|
|
|
def test_split_by_token_limit():
    """Run split_by_token_limit() on five equal-sized files and print the result.

    Each sub-chunk that comes back is listed with its file count, its token
    estimate from estimate_tokens(), and its file paths. Nothing is asserted
    here.

    Returns:
        The value returned by split_by_token_limit(); this function iterates
        it as a sequence of sub-chunks, each a sequence of
        (file_path, content) pairs.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("TEST 4: split_by_token_limit()")
    print(banner)

    # Five files of 8000 characters each: file1.js .. file5.js filled with
    # 'a' .. 'e'. At ~4 characters per token (the ratio the other tests
    # assume) that is ~2000 tokens per file, ~10000 tokens in total.
    large_files = [
        (f"file{number}.js", fill * 8000)
        for number, fill in enumerate("abcde", 1)
    ]

    # NOTE(review): the ~10000-token total is below the 15000 limit passed
    # here, so this input may not force a split at all — confirm against
    # split_by_token_limit() whether a lower limit was intended.
    result = split_by_token_limit(large_files, max_tokens=15000)

    print(f"\n✅ Split {len(large_files)} files into {len(result)} sub-chunks:")
    for position, sub_chunk in enumerate(result, 1):
        token_estimate = estimate_tokens(sub_chunk)
        print(f" Chunk {position}: {len(sub_chunk)} files, ~{token_estimate} tokens")
        for file_path, _ in sub_chunk:
            print(f" • {file_path}")

    return result
|
|
|
|
def test_create_intelligent_chunks():
    """Run create_intelligent_chunks() on a sample project and report on it.

    Prints every chunk (id, name, type, priority, file and dependency counts,
    file paths), then a structure summary: total chunks, number of 'overview'
    chunks compared with the expected count of 1, number of 'module' chunks,
    the chunk ids, and whether any file path occurs in more than one place.
    Nothing is asserted here.

    Returns:
        The value returned by create_intelligent_chunks(); this function
        iterates it as a sequence of dict-like chunks and reads the keys
        'id', 'name', 'chunk_type', 'priority', 'files' and
        'context_dependencies' via ``.get``.
    """
    from collections import Counter

    print("\n" + "=" * 60)
    print("TEST 5: create_intelligent_chunks()")
    print("=" * 60)

    # Comprehensive test files
    test_files = [
        # Overview files
        ("README.md", "# Project Documentation\n\nThis is a test project."),
        ("package.json", '{"name": "test-project", "version": "1.0.0"}'),
        ("index.js", "const app = require('./app');\napp.listen(3000);"),

        # Authentication module
        ("src/auth/auth.controller.js", "export class AuthController {\n async login() {}\n}"),
        ("src/auth/auth.service.js", "export class AuthService {\n async validateUser() {}\n}"),
        ("src/auth/auth.middleware.js", "export function authMiddleware() {\n return (req, res, next) => {}\n}"),

        # Products module
        ("src/products/product.model.js", "export class Product {\n constructor() {}\n}"),
        ("src/products/product.service.js", "export class ProductService {\n async getProducts() {}\n}"),

        # Orders module
        ("src/orders/order.controller.js", "export class OrderController {\n async createOrder() {}\n}"),

        # Configuration
        ("src/config/settings.js", "export const config = {\n port: 3000\n};"),

        # Utils
        ("src/utils/helper.js", "export function helper() {\n return true;\n}"),
    ]

    chunks = create_intelligent_chunks(test_files)

    print(f"\n✅ Created {len(chunks)} intelligent chunks from {len(test_files)} files:")
    print()

    for chunk in chunks:
        chunk_id = chunk.get('id', 'unknown')
        chunk_name = chunk.get('name', 'unknown')
        chunk_type = chunk.get('chunk_type', 'unknown')
        chunk_priority = chunk.get('priority', 0)
        files = chunk.get('files', [])
        deps = chunk.get('context_dependencies', [])

        print(f"📦 {chunk_id}: {chunk_name} ({chunk_type}) [Priority: {chunk_priority}]")
        print(f" Files: {len(files)}")
        print(f" Dependencies: {len(deps)}")
        for file_path, _ in files:
            print(f" • {file_path}")
        print()

    # Summarise the overall structure
    print("📊 Structure Verification:")
    print(f" ✅ Total chunks: {len(chunks)}")

    # Overview chunks: the marker reflects whether the count matches the
    # expected single overview chunk instead of always showing success.
    overview_chunks = [c for c in chunks if c.get('chunk_type') == 'overview']
    overview_status = "✅" if len(overview_chunks) == 1 else "❌"
    print(f" {overview_status} Overview chunks: {len(overview_chunks)} (expected: 1)")

    # Module chunks (count is reported only)
    module_chunks = [c for c in chunks if c.get('chunk_type') == 'module']
    print(f" ✅ Module chunks: {len(module_chunks)}")

    # Chunk ids are displayed for inspection; their ordering is not checked.
    chunk_ids = [c.get('id') for c in chunks]
    print(f" ✅ Chunk IDs: {chunk_ids}")

    # Look for file paths that occur more than once across all chunks.
    all_files = [
        file_path
        for chunk in chunks
        for file_path, _ in chunk.get('files', [])
    ]

    # Count once up front rather than calling list.count() per element; every
    # occurrence of a repeated path is still listed, in encounter order.
    occurrences = Counter(all_files)
    duplicates = [f for f in all_files if occurrences[f] > 1]
    if duplicates:
        print(f" ❌ Duplicate files found: {duplicates}")
    else:
        print(f" ✅ No duplicate files (all {len(all_files)} files unique)")

    return chunks
|
|
|
|
def test_chunk_structure():
    """Report whether each chunk from create_intelligent_chunks() has the expected fields.

    For every chunk, prints each required field with a present/missing marker,
    the type name of its value and the value itself (the string 'MISSING' when
    absent). If the chunk has a non-empty 'files' entry, the first element is
    checked for being a 2-tuple. Nothing is asserted here.

    Returns:
        The value returned by create_intelligent_chunks() for the sample files.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("TEST 6: Chunk Structure Validation")
    print(banner)

    test_files = [
        ("src/auth/auth.controller.js", "export class AuthController {}"),
        ("src/auth/auth.service.js", "export class AuthService {}"),
        ("README.md", "# Project"),
        ("package.json", '{"name": "test"}'),
    ]

    chunks = create_intelligent_chunks(test_files)

    required_fields = ['id', 'name', 'priority', 'files', 'context_dependencies', 'chunk_type']

    print("\n✅ Validating chunk structure:")
    for index, chunk in enumerate(chunks, 1):
        print(f"\n Chunk {index}:")
        for field in required_fields:
            marker = "✅" if field in chunk else "❌"
            value = chunk.get(field, 'MISSING')
            print(f" {marker} {field}: {type(value).__name__} = {value}")

        # Only the first entry of 'files' is inspected for the tuple shape.
        files = chunk.get('files', [])
        if not files:
            continue

        first_file = files[0]
        is_pair = isinstance(first_file, tuple) and len(first_file) == 2
        if is_pair:
            print(" ✅ files: List of (file_path, content) tuples")
        else:
            print(f" ❌ files: Invalid format - {type(first_file)}")

    return chunks
|
|
|
|
def run_all_tests():
    """Run the six test functions in order and print a summary.

    Each test's return value is checked for being non-empty (or, for the
    token estimate, greater than zero). Any exception raised along the way,
    including a failed assertion, is caught, reported with a traceback, and
    turned into a False result.

    Returns:
        True if every test ran and its check held, False otherwise.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("INTELLIGENT CHUNKING - COMPREHENSIVE TEST SUITE")
    print(divider)

    try:
        # Test 1: Module categorization
        modules = test_categorize_by_module()
        assert len(modules) > 0, "Module categorization failed"

        # Test 2: Overview files
        overview_files = test_get_overview_files()
        assert len(overview_files) > 0, "Overview file detection failed"

        # Test 3: Token estimation
        token_count = test_estimate_tokens()
        assert token_count > 0, "Token estimation failed"

        # Test 4: Token-based splitting
        sub_chunks = test_split_by_token_limit()
        assert len(sub_chunks) > 0, "Token splitting failed"

        # Test 5: Complete chunking
        all_chunks = test_create_intelligent_chunks()
        assert len(all_chunks) > 0, "Intelligent chunking failed"

        # Test 6: Structure validation
        structured_chunks = test_chunk_structure()
        assert len(structured_chunks) > 0, "Structure validation failed"

        print("\n" + divider)
        print("✅ ALL TESTS PASSED!")
        print(divider)
        print("\n📊 Summary:")
        summary_labels = (
            "Module categorization",
            "Overview file detection",
            "Token estimation",
            "Token-based splitting",
            "Intelligent chunking",
            "Structure validation",
        )
        for label in summary_labels:
            print(f" • {label}: ✅")
        print("\n🎉 Intelligent chunking implementation is working correctly!")

        return True

    except Exception as error:
        # Top-level boundary for the script: report the failure and signal it
        # through the return value instead of letting it propagate.
        print("\n" + divider)
        print(f"❌ TEST FAILED: {error}")
        print(divider)
        import traceback
        traceback.print_exc()
        return False
|
|
|
|
if __name__ == "__main__":
    # Map the suite's boolean outcome onto the process exit status:
    # 0 when everything passed, 1 otherwise.
    exit_code = 0 if run_all_tests() else 1
    sys.exit(exit_code)
|
|
|