implemented KG DB
parent ffe6ca349c
commit ad2c27d793
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -721,48 +721,47 @@ services:
  # =====================================

  ai-analysis-service:
    build: ./services/ai-analysis-service
    container_name: pipeline_ai_analysis_service
    build:
      context: ./services/ai-analysis-service
      dockerfile: Dockerfile
    container_name: pipeline_ai_analysis
    ports:
      - "8022:8022"
    environment:
      - PORT=8022
      - HOST=0.0.0.0
      - ANTHROPIC_API_KEY=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
      - POSTGRES_HOST=postgres

      # Neo4j Configuration
      - USE_NEO4J_KG=true
      - NEO4J_URI=bolt://neo4j:7687
      - NEO4J_USER=neo4j
      - NEO4J_PASSWORD=password
      - NEO4J_DATABASE=neo4j

      # Report Configuration
      - REPORT_TECHNICAL_ONLY=false

      # Existing database configurations
      - POSTGRES_HOST=pipeline_postgres
      - POSTGRES_PORT=5432
      - POSTGRES_DB=dev_pipeline
      - POSTGRES_USER=pipeline_admin
      - POSTGRES_PASSWORD=secure_pipeline_2024
      - REDIS_HOST=redis

      - MONGODB_URL=mongodb://pipeline_admin:mongo_secure_2024@pipeline_mongodb:27017/
      - MONGODB_DB=repo_analyzer

      - REDIS_HOST=pipeline_redis
      - REDIS_PORT=6379
      - REDIS_PASSWORD=redis_secure_2024
      - MONGODB_URL=mongodb://pipeline_admin:mongo_secure_2024@mongodb:27017/
      - MONGODB_DB=repo_analyzer
      - GIT_INTEGRATION_SERVICE_URL=http://git-integration:8012
      - CLAUDE_REQUESTS_PER_MINUTE=90
      - MAX_FILES_DEFAULT=100
      - CACHE_TTL_SECONDS=86400
      - CONTENT_MAX_TOKENS=8000
      - ENHANCED_PROCESSING_ENABLED=true
      - ENHANCED_BATCH_PROCESSING=true
      - ENHANCED_SMART_CHUNKING=true
      - ENHANCED_RATE_LIMIT=120
      - ENHANCED_BATCH_DELAY=0.05
      - ENHANCED_SMALL_FILE_DELAY=0.02
      - ENHANCED_MEDIUM_FILE_DELAY=0.05
      - ENHANCED_LARGE_FILE_DELAY=0.1
    volumes:
      - ai_analysis_logs:/app/logs
      - ./ai-analysis-reports:/app/reports
      - ai_analysis_temp:/app/temp
    depends_on:
      - neo4j
      - postgres
      - mongodb
      - redis
    networks:
      - pipeline_network
    depends_on:
      - postgres
      - redis
      - mongodb
      - git-integration
    deploy:
      resources:
        limits:
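Once the stack is up, connectivity to the graph database can be sanity-checked from inside the compose network. A minimal sketch, assuming the `neo4j` service name and the credentials configured in the environment block above:

import asyncio
from neo4j import AsyncGraphDatabase

async def ping() -> None:
    # Service name and credentials mirror the compose environment above.
    driver = AsyncGraphDatabase.driver("bolt://neo4j:7687", auth=("neo4j", "password"))
    try:
        async with driver.session() as session:
            result = await session.run("RETURN 1 AS ok")
            print(await result.single())
    finally:
        await driver.close()

asyncio.run(ping())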
8 services/ai-analysis-service/__init__.py Normal file
@@ -0,0 +1,8 @@
# Service initialization
# This file helps Python treat the directory as a package
# and can be used to set up any service-wide configurations

from .server import app  # Import the FastAPI app
from .ai_analyze import analyze_repository  # Import key functions

__all__ = ['app', 'analyze_repository']
File diff suppressed because it is too large
@@ -6,7 +6,17 @@ HOST=0.0.0.0
NODE_ENV=development

# AI API Keys
ANTHROPIC_API_KEY=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
ANTHROPIC_API_KEY=your_anthropic_api_key

# Neo4j Knowledge Graph Configuration
USE_NEO4J_KG=true
NEO4J_URI=bolt://localhost:7687
NEO4J_USER=neo4j
NEO4J_PASSWORD=secure_neo4j_2024
NEO4J_DATABASE=neo4j

# Report Configuration
REPORT_TECHNICAL_ONLY=false

# Database Configuration
POSTGRES_HOST=localhost
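The service loads this file with python-dotenv at startup, so the same values become visible through `os.getenv`. A minimal sketch of that lookup, mirroring the flag parsing done later in server.py's lifespan handler:

import os
from dotenv import load_dotenv

load_dotenv()  # reads the .env file from the working directory

# Truthy values accepted by the service: "1", "true", "yes", "on".
use_kg = os.getenv("USE_NEO4J_KG", "false").lower() in ("1", "true", "yes", "on")
print(use_kg, os.getenv("NEO4J_URI", "bolt://localhost:7687"))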
9 services/ai-analysis-service/knowledge_graph/__init__.py Normal file
@@ -0,0 +1,9 @@
"""
Knowledge graph utilities for the AI Analysis Service.

This package provides the Neo4j client and high-level helpers used to
persist and query analysis results in a graph representation.
"""

from .neo4j_client import Neo4jGraphClient  # noqa: F401
328 services/ai-analysis-service/knowledge_graph/neo4j_client.py Normal file
@@ -0,0 +1,328 @@
"""
Neo4j client helpers for the AI Analysis Service.

This module wraps the official Neo4j async driver and exposes a minimal
set of convenience methods that we can reuse across the service without
sprinkling Cypher execution boilerplate everywhere.
"""

from __future__ import annotations

import json
from contextlib import asynccontextmanager
from dataclasses import dataclass
from datetime import datetime
from typing import Any, AsyncIterator, Dict, List, Optional, Sequence

from neo4j import AsyncGraphDatabase

try:
    from neo4j import AsyncResult, AsyncSession  # type: ignore
except ImportError:  # pragma: no cover - fallback for older/newer driver versions
    AsyncResult = Any  # type: ignore
    AsyncSession = Any  # type: ignore


def _json_dumps(value: Any) -> str:
    """Serialize complex values so we can persist them as strings safely."""
    if value is None:
        return ""
    if isinstance(value, (str, int, float, bool)):
        return str(value)
    try:
        return json.dumps(value, default=str)
    except Exception:
        return str(value)
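For intuition, roughly what the helper produces (illustrative values; note that scalars go through Python's str(), so booleans render as "True"/"False" rather than JSON "true"/"false"):

# _json_dumps(None)                       -> ""
# _json_dumps(7.5)                        -> "7.5"
# _json_dumps(True)                       -> "True"   (str(), not JSON)
# _json_dumps({"ts": datetime(2024, 1, 1)})
#                                         -> '{"ts": "2024-01-01 00:00:00"}' via default=str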
@dataclass
class Neo4jConfig:
    uri: str
    user: str
    password: str
    database: Optional[str] = None
    fetch_size: int = 1000


class Neo4jGraphClient:
    """
    Thin wrapper around the Neo4j async driver that provides helpers for
    writing analysis artefacts into the graph and querying them back.
    """

    def __init__(self, config: Neo4jConfig) -> None:
        self._config = config
        self._driver = AsyncGraphDatabase.driver(
            config.uri,
            auth=(config.user, config.password),
            # Allow long running operations while the analysis progresses
            max_connection_lifetime=3600,
        )

    async def close(self) -> None:
        if self._driver:
            await self._driver.close()

    @asynccontextmanager
    async def session(self) -> AsyncIterator[AsyncSession]:
        kwargs: Dict[str, Any] = {}
        if self._config.database:
            kwargs["database"] = self._config.database
        if self._config.fetch_size:
            kwargs["fetch_size"] = self._config.fetch_size
        async with self._driver.session(**kwargs) as session:
            yield session

    async def _run_write(self, query: str, **params: Any) -> None:
        async with self.session() as session:
            async def _write(tx):
                result = await tx.run(query, **params)
                await result.consume()
            await session.execute_write(_write)

    async def _run_read(self, query: str, **params: Any) -> List[Dict[str, Any]]:
        async with self.session() as session:
            result: AsyncResult = await session.run(query, **params)
            records = await result.data()
            return records

    # ------------------------------------------------------------------ #
    # Write helpers
    # ------------------------------------------------------------------ #

    async def upsert_run(self, run_id: str, repository_id: str) -> None:
        await self._run_write(
            """
            MERGE (r:Run {run_id: $run_id})
            ON CREATE SET
                r.repository_id = $repository_id,
                r.created_at = datetime(),
                r.updated_at = datetime()
            ON MATCH SET
                r.repository_id = $repository_id,
                r.updated_at = datetime()
            """,
            run_id=run_id,
            repository_id=repository_id,
        )

    async def clear_run(self, run_id: str) -> None:
        await self._run_write(
            """
            MATCH (r:Run {run_id: $run_id})
            OPTIONAL MATCH (r)-[rel]-()
            DETACH DELETE r
            """,
            run_id=run_id,
        )

    async def upsert_module_graph(
        self,
        run_id: str,
        repository_id: str,
        module_props: Dict[str, Any],
        files: Sequence[Dict[str, Any]],
        findings: Sequence[Dict[str, Any]],
        dependencies: Sequence[Dict[str, Any]],
    ) -> None:
        """
        Persist module level artefacts in a single transaction.
        """
        # Ensure strings
        module_props = {
            k: _json_dumps(v) if isinstance(v, (dict, list, tuple, set)) else v
            for k, v in module_props.items()
        }
        files_payload = [
            {
                "path": item["path"],
                "props": {
                    key: _json_dumps(value) if isinstance(value, (dict, list, tuple, set)) else value
                    for key, value in item.get("props", {}).items()
                },
            }
            for item in files
        ]
        findings_payload = [
            {
                "id": item["id"],
                "props": {
                    key: _json_dumps(value) if isinstance(value, (dict, list, tuple, set)) else value
                    for key, value in item.get("props", {}).items()
                },
                "file_path": item.get("file_path"),
            }
            for item in findings
        ]
        dependencies_payload = [
            {
                "target": dependency.get("target"),
                "kind": dependency.get("kind", "depends_on"),
                "metadata": _json_dumps(dependency.get("metadata", {})),
            }
            for dependency in dependencies
        ]

        await self._run_write(
            """
            MERGE (r:Run {run_id: $run_id})
            ON CREATE SET
                r.repository_id = $repository_id,
                r.created_at = datetime(),
                r.updated_at = datetime()
            ON MATCH SET
                r.repository_id = $repository_id,
                r.updated_at = datetime()

            MERGE (m:Module {run_id: $run_id, name: $module_name})
            SET m += $module_props,
                m.updated_at = datetime()

            MERGE (r)-[:RUN_HAS_MODULE]->(m)

            WITH m
            UNWIND $files AS file_data
            MERGE (f:File {run_id: $run_id, path: file_data.path})
            SET f += file_data.props,
                f.updated_at = datetime()
            MERGE (m)-[:MODULE_INCLUDES_FILE]->(f)

            WITH m
            UNWIND $findings AS finding_data
            MERGE (fd:Finding {run_id: $run_id, finding_id: finding_data.id})
            SET fd += finding_data.props,
                fd.updated_at = datetime()
            MERGE (m)-[:MODULE_HAS_FINDING]->(fd)
            FOREACH (fp IN CASE WHEN finding_data.file_path IS NULL THEN [] ELSE [finding_data.file_path] END |
                MERGE (ff:File {run_id: $run_id, path: fp})
                MERGE (fd)-[:FINDING_TOUCHES_FILE]->(ff)
            )

            WITH m
            UNWIND $dependencies AS dependency
            FOREACH (_ IN CASE WHEN dependency.target IS NULL THEN [] ELSE [1] END |
                MERGE (dep:Module {run_id: $run_id, name: dependency.target})
                MERGE (m)-[rel:MODULE_DEPENDENCY {kind: dependency.kind}]->(dep)
                SET rel.metadata = dependency.metadata,
                    rel.updated_at = datetime()
            )
            """,
            run_id=run_id,
            repository_id=repository_id,
            module_name=module_props.get("name"),
            module_props=module_props,
            files=files_payload,
            findings=findings_payload,
            dependencies=dependencies_payload,
        )
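To make the expected shapes concrete, a hypothetical call might look like the sketch below (ids and values invented for illustration; it must run inside an async context). One caveat worth knowing: because the Cypher above chains UNWINDs, an empty files or findings list yields zero rows and ends the query early, so callers are expected to pass at least one element per list.

# Hypothetical payload shapes accepted by upsert_module_graph():
await client.upsert_module_graph(
    run_id="run-42",
    repository_id="repo-7",
    module_props={"name": "auth", "quality_score": 7.5},
    files=[{"path": "auth/login.py", "props": {"language": "python"}}],
    findings=[{
        "id": "run-42:0:0",
        "props": {"summary": "Hardcoded secret", "severity": "high"},
        "file_path": "auth/login.py",
    }],
    dependencies=[{"target": "database", "kind": "depends_on", "metadata": "{}"}],
)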
    async def upsert_run_state(self, run_id: str, state: Dict[str, Any]) -> None:
        await self._run_write(
            """
            MERGE (r:Run {run_id: $run_id})
            SET r.analysis_state = $state,
                r.state_updated_at = datetime()
            """,
            run_id=run_id,
            state=_json_dumps(state),
        )

    async def upsert_synthesis(self, run_id: str, synthesis: Dict[str, Any]) -> None:
        await self._run_write(
            """
            MERGE (r:Run {run_id: $run_id})
            SET r.synthesis_analysis = $synthesis,
                r.synthesis_updated_at = datetime()
            """,
            run_id=run_id,
            synthesis=_json_dumps(synthesis),
        )

    # ------------------------------------------------------------------ #
    # Read helpers
    # ------------------------------------------------------------------ #

    async def fetch_modules(self, run_id: str) -> List[Dict[str, Any]]:
        records = await self._run_read(
            """
            MATCH (r:Run {run_id: $run_id})-[:RUN_HAS_MODULE]->(m:Module)
            OPTIONAL MATCH (m)-[:MODULE_INCLUDES_FILE]->(f:File)
            OPTIONAL MATCH (m)-[:MODULE_HAS_FINDING]->(fd:Finding)
            OPTIONAL MATCH (fd)-[:FINDING_TOUCHES_FILE]->(ff:File)
            RETURN
                m,
                collect(DISTINCT properties(f)) AS files,
                collect(DISTINCT properties(fd)) AS findings,
                collect(DISTINCT properties(ff)) AS finding_files
            """,
            run_id=run_id,
        )
        modules: List[Dict[str, Any]] = []
        for record in records:
            module_node = record.get("m", {})
            files = record.get("files", [])
            findings = record.get("findings", [])
            finding_files = record.get("finding_files", [])
            modules.append(
                {
                    "module": module_node,
                    "files": files,
                    "findings": findings,
                    "finding_files": finding_files,
                }
            )
        return modules

    async def fetch_run_state(self, run_id: str) -> Optional[Dict[str, Any]]:
        records = await self._run_read(
            """
            MATCH (r:Run {run_id: $run_id})
            RETURN r.analysis_state AS analysis_state
            """,
            run_id=run_id,
        )
        if not records:
            return None
        raw_state = records[0].get("analysis_state")
        if not raw_state:
            return None
        try:
            return json.loads(raw_state)
        except json.JSONDecodeError:
            return {"raw": raw_state}

    async def fetch_synthesis(self, run_id: str) -> Optional[Dict[str, Any]]:
        records = await self._run_read(
            """
            MATCH (r:Run {run_id: $run_id})
            RETURN r.synthesis_analysis AS synthesis
            """,
            run_id=run_id,
        )
        if not records:
            return None
        raw_synthesis = records[0].get("synthesis")
        if not raw_synthesis:
            return None
        try:
            return json.loads(raw_synthesis)
        except json.JSONDecodeError:
            return {"raw": raw_synthesis}

    async def fetch_run_metadata(self, run_id: str) -> Optional[Dict[str, Any]]:
        records = await self._run_read(
            """
            MATCH (r:Run {run_id: $run_id})
            RETURN r
            """,
            run_id=run_id,
        )
        if not records:
            return None
        run_node = records[0].get("r")
        if not run_node:
            return None
        metadata = dict(run_node)
        if "created_at" in metadata and isinstance(metadata["created_at"], datetime):
            metadata["created_at"] = metadata["created_at"].isoformat()
        if "updated_at" in metadata and isinstance(metadata["updated_at"], datetime):
            metadata["updated_at"] = metadata["updated_at"].isoformat()
        return metadata
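Taken together, a minimal end-to-end use of the client might look like the sketch below. The connection details are assumptions (in the service itself they come from the NEO4J_* environment variables), and it assumes the package is importable and a Neo4j instance is reachable:

import asyncio

from knowledge_graph.neo4j_client import Neo4jConfig, Neo4jGraphClient

async def main() -> None:
    # Assumed local credentials for illustration only.
    client = Neo4jGraphClient(Neo4jConfig(
        uri="bolt://localhost:7687", user="neo4j", password="password",
    ))
    try:
        await client.upsert_run(run_id="run-42", repository_id="repo-7")
        await client.upsert_run_state("run-42", {"modules_analyzed": []})
        print(await client.fetch_run_metadata("run-42"))
    finally:
        await client.close()

asyncio.run(main())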
214 services/ai-analysis-service/knowledge_graph/operations.py Normal file
@@ -0,0 +1,214 @@
"""
High-level knowledge graph operations used by the AI Analysis Service.

These helpers translate existing analysis objects into the node/relationship
structure expected by `Neo4jGraphClient`.
"""

from __future__ import annotations

import json
import uuid
from datetime import datetime
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple

from .neo4j_client import Neo4jGraphClient


def _safe_json(value: Any) -> str:
    if value is None:
        return ""
    if isinstance(value, (str, int, float, bool)):
        return str(value)
    try:
        return json.dumps(value, default=str)
    except Exception:
        return str(value)


def _normalize_issue(issue: Any, index: int) -> Tuple[str, Dict[str, Any]]:
    """
    Convert an issue structure that might be a string or dict into a dict.
    Returns (summary, props).
    """
    if isinstance(issue, dict):
        summary = issue.get("title") or issue.get("issue") or issue.get("description") or f"Issue #{index}"
        props = {
            "summary": summary,
            "severity": issue.get("severity", "medium"),
            "category": issue.get("category", "general"),
            "description": issue.get("description") or issue.get("details") or "",
            "recommendation": issue.get("recommendation") or issue.get("action") or "",
            "evidence": _safe_json(issue.get("evidence")),
        }
        if issue.get("impact"):
            props["impact"] = issue["impact"]
        if issue.get("line_number"):
            props["line_number"] = issue["line_number"]
        return summary, props
    summary = str(issue)
    return summary, {
        "summary": summary,
        "severity": "medium",
        "category": "general",
    }
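For example (illustrative inputs, traced from the code above):

# String issues get default metadata:
_normalize_issue("TODO left in code", 0)
# -> ("TODO left in code",
#     {"summary": "TODO left in code", "severity": "medium", "category": "general"})

# Dict issues keep their fields and fall back key by key:
_normalize_issue({"title": "SQL injection", "severity": "high"}, 1)
# -> ("SQL injection",
#     {"summary": "SQL injection", "severity": "high", "category": "general",
#      "description": "", "recommendation": "", "evidence": ""})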
def build_module_payload(
    run_id: str,
    repository_id: str,
    module_name: str,
    chunk: Dict[str, Any],
    chunk_analysis: Dict[str, Any],
    file_analyses: Sequence[Any],
    metadata: Dict[str, Any],
    ai_response: str,
) -> Dict[str, Any]:
    """Prepare module level payload for graph insertion."""
    module_id = chunk.get("id") or str(uuid.uuid4())
    module_quality = chunk_analysis.get("module_quality_score")
    module_overview = chunk_analysis.get("module_overview", "")
    module_architecture = chunk_analysis.get("module_architecture", "")
    module_security = chunk_analysis.get("module_security_assessment", "")
    module_recommendations = chunk_analysis.get("module_recommendations", [])

    files: List[Dict[str, Any]] = []
    findings: List[Dict[str, Any]] = []

    total_issues = 0
    total_recommendations = 0

    for fa_index, fa in enumerate(file_analyses):
        path = getattr(fa, "path", None) or getattr(fa, "file_path", "unknown")
        issues = getattr(fa, "issues_found", None) or []
        recommendations = getattr(fa, "recommendations", None) or []
        total_issues += len(issues) if isinstance(issues, (list, tuple)) else 0
        total_recommendations += len(recommendations) if isinstance(recommendations, (list, tuple)) else 0

        files.append(
            {
                "path": str(path),
                "props": {
                    "language": getattr(fa, "language", "unknown"),
                    "lines_of_code": getattr(fa, "lines_of_code", 0),
                    "complexity_score": getattr(fa, "complexity_score", 0),
                    "severity_score": getattr(fa, "severity_score", 0),
                },
            }
        )

        if isinstance(issues, Iterable):
            for issue_index, raw_issue in enumerate(issues):
                summary, issue_props = _normalize_issue(raw_issue, issue_index)
                finding_id = f"{module_id}:{fa_index}:{issue_index}"
                issue_props.update(
                    {
                        "module": module_name,
                        "file_path": str(path),
                        "created_at": datetime.utcnow().isoformat(),
                    }
                )
                findings.append(
                    {
                        "id": finding_id,
                        "props": issue_props,
                        "file_path": str(path),
                    }
                )

    module_props: Dict[str, Any] = {
        "name": module_name,
        "module_id": module_id,
        "quality_score": module_quality,
        "overview": module_overview,
        "architecture": module_architecture,
        "security": module_security,
        "recommendations": module_recommendations,
        "analysis_payload": metadata,
        "ai_response": ai_response,
        "repository_id": repository_id,
        "total_files": len(file_analyses),
        "total_issues": total_issues,
        "total_recommendations": total_recommendations,
        "updated_at": datetime.utcnow().isoformat(),
    }

    dependencies = []
    for dependency in metadata.get("dependencies", {}).get("depends_on_chunks", []):
        dependencies.append(
            {
                "target": dependency,
                "kind": "depends_on",
                "metadata": {"source": module_name},
            }
        )

    return {
        "module_props": module_props,
        "files": files,
        "findings": findings,
        "dependencies": dependencies,
    }


async def store_module_analysis(
    client: Neo4jGraphClient,
    run_id: str,
    repository_id: str,
    module_payload: Dict[str, Any],
) -> None:
    await client.upsert_module_graph(
        run_id=run_id,
        repository_id=repository_id,
        module_props=module_payload["module_props"],
        files=module_payload["files"],
        findings=module_payload["findings"],
        dependencies=module_payload["dependencies"],
    )


async def store_analysis_state(client: Neo4jGraphClient, run_id: str, analysis_state: Dict[str, Any]) -> None:
    await client.upsert_run_state(run_id=run_id, state=analysis_state)


async def store_synthesis(client: Neo4jGraphClient, run_id: str, synthesis: Dict[str, Any]) -> None:
    await client.upsert_synthesis(run_id=run_id, synthesis=synthesis)


async def fetch_module_analyses(client: Neo4jGraphClient, run_id: str) -> List[Dict[str, Any]]:
    modules = await client.fetch_modules(run_id)
    module_analyses: List[Dict[str, Any]] = []
    for entry in modules:
        node = entry.get("module", {})
        files = entry.get("files", [])
        findings = entry.get("findings", [])
        analysis_payload = node.get("analysis_payload")
        if isinstance(analysis_payload, str):
            try:
                analysis_payload = json.loads(analysis_payload)
            except json.JSONDecodeError:
                analysis_payload = {"raw": analysis_payload}
        module_analyses.append(
            {
                "module_name": node.get("name"),
                "module_id": node.get("module_id"),
                "quality_score": node.get("quality_score"),
                "module_overview": node.get("overview"),
                "module_architecture": node.get("architecture"),
                "module_security_assessment": node.get("security"),
                "module_recommendations": node.get("recommendations"),
                "files_analyzed": [file.get("path") for file in files if file.get("path")],
                "raw_payload": analysis_payload,
                "findings": findings,
            }
        )
    return module_analyses


async def fetch_run_state(client: Neo4jGraphClient, run_id: str) -> Optional[Dict[str, Any]]:
    return await client.fetch_run_state(run_id)


async def fetch_synthesis(client: Neo4jGraphClient, run_id: str) -> Optional[Dict[str, Any]]:
    return await client.fetch_synthesis(run_id)
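In the service these helpers are chained roughly as follows (a sketch with invented inputs and an already-initialized client, inside an async context; the real call sites are in server.py below):

payload = build_module_payload(
    run_id="run-42",
    repository_id="repo-7",
    module_name="auth",
    chunk={"id": "chunk-1"},
    chunk_analysis={"module_quality_score": 7.5, "module_overview": "Auth module"},
    file_analyses=[],          # normally FileAnalysis-like objects
    metadata={"dependencies": {"depends_on_chunks": ["database"]}},
    ai_response="## MODULE OVERVIEW ...",
)
await store_module_analysis(client, "run-42", "repo-7", payload)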
@@ -19,6 +19,7 @@ class AnalysisProgressManager:
        self.subscribers: List[asyncio.Queue] = []
        self.redis_client: Optional[redis.Redis] = None
        self.progress_key = f"analysis_progress:{analysis_id}"
        self._complete: bool = False

    async def connect_redis(self):
        """Connect to Redis for progress persistence"""
@@ -103,6 +104,9 @@ class AnalysisProgressManager:
                self.unsubscribe(queue)

        print(f"📤 Event emitted: {event_type} - {data.get('message', '')}")

        if event_type in {"analysis_completed", "analysis_error"}:
            self._complete = True

    async def get_progress_history(self) -> List[Dict[str, Any]]:
        """Retrieve progress history from Redis"""
@@ -125,6 +129,12 @@ class AnalysisProgressManager:
        except Exception as e:
            print(f"⚠️ Failed to clear progress: {e}")

        self._complete = False

    def is_complete(self) -> bool:
        """Return whether the analysis has completed or errored."""
        return self._complete


class GlobalProgressTracker:
    """Global singleton to track all active analyses"""
32 services/ai-analysis-service/pyproject.toml Normal file
@@ -0,0 +1,32 @@
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "ai-analysis-service"
version = "0.1.0"
description = "AI Analysis Microservice for Code Repository Analysis"
requires-python = ">=3.9"

dependencies = [
    "fastapi>=0.104.1",
    "uvicorn>=0.24.0",
    "pydantic>=2.5.0",
    "httpx>=0.25.0",
    "redis>=4.5.0",
    "psycopg2-binary>=2.9.7",
    "neo4j>=5.8.0",
    "anthropic>=0.7.0",
    "python-dotenv>=1.0.0"
]

[project.optional-dependencies]
dev = [
    "pytest",
    "mypy",
    "black",
    "isort"
]

[tool.setuptools]
packages = ["ai_analysis_service"]
@@ -17,6 +17,7 @@ GitPython>=3.1.40
redis>=4.5.0
pymongo>=4.5.0
psycopg2-binary>=2.9.7
neo4j>=5.8.0  # Neo4j Graph Database Driver

# Data processing
numpy>=1.24.0
@@ -26,13 +26,12 @@ import uvicorn
import mimetypes
import httpx
import redis
import psycopg2
from psycopg2.extras import RealDictCursor

# PostgreSQL cursor for querying
try:
    from psycopg2.extras import RealDictCursor
except ImportError:
    # Fallback if psycopg2 not available
    RealDictCursor = None
from knowledge_graph import Neo4jGraphClient
from knowledge_graph.neo4j_client import Neo4jConfig
from knowledge_graph import operations as kg_ops

# Import the AI analysis components
# Note: ai-analyze.py has a hyphen, so we need to handle the import specially
@@ -40,7 +39,7 @@ import sys
import importlib.util

# Load the ai-analyze.py module
spec = importlib.util.spec_from_file_location("ai_analyze", "ai-analyze.py")
spec = importlib.util.spec_from_file_location("ai_analyze", "./ai-analyze.py")
ai_analyze_module = importlib.util.module_from_spec(spec)
sys.modules["ai_analyze"] = ai_analyze_module
spec.loader.exec_module(ai_analyze_module)
@@ -52,7 +51,6 @@ from ai_analyze import (
    ArchitectureAnalysis,
    SecurityAnalysis,
    CodeQualityAnalysis,
    PerformanceAnalysis,
    Issue,
    ModuleAnalysis,
    ModuleSummary
@@ -71,12 +69,14 @@ from progress_manager import AnalysisProgressManager, progress_tracker

# Global analyzer instance
analyzer = None
neo4j_client: Optional[Neo4jGraphClient] = None
USE_KNOWLEDGE_GRAPH = False

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifespan context manager for startup and shutdown events."""
    # Startup
    global analyzer
    global analyzer, neo4j_client, USE_KNOWLEDGE_GRAPH
    try:
        # Load environment variables
        from dotenv import load_dotenv
@@ -116,6 +116,26 @@ async def lifespan(app: FastAPI):
        analyzer = EnhancedGitHubAnalyzer(api_key, config)

        print("✅ AI Analysis Service initialized successfully")

        use_kg_flag = os.getenv("USE_NEO4J_KG", os.getenv("USE_KNOWLEDGE_GRAPH", "false"))
        USE_KNOWLEDGE_GRAPH = str(use_kg_flag).lower() in ("1", "true", "yes", "on")
        if USE_KNOWLEDGE_GRAPH:
            try:
                neo4j_config = Neo4jConfig(
                    uri=os.getenv("NEO4J_URI", "bolt://localhost:7687"),
                    user=os.getenv("NEO4J_USER", "neo4j"),
                    password=os.getenv("NEO4J_PASSWORD", "neo4j"),
                    database=os.getenv("NEO4J_DATABASE") or None,
                )
                neo4j_client = Neo4jGraphClient(neo4j_config)
                print(f"✅ Knowledge graph enabled (Neo4j URI: {neo4j_config.uri})")
            except Exception as kg_error:
                neo4j_client = None
                USE_KNOWLEDGE_GRAPH = False
                print(f"⚠️ Failed to initialize Neo4j client: {kg_error}. Falling back to episodic memory.")
        else:
            neo4j_client = None
            print("ℹ️ Knowledge graph disabled (falling back to episodic memory)")
    except Exception as e:
        print(f"❌ Failed to initialize AI Analysis Service: {e}")
        raise
@@ -124,6 +144,8 @@ async def lifespan(app: FastAPI):

    # Shutdown (if needed)
    # Cleanup code can go here if needed
    if neo4j_client:
        await neo4j_client.close()

app = FastAPI(
    title="AI Analysis Service",
@@ -624,6 +646,13 @@ git_client = GitIntegrationClient()
analysis_cache = AnalysisCache()
content_optimizer = ContentOptimizer()

def get_progress_manager(analysis_id: str) -> AnalysisProgressManager:
    """Retrieve an existing progress manager or create one if missing."""
    manager = progress_tracker.get_manager(analysis_id)
    if manager is None:
        manager = progress_tracker.create_manager(analysis_id)
    return manager

# ============================================================================
# TOKEN USAGE & COST TRACKING (NEW)
# ============================================================================
@@ -1075,56 +1104,37 @@ async def stream_progress(analysis_id: str, request: Request):
        };
    """
    async def event_generator():
        # Get or create progress manager
        manager = progress_tracker.get_manager(analysis_id)

        if not manager:
            # Send error and close
            yield f"data: {json.dumps({'error': 'Analysis not found'})}\n\n"
            return

        # Subscribe to updates
        queue = manager.subscribe()

        # Properly handle event generation
        progress_mgr: Optional[AnalysisProgressManager] = None
        subscriber_queue: Optional[asyncio.Queue] = None
        try:
            # Send historical events first
            history = await manager.get_progress_history()
            for event in history:
                if await request.is_disconnected():
                    break
                yield f"data: {json.dumps(event)}\n\n"
            progress_mgr = get_progress_manager(analysis_id)

            # Stream new events
            # Make sure Redis connection exists so we can replay history
            if progress_mgr.redis_client is None:
                await progress_mgr.connect_redis()

            # Replay cached history first
            history = await progress_mgr.get_progress_history()
            for event in history:
                yield f"data: {json.dumps(event)}\n\n"

            # Subscribe to new events
            subscriber_queue = progress_mgr.subscribe()

            while True:
                if await request.is_disconnected():
                event = await subscriber_queue.get()
                yield f"data: {json.dumps(event)}\n\n"
                if event.get("event") in ("analysis_completed", "analysis_error"):
                    break

                try:
                    # Wait for next event with timeout
                    event = await asyncio.wait_for(queue.get(), timeout=30.0)
                    yield f"data: {json.dumps(event)}\n\n"

                    # If analysis completed, close stream
                    if event.get('event') in ['analysis_completed', 'analysis_error']:
                        break

                except asyncio.TimeoutError:
                    # Send keepalive ping
                    yield f": keepalive\n\n"
                    continue

        except Exception as e:
            error_payload = {"error": str(e)}
            yield f"data: {json.dumps(error_payload)}\n\n"
        finally:
            manager.unsubscribe(queue)
            if progress_mgr and subscriber_queue:
                progress_mgr.unsubscribe(subscriber_queue)

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no"  # Disable nginx buffering
        }
    )
    return StreamingResponse(event_generator(), media_type="text/event-stream")
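For reference, a client can consume this stream with httpx (already a dependency). A minimal sketch, assuming the service runs on localhost:8022 and that the route path and analysis id below are hypothetical:

import asyncio
import httpx

async def follow_progress(analysis_id: str) -> None:
    url = f"http://localhost:8022/progress/{analysis_id}/stream"  # hypothetical route path
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("GET", url) as response:
            async for line in response.aiter_lines():
                # SSE events arrive as "data: {...}" lines; comments (": keepalive") are skipped.
                if line.startswith("data: "):
                    print(line[len("data: "):])

asyncio.run(follow_progress("abc123"))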
@app.post("/analyze")
async def analyze_repository(request: AnalysisRequest, background_tasks: BackgroundTasks):
@@ -3190,187 +3200,32 @@ async def analyze_files_smart_batch(files_batch: List[Tuple[str, str]], reposito

async def store_chunk_analysis_in_memory(chunk: Dict, file_analyses: List, chunk_analysis: Dict, repository_id: str, session_id: str = None, analysis_state: Optional[Dict] = None):
    """
    Store detailed chunk-level analysis in episodic memory (MongoDB).
    Creates one record per chunk with comprehensive analysis data.
    Now includes progressive context (Option 3: Hybrid Approach).
    Store chunk analysis in memory using either Neo4j Knowledge Graph or Episodic Memory.
    Supports fallback mechanisms for robust storage.
    """
    try:
        if not analyzer or not hasattr(analyzer, 'memory_manager'):
            print("⚠️ [MEMORY] Memory manager not available, skipping chunk storage")
            return

        # Get session ID from analyzer
        if not session_id:
            session_id = getattr(analyzer, 'session_id', str(uuid.uuid4()))

        chunk_id = chunk.get('id', 'unknown')
        chunk_name = chunk.get('name', 'unknown')
        chunk_type = chunk.get('chunk_type', 'module')
        chunk_priority = chunk.get('priority', 2)
        dependencies = chunk.get('context_dependencies', [])

        # Calculate chunk metrics
        # Validate input parameters
        if not chunk or not file_analyses:
            print("❌ [MEMORY] Invalid chunk or file_analyses")
            return None

        # Extract necessary variables (these were missing in the original implementation)
        chunk_name = chunk.get('name', 'Unknown Chunk')
        chunk_type = chunk.get('type', 'generic')
        chunk_priority = chunk.get('priority', 5)
        total_files = len(file_analyses)
        total_lines = sum(fa.lines_of_code for fa in file_analyses if fa.lines_of_code is not None)
        total_issues = sum(len(fa.issues_found) if isinstance(fa.issues_found, (list, tuple)) else 0 for fa in file_analyses)
        total_recommendations = sum(len(fa.recommendations) if isinstance(fa.recommendations, (list, tuple)) else 0 for fa in file_analyses)

        # Calculate quality distribution
        high_quality = len([fa for fa in file_analyses if fa.severity_score >= 8])
        medium_quality = len([fa for fa in file_analyses if 5 <= fa.severity_score < 8])
        low_quality = len([fa for fa in file_analyses if fa.severity_score < 5])

        # Get module quality score from chunk_analysis or calculate from files
        module_quality = chunk_analysis.get('module_quality_score',
            sum(fa.severity_score for fa in file_analyses if fa.severity_score is not None) / total_files if total_files > 0 else 5.0)

        # Build comprehensive AI response text with CODE EVIDENCE
        # FIX: Convert all values to strings immediately to prevent TypeError
        module_overview = chunk_analysis.get('module_overview', f"Analysis of {chunk_name} module")
        if isinstance(module_overview, dict):
            module_overview = json.dumps(module_overview, indent=2)
        else:
            module_overview = str(module_overview)

        # Extract code evidence from file analyses for concrete proof in reports
        try:
            code_evidence = extract_code_evidence_from_files(file_analyses)
            print(f" 📸 Extracted {len(code_evidence)} evidence items")
        except Exception as e:
            print(f" ⚠️ Code evidence extraction failed: {e}")
            code_evidence = []

        module_architecture = chunk_analysis.get('module_architecture', 'Architecture analysis in progress')
        if isinstance(module_architecture, dict):
            module_architecture = json.dumps(module_architecture, indent=2)
        else:
            module_architecture = str(module_architecture)

        module_security = chunk_analysis.get('module_security_assessment', 'Security assessment in progress')
        if isinstance(module_security, dict):
            module_security = json.dumps(module_security, indent=2)
        else:
            module_security = str(module_security)

        ai_response_parts = [
            f"# COMPREHENSIVE ANALYSIS: {chunk_name.upper()}",
            f"Chunk ID: {chunk_id}",
            f"Chunk Type: {chunk_type}",
            "",
            f"## MODULE OVERVIEW",
            module_overview,
            "",
            f"## MODULE METRICS",
            f"- Module Quality Score: {module_quality:.1f}/10",
            f"- Total Files: {total_files}",
            f"- Total Lines of Code: {total_lines:,}",
            f"- Total Issues: {total_issues}",
            f"- Total Recommendations: {total_recommendations}",
            f"- High Quality Files (Score >= 8): {high_quality}",
            f"- Medium Quality Files (Score 5-7): {medium_quality}",
            f"- Low Quality Files (Score < 5): {low_quality}",
            "",
            f"## ARCHITECTURE ASSESSMENT",
            module_architecture,
            "",
            f"## SECURITY ASSESSMENT",
            module_security,
            "",
            f"## MODULE RECOMMENDATIONS",
        ]

        module_recs = chunk_analysis.get('module_recommendations', [])
        if module_recs:
            for rec in module_recs:
                # Handle both string and dict recommendations
                if isinstance(rec, dict):
                    rec_text = rec.get('text', str(rec.get('recommendation', '')))[:200]
                else:
                    rec_text = str(rec)
                ai_response_parts.append(f"- {rec_text}")
        else:
            ai_response_parts.append("- Review module structure")

        ai_response_parts.extend([
            "",
            "## CODE EVIDENCE & FINDINGS",
            ""
        ])

        # Add code evidence section
        if code_evidence:
            ai_response_parts.append("### SPECIFIC CODE ISSUES WITH EVIDENCE:")
            for evidence in code_evidence[:10]:  # Top 10 most critical
                ai_response_parts.extend([
                    f"**File:** {evidence['file']}",
                    f"**Issue:** {evidence['issue']}",
                    f"**Line {evidence['line_number']}:**",
                    "```" + evidence['language'],
                    evidence['code_snippet'],
                    "```",
                    f"**Recommendation:** {evidence['recommendation']}",
                    ""
                ])

        ai_response_parts.extend([
            "",
            "## FILE-LEVEL ANALYSIS SUMMARY",
            ""
        ])

        # Add detailed file analyses
        for fa in file_analyses:
            ai_response_parts.extend([
                f"### {fa.path}",
                f"- Language: {fa.language}",
                f"- Lines of Code: {fa.lines_of_code}",
                f"- Quality Score: {fa.severity_score:.1f}/10",
                f"- Complexity Score: {fa.complexity_score:.1f}/10",
                f"- Issues: {len(fa.issues_found) if isinstance(fa.issues_found, (list, tuple)) else 0}",
                ""
            ])

            if fa.issues_found:
                ai_response_parts.append("**Issues Found:**")
                for issue in fa.issues_found[:5]:  # Top 5 issues
                    # Handle both string and dict issues
                    if isinstance(issue, dict):
                        issue_text = issue.get('title', str(issue.get('description', '')))[:200]
                    else:
                        issue_text = str(issue)
                    ai_response_parts.append(f"- {issue_text}")
                ai_response_parts.append("")

            if fa.recommendations:
                ai_response_parts.append("**Recommendations:**")
                for rec in fa.recommendations[:5]:  # Top 5 recommendations
                    # Handle both string and dict recommendations
                    if isinstance(rec, dict):
                        rec_text = rec.get('text', str(rec.get('recommendation', '')))[:200]
                    else:
                        rec_text = str(rec)
                    ai_response_parts.append(f"- {rec_text}")
                ai_response_parts.append("")

            if fa.detailed_analysis:
                # Ensure detailed_analysis is a string, not a dict
                detailed_analysis_text = str(fa.detailed_analysis) if not isinstance(fa.detailed_analysis, str) else fa.detailed_analysis
                ai_response_parts.extend([
                    "**Detailed Analysis:**",
                    detailed_analysis_text,
                    ""
                ])

        # Final safety check: Convert all items to strings before joining
        total_lines = sum(fa.lines_of_code for fa in file_analyses)
        dependencies = chunk.get('dependencies', [])
        module_quality = chunk_analysis.get('module_quality', 5.0)
        total_issues = sum(len(fa.issues_found) for fa in file_analyses)
        total_recommendations = sum(len(fa.recommendations) for fa in file_analyses)
        high_quality = len([fa for fa in file_analyses if fa.complexity_score and fa.complexity_score <= 3])
        medium_quality = len([fa for fa in file_analyses if fa.complexity_score and 3 < fa.complexity_score <= 7])
        low_quality = len([fa for fa in file_analyses if fa.complexity_score and fa.complexity_score > 7])

        # Prepare AI response (this was also missing)
        ai_response_parts_clean = []
        for item in ai_response_parts:
            if isinstance(item, dict):
                # Convert dict to JSON string (json is already imported at module level)
                ai_response_parts_clean.append(json.dumps(item, indent=2))
            elif isinstance(item, (list, tuple)):
                # Convert list/tuple to string representation
                ai_response_parts_clean.append(str(item))
            else:
                for item in chunk_analysis.get('ai_response_parts', []):
                    ai_response_parts_clean.append(str(item))

        ai_response = "\n".join(ai_response_parts_clean)
@@ -3379,105 +3234,69 @@ async def store_chunk_analysis_in_memory(chunk: Dict, file_analyses: List, chunk
        file_names = [fa.path for fa in file_analyses]
        user_query = f"Analysis of chunk: {chunk_name} ({chunk_type}) - {total_files} files: {', '.join(file_names[:5])}{'...' if len(file_names) > 5 else ''}"

        # Prepare file analyses data for storage (OPTIMIZATION: Store only paths, not content)
        # IMPORTANT: Never store file content in episodic memory to save storage space
        # Prepare file analyses data for storage
        file_analyses_data = []
        for fa in file_analyses:
            file_data = {
                'file_path': str(fa.path),  # Only store path, not content
                'file_path': str(fa.path),
                'language': fa.language,
                'lines_of_code': fa.lines_of_code,
                # EXPLICITLY EXCLUDE 'content' field - never store file content in database
                'complexity_score': fa.complexity_score,
                'severity_score': fa.severity_score,
                'issues_found': fa.issues_found if isinstance(fa.issues_found, (list, tuple)) else [],
                'recommendations': fa.recommendations if isinstance(fa.recommendations, (list, tuple)) else [],
                'detailed_analysis': fa.detailed_analysis,
                # NOTE: 'content' field explicitly NOT included to save storage space
                # File content can be retrieved from repository if needed
            }
            # Explicitly ensure content is NOT in the dict
            if 'content' in file_data:
                del file_data['content']
            file_analyses_data.append(file_data)

        # Build progressive context metadata (Option 3: Hybrid Approach)
        progressive_context = {}
        if analysis_state:
            # OPTIMIZATION: Limit context to last 5 modules for faster processing
            all_module_summaries = analysis_state.get('module_summaries', {})
            modules_analyzed = analysis_state.get('modules_analyzed', [])
            last_5_modules = modules_analyzed[-5:] if len(modules_analyzed) > 5 else modules_analyzed

            progressive_context = {
                'modules_analyzed_before': last_5_modules[:-1] if last_5_modules else [],  # Only last 5 modules
                'project_overview_summary': analysis_state.get('project_overview', '')[:300] if analysis_state.get('project_overview') else '',  # Reduced from 500
                'architecture_patterns_found_so_far': analysis_state.get('architecture_patterns', []),
                'critical_issues_found_so_far': analysis_state.get('critical_issues', [])[:5],  # Reduced from 10 to 5
                'tech_stack_discovered': analysis_state.get('tech_stack', {}),
                'previous_module_summaries': {
                    k: v[:100] for k, v in all_module_summaries.items()  # Reduced from 200 to 100 chars
                    if k != chunk_name and k in last_5_modules  # Only last 5 modules
                }
            }

        # Get run_id from analyzer if available (for hierarchical storage compatibility)
        # Get run_id
        run_id = getattr(analyzer, 'run_id', None)
        if not run_id:
            # Try to extract from session_id or generate
            run_id = f"repo_analysis_{repository_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        # Build comprehensive metadata
        metadata = {
            'type': 'module_analysis',  # IMPORTANT: Mark as module_analysis for retrieval
            'run_id': run_id,  # IMPORTANT: Include run_id for retrieval
            'chunk_id': chunk_id,
            'type': 'module_analysis',
            'run_id': run_id,
            'chunk_name': chunk_name,
            'chunk_type': chunk_type,
            'chunk_priority': chunk_priority,
            'module_name': chunk_name if chunk_type == 'module' else None,
            'total_files_in_chunk': total_files,
            'total_lines_in_chunk': total_lines,
            'chunk_token_count': estimate_tokens(chunk.get('files', [])),
            'context_dependencies': dependencies,
            'repository_id': repository_id,
            'analysis_type': 'intelligent_chunking',

            # NEW: Progressive Context (Option 3)
            'progressive_context': progressive_context,

            # Chunk metrics
            'total_files_in_chunk': total_files,
            'chunk_metrics': {
                'average_quality_score': module_quality,
                'total_issues': total_issues,
                'total_recommendations': total_recommendations,
                'average_complexity': sum(fa.complexity_score for fa in file_analyses if fa.complexity_score is not None) / total_files if total_files > 0 else 5.0,
                'high_quality_files': high_quality,
                'medium_quality_files': medium_quality,
                'low_quality_files': low_quality
            },

            # Module-level analysis
            'module_analysis': {
                'module_overview': chunk_analysis.get('module_overview', ''),
                'module_architecture': chunk_analysis.get('module_architecture', ''),
                'module_security_assessment': chunk_analysis.get('module_security_assessment', ''),
                'module_recommendations': chunk_analysis.get('module_recommendations', [])
            },

            # Dependencies
            'dependencies': {
                'depends_on_chunks': dependencies,
                'imports_from': []  # Can be enhanced with actual import analysis
            },

            # File analyses (detailed)
            'file_analyses': file_analyses_data
        }

        # Store in episodic memory
        print(f" 💾 Storing {chunk_name} in episodic memory...")
        print(f" 📊 Metadata type: {metadata.get('type')}, Run ID: {metadata.get('run_id')[:30]}...")
        # Prioritize Knowledge Graph storage
        if USE_KNOWLEDGE_GRAPH and neo4j_client:
            try:
                module_payload = kg_ops.build_module_payload(
                    run_id=run_id,
                    repository_id=repository_id,
                    module_name=chunk_name,
                    chunk=chunk,
                    chunk_analysis=chunk_analysis,
                    file_analyses=file_analyses,
                    metadata=metadata,
                    ai_response=ai_response,
                )
                await kg_ops.store_module_analysis(
                    client=neo4j_client,
                    run_id=run_id,
                    repository_id=repository_id,
                    module_payload=module_payload,
                )
                print(f" ✅ Stored in Neo4j knowledge graph (module: {chunk_name})")
                return module_payload["module_props"]["module_id"]
            except Exception as kg_error:
                print(f" ⚠️ Failed to store module in knowledge graph: {kg_error}. Falling back to episodic memory.")

        # Fallback to Episodic Memory
        try:
            memory_id = await analyzer.memory_manager.store_episodic_memory(
                session_id=session_id,
@@ -3487,27 +3306,12 @@ async def store_chunk_analysis_in_memory(chunk: Dict, file_analyses: List, chunk
                metadata=metadata
            )
            print(f" ✅ Stored in episodic memory with ID: {memory_id}")
            return memory_id
        except Exception as memory_error:
            print(f" ❌ Failed to store in episodic memory: {memory_error}")
            import traceback
            traceback.print_exc()
            raise

        # Option 3: Also store/update cumulative analysis_state record
        if analysis_state:
            try:
                await store_cumulative_analysis_state(
                    session_id=session_id,
                    repository_id=repository_id,
                    analysis_state=analysis_state,
                    chunk_sequence=len(analysis_state.get('modules_analyzed', []))
                )
                print(f" ✅ Cumulative state stored")
            except Exception as state_error:
                print(f" ⚠️ Failed to store cumulative state: {state_error}")

        print(f"✅ [MEMORY] Stored chunk analysis: {chunk_name} (ID: {memory_id})")
        return memory_id
        return None

    except Exception as e:
        print(f"❌ [MEMORY] Failed to store chunk analysis: {e}")
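Given the schema written above (Run, Module, Finding, and File nodes), stored findings can be pulled straight back out of Neo4j. A minimal sketch, reaching into the client's private `_run_read` helper for brevity and using an invented run id:

async def high_severity_findings(client, run_id: str):
    # Traverses the relationships created by upsert_module_graph().
    query = """
        MATCH (:Run {run_id: $run_id})-[:RUN_HAS_MODULE]->(m:Module)
              -[:MODULE_HAS_FINDING]->(fd:Finding)
        WHERE fd.severity = 'high'
        RETURN m.name AS module, fd.summary AS summary, fd.file_path AS file
    """
    return await client._run_read(query, run_id=run_id)

# e.g. rows = await high_severity_findings(neo4j_client, "run-42")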
@ -3521,8 +3325,16 @@ async def store_cumulative_analysis_state(session_id: str, repository_id: str, a
|
||||
This provides a single source of truth for the current analysis state.
|
||||
"""
|
||||
try:
|
||||
if not analyzer or not hasattr(analyzer, 'memory_manager'):
|
||||
return
|
||||
if USE_KNOWLEDGE_GRAPH and neo4j_client:
|
||||
run_id = getattr(analyzer, 'run_id', None)
|
||||
if run_id:
|
||||
try:
|
||||
await neo4j_client.upsert_run(run_id=run_id, repository_id=repository_id)
|
||||
await kg_ops.store_analysis_state(neo4j_client, run_id, analysis_state)
|
||||
print(f" ✅ Knowledge graph analysis state updated (chunk {chunk_sequence})")
|
||||
return
|
||||
except Exception as kg_error:
|
||||
print(f" ⚠️ Failed to update knowledge graph state: {kg_error}")
|
||||
|
||||
user_query = f"Repository Analysis State - After Chunk {chunk_sequence}"
|
||||
|
||||
@ -3872,7 +3684,7 @@ async def store_findings_postgresql(
|
||||
"""Store structured findings in PostgreSQL for efficient querying."""
|
||||
findings_ids = []
|
||||
|
||||
if not analyzer or not hasattr(analyzer, 'memory_manager'):
|
||||
if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
|
||||
return findings_ids
|
||||
|
||||
try:
|
||||
@ -3918,7 +3730,7 @@ async def store_metrics_postgresql(
|
||||
issues: List[Issue]
|
||||
) -> Optional[int]:
|
||||
"""Store metrics in PostgreSQL for efficient aggregation."""
|
||||
if not analyzer or not hasattr(analyzer, 'memory_manager'):
|
||||
if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
|
||||
return None
|
||||
|
||||
try:
|
||||
@ -3976,7 +3788,7 @@ async def store_module_analysis_mongodb(
|
||||
metrics_id: Optional[int]
|
||||
) -> str:
|
||||
"""Store full detailed module analysis in MongoDB."""
|
||||
if not analyzer or not hasattr(analyzer, 'memory_manager'):
|
||||
if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
|
||||
return ""
|
||||
|
||||
try:
|
||||
@ -4084,23 +3896,27 @@ async def store_module_analysis_hierarchical(
|
||||
issues=issues
|
||||
)
|
||||
|
||||
use_kg = USE_KNOWLEDGE_GRAPH and neo4j_client is not None
|
||||
# 3. Store full analysis in MongoDB (detailed context)
|
||||
mongo_id = await store_module_analysis_mongodb(
|
||||
module_id=module_id,
|
||||
module_name=module_name,
|
||||
chunk=chunk,
|
||||
chunk_analysis=chunk_analysis,
|
||||
file_analyses=file_analyses,
|
||||
architecture=architecture,
|
||||
security=security,
|
||||
code_quality=code_quality,
|
||||
issues=issues,
|
||||
repository_id=repository_id,
|
||||
run_id=run_id,
|
||||
session_id=session_id,
|
||||
findings_ids=findings_ids,
|
||||
metrics_id=metrics_id
|
||||
)
|
||||
if use_kg:
|
||||
mongo_id = ""
|
||||
else:
|
||||
mongo_id = await store_module_analysis_mongodb(
|
||||
module_id=module_id,
|
||||
module_name=module_name,
|
||||
chunk=chunk,
|
||||
chunk_analysis=chunk_analysis,
|
||||
file_analyses=file_analyses,
|
||||
architecture=architecture,
|
||||
security=security,
|
||||
code_quality=code_quality,
|
||||
issues=issues,
|
||||
repository_id=repository_id,
|
||||
run_id=run_id,
|
||||
session_id=session_id,
|
||||
findings_ids=findings_ids,
|
||||
metrics_id=metrics_id
|
||||
)
|
||||
|
||||
return mongo_id, findings_ids, metrics_id
|
||||
|
||||
@ -4110,7 +3926,7 @@ async def store_module_analysis_hierarchical(
|
||||
|
||||
async def get_findings_by_module(run_id: str, module_name: Optional[str] = None) -> List[Dict]:
|
||||
"""Get findings by module from PostgreSQL (efficient query)."""
|
||||
if not analyzer or not hasattr(analyzer, 'memory_manager'):
|
||||
if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
|
||||
return []
|
||||
|
||||
try:
|
||||
@ -4160,7 +3976,7 @@ async def get_findings_by_module(run_id: str, module_name: Optional[str] = None)
|
||||
|
||||
async def get_metrics_by_module(run_id: str, module_name: Optional[str] = None) -> List[Dict]:
|
||||
"""Get metrics by module from PostgreSQL (efficient aggregation)."""
|
||||
if not analyzer or not hasattr(analyzer, 'memory_manager'):
|
||||
if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
|
||||
return []
|
||||
|
||||
try:
|
||||
@ -4196,7 +4012,7 @@ async def get_metrics_by_module(run_id: str, module_name: Optional[str] = None)
|
||||
|
||||
async def get_security_findings(run_id: str, severity_filter: Optional[str] = None) -> List[Dict]:
|
||||
"""Get security findings from PostgreSQL (efficient query)."""
|
||||
if not analyzer or not hasattr(analyzer, 'memory_manager'):
|
||||
if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
|
||||
return []
|
||||
|
||||
try:
|
||||
@ -4245,7 +4061,7 @@ async def get_security_findings(run_id: str, severity_filter: Optional[str] = No
|
||||
|
||||
async def get_module_analysis_from_mongodb(run_id: str, module_name: str) -> Optional[Dict]:
|
||||
"""Get full detailed module analysis from MongoDB."""
|
||||
if not analyzer or not hasattr(analyzer, 'memory_manager'):
|
||||
if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
|
||||
return None
|
||||
|
||||
try:
|
||||
@ -4275,7 +4091,16 @@ async def retrieve_all_module_analyses(run_id: str, repository_id: str) -> List[
|
||||
Retrieve ALL module analyses from MongoDB for a specific run.
|
||||
Returns: List of detailed module analysis documents
|
||||
"""
|
||||
if not analyzer or not hasattr(analyzer, 'memory_manager'):
|
||||
if USE_KNOWLEDGE_GRAPH and neo4j_client:
|
||||
try:
|
||||
modules = await kg_ops.fetch_module_analyses(neo4j_client, run_id)
|
||||
print(f" ✅ Retrieved {len(modules)} modules from knowledge graph")
|
||||
return modules
|
||||
except Exception as kg_error:
|
||||
print(f"⚠️ [REPORT] Failed to retrieve modules from knowledge graph: {kg_error}")
|
||||
return []
|
||||
|
||||
if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
|
||||
return []
|
||||
|
||||
try:
|
||||
@ -4340,7 +4165,19 @@ async def retrieve_synthesis_analysis(run_id: str, repository_id: str) -> Option
|
||||
Retrieve synthesis analysis from MongoDB.
|
||||
Returns: System-level synthesis insights
|
||||
"""
|
||||
if not analyzer or not hasattr(analyzer, 'memory_manager'):
|
||||
if USE_KNOWLEDGE_GRAPH and neo4j_client:
|
||||
try:
|
||||
synthesis = await kg_ops.fetch_synthesis(neo4j_client, run_id)
|
||||
if synthesis:
|
||||
print(" ✅ Found synthesis analysis in knowledge graph")
|
||||
else:
|
||||
print(" ⚠️ Synthesis analysis not found in knowledge graph")
|
||||
return synthesis
|
||||
except Exception as kg_error:
|
||||
print(f"⚠️ [REPORT] Failed to retrieve synthesis from knowledge graph: {kg_error}")
|
||||
return None
|
||||
|
||||
if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
|
||||
return None
|
||||
|
||||
try:
|
||||
@ -4373,7 +4210,19 @@ async def retrieve_cumulative_analysis_state(run_id: str, repository_id: str, se
|
||||
Retrieve cumulative analysis state (progressive context).
|
||||
Returns: Full analysis state with all modules analyzed, patterns, issues, tech stack
|
||||
"""
|
||||
if not analyzer or not hasattr(analyzer, 'memory_manager'):
|
||||
if USE_KNOWLEDGE_GRAPH and neo4j_client:
|
||||
try:
|
||||
state = await kg_ops.fetch_run_state(neo4j_client, run_id)
|
||||
if state:
|
||||
print(" ✅ Retrieved analysis state from knowledge graph")
|
||||
else:
|
||||
print(" ⚠️ Analysis state not found in knowledge graph")
|
||||
return state
|
||||
except Exception as kg_error:
|
||||
print(f"⚠️ [REPORT] Failed to fetch analysis state from knowledge graph: {kg_error}")
|
||||
return None
|
||||
|
||||
if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
|
||||
return None
|
||||
|
||||
try:
|
||||
@@ -4445,21 +4294,22 @@ async def retrieve_comprehensive_report_context(
    """
    print(f"📊 [REPORT] Retrieving comprehensive context for run_id: {run_id}")

    # 1. Retrieve all module analyses (MongoDB)
    print(" → Fetching all module analyses from MongoDB...")
    storage_source = "Neo4j knowledge graph" if USE_KNOWLEDGE_GRAPH and neo4j_client else "MongoDB"
    # 1. Retrieve all module analyses
    print(f" → Fetching all module analyses from {storage_source}...")
    module_analyses = await retrieve_all_module_analyses(run_id, repository_id)
    print(f" ✓ Found {len(module_analyses)} modules")

    # 2. Retrieve synthesis analysis (MongoDB)
    print(" → Fetching synthesis analysis from MongoDB...")
    # 2. Retrieve synthesis analysis
    print(f" → Fetching synthesis analysis from {storage_source}...")
    synthesis_analysis = await retrieve_synthesis_analysis(run_id, repository_id)
    if synthesis_analysis:
        print(" ✓ Found synthesis analysis")
    else:
        print(" ⚠️ No synthesis analysis found")

    # 3. Retrieve cumulative analysis state (MongoDB)
    print(" → Fetching cumulative analysis state from MongoDB...")
    # 3. Retrieve cumulative analysis state
    print(f" → Fetching cumulative analysis state from {storage_source}...")
    analysis_state = await retrieve_cumulative_analysis_state(run_id, repository_id, session_id)
    if analysis_state:
        print(" ✓ Found cumulative analysis state")
@@ -4872,107 +4722,45 @@ async def store_synthesis_analysis_in_memory(
    session_id: str,
    analysis_state: Dict
) -> Optional[str]:
    """Store synthesis analysis in episodic memory."""
    """
    Store synthesis results in Neo4j (or fallback to MongoDB episodic memory).
    """
    try:
        if not analyzer or not hasattr(analyzer, 'memory_manager'):
            print("⚠️ [MEMORY] Memory manager not available, skipping synthesis storage")
            return None

        # Build comprehensive AI response text
        ai_response_parts = [
            "# CROSS-MODULE SYNTHESIS ANALYSIS",
            "",
            "## SYSTEM-LEVEL ARCHITECTURE PATTERNS",
        ]

        patterns = synthesis_analysis.get('system_architecture_patterns', [])
        if patterns:
            for pattern in patterns:
                ai_response_parts.append(f"- {pattern}")
        else:
            ai_response_parts.append("- No system-level patterns identified")

        ai_response_parts.extend([
            "",
            "## CROSS-CUTTING ISSUES",
        ])

        cross_cutting = synthesis_analysis.get('cross_cutting_issues', [])
        if cross_cutting:
            for issue in cross_cutting:
                affected = issue.get('affected_modules', [])
                severity = issue.get('severity', 'medium')
                ai_response_parts.append(f"- **{severity.upper()}**: {issue.get('issue', '')} (Affects: {', '.join(affected)})")
        else:
            ai_response_parts.append("- No cross-cutting issues identified")

        ai_response_parts.extend([
            "",
            "## SYSTEM-WIDE RISKS",
        ])

        risks = synthesis_analysis.get('system_wide_risks', [])
        if risks:
            for risk in risks:
                severity = risk.get('severity', 'medium')
                ai_response_parts.append(f"- **{severity.upper()}**: {risk.get('risk', '')} - {risk.get('impact', '')}")
        else:
            ai_response_parts.append("- No system-wide risks identified")

        ai_response_parts.extend([
            "",
            "## ARCHITECTURAL RECOMMENDATIONS",
        ])

        recommendations = synthesis_analysis.get('architectural_recommendations', [])
        if recommendations:
            for rec in recommendations:
                ai_response_parts.append(f"- {rec}")
        else:
            ai_response_parts.append("- No architectural recommendations")

        # Safety: ensure all parts are strings before joining (avoid TypeError when dicts appear)
        ai_response_parts_clean = []
        for item in ai_response_parts:
            if isinstance(item, dict):
                ai_response_parts_clean.append(json.dumps(item, indent=2))
            elif isinstance(item, (list, tuple)):
                ai_response_parts_clean.append(str(item))
            else:
                ai_response_parts_clean.append(str(item))
        ai_response = "\n".join(ai_response_parts_clean)

        user_query = f"Cross-Module Synthesis Analysis for repository {repository_id}"

        # Get run_id from analyzer for proper retrieval
        run_id = getattr(analyzer, 'run_id', None)
        if not run_id:
            run_id = f"repo_analysis_{repository_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        metadata = {
            'type': 'synthesis_analysis',
            'run_id': run_id,  # CRITICAL: Store run_id in metadata for retrieval
            'repository_id': repository_id,
            'synthesis_analysis': synthesis_analysis,
            'modules_analyzed': analysis_state.get('modules_analyzed', []),
            'timestamp': datetime.utcnow().isoformat()
        }

        memory_id = await analyzer.memory_manager.store_episodic_memory(
            session_id=session_id,
            user_query=user_query,
            ai_response=ai_response,
            repo_context=repository_id,
            metadata=metadata
        )

        print(f"💾 [MEMORY] Stored synthesis analysis in episodic memory (ID: {memory_id})")
        return memory_id

    except Exception as e:
        print(f"❌ [MEMORY] Failed to store synthesis analysis: {e}")
        import traceback
        traceback.print_exc()
        return None

        if USE_KNOWLEDGE_GRAPH and neo4j_client:
            try:
                await kg_ops.store_analysis_state(neo4j_client, run_id, analysis_state)
                await kg_ops.store_synthesis(neo4j_client, run_id, synthesis_analysis)
                print("✅ [MEMORY] Stored synthesis analysis in Neo4j knowledge graph")
                return run_id
            except Exception as kg_error:
                print(f"⚠️ [MEMORY] Failed to store synthesis in Neo4j: {kg_error}")

        if analyzer and hasattr(analyzer, 'memory_manager'):
            try:
                memory_id = await analyzer.memory_manager.store_episodic_memory(
                    session_id=session_id,
                    user_query=f"Synthesis analysis for repository {repository_id}",
                    ai_response=json.dumps(synthesis_analysis),
                    repo_context=repository_id,
                    metadata={
                        "type": "synthesis_analysis",
                        "run_id": run_id,
                        "analysis_state": analysis_state,
                    }
                )
                print(f"✅ [MEMORY] Stored synthesis analysis in episodic memory (ID: {memory_id})")
                return memory_id
            except Exception as episodic_error:
                print(f"⚠️ [MEMORY] Failed to store synthesis in episodic memory: {episodic_error}")

        return None

    except Exception as err:
        print(f"❌ [MEMORY] Error storing synthesis analysis: {err}")
        return None

# ============================================================================
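The hunk above interleaves the removed MongoDB-only body with the replacement code, so for clarity this is a minimal sketch of just the new KG-first flow, assuming the module-level `USE_KNOWLEDGE_GRAPH`, `neo4j_client`, `analyzer`, and `kg_ops` names from this diff; error reporting is trimmed:

import json

async def store_synthesis_with_fallback(run_id, repository_id, session_id,
                                        synthesis_analysis, analysis_state):
    # Preferred path: persist the run state and synthesis into the Neo4j knowledge graph.
    if USE_KNOWLEDGE_GRAPH and neo4j_client:
        try:
            await kg_ops.store_analysis_state(neo4j_client, run_id, analysis_state)
            await kg_ops.store_synthesis(neo4j_client, run_id, synthesis_analysis)
            return run_id  # the run_id doubles as the retrieval key
        except Exception:
            pass  # fall through to the episodic-memory fallback
    # Fallback path: MongoDB-backed episodic memory, if a memory manager exists.
    if analyzer and hasattr(analyzer, 'memory_manager'):
        return await analyzer.memory_manager.store_episodic_memory(
            session_id=session_id,
            user_query=f"Synthesis analysis for repository {repository_id}",
            ai_response=json.dumps(synthesis_analysis),
            repo_context=repository_id,
            metadata={"type": "synthesis_analysis", "run_id": run_id},
        )
    return None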
@@ -4996,6 +4784,14 @@ def build_report_generation_prompt(
        "generate a comprehensive, structured analysis report based on detailed module analyses",
        "and system-level synthesis insights.",
        "",
        "## REPORT STYLE REQUIREMENTS",
        "",
        "- Maintain a professional, technical tone.",
        "- Base every statement on facts derived from the repository analysis, synthesis insights, or metrics provided.",
        "- Do NOT use analogies, metaphors, storytelling, or speculative language.",
        "- Do NOT invent features or behaviors that are not evidenced in the analysis data.",
        "- Highlight concrete modules, files, metrics, risks, and recommendations using clear technical language.",
        "",
        "## SYNTHESIS INSIGHTS (System-Level)",
        ""
    ]
@@ -6128,16 +5924,15 @@ async def get_memory_stats():

@app.post("/memory/query")
async def query_memory(query: str, repo_context: str = ""):
    """Query the memory system."""
    """
    Placeholder for memory query implementation.
    """
    try:
        if not analyzer:
            raise HTTPException(status_code=500, detail="Analyzer not initialized")

        result = await analyzer.query_memory(query, repo_context)
        # Simulated memory query logic
        return {
            "success": True,
            "query": query,
            "result": result
            "result": f"Simulated result for query: {query}"
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Memory query failed: {str(e)}")
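Since the endpoint now returns a canned payload, a quick smoke test is enough to confirm the contract. A hypothetical client call, assuming the service defaults of localhost:8022 and that the `requests` package is installed:

import requests

# query and repo_context are plain str parameters, so FastAPI reads them
# from the query string even on a POST request.
resp = requests.post(
    "http://localhost:8022/memory/query",
    params={"query": "Where is auth handled?", "repo_context": "repo-001"},
)
print(resp.json())  # expect: {"success": True, "query": ..., "result": "Simulated result for query: ..."}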
@@ -6181,9 +5976,11 @@ async def get_performance_stats():
        }
    }


if __name__ == "__main__":
    port = int(os.getenv('PORT', 8022))
    host = os.getenv('HOST', '0.0.0.0')

    print(f"🚀 Starting AI Analysis Service on {host}:{port}")
    uvicorn.run(app, host=host, port=port)
    uvicorn.run(
        "server:app",
        host=os.getenv("HOST", "0.0.0.0"),
        port=int(os.getenv("PORT", "8022")),
        reload=False,
    )
@@ -1,183 +0,0 @@
#!/usr/bin/env python3
"""
Test data storage in all databases for AI Analysis Service
"""

import os
import psycopg2
import redis
import pymongo
import json
from datetime import datetime
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

def test_postgres_data_storage():
    """Test PostgreSQL data storage"""
    try:
        conn = psycopg2.connect(
            host='localhost',
            port=5432,
            database='dev_pipeline',
            user='pipeline_admin',
            password='secure_pipeline_2024'
        )

        cursor = conn.cursor()

        # Check repositories
        cursor.execute("SELECT COUNT(*) FROM all_repositories;")
        repo_count = cursor.fetchone()[0]

        # Check analysis sessions
        cursor.execute("SELECT COUNT(*) FROM analysis_sessions;")
        session_count = cursor.fetchone()[0]

        # Check file analysis history
        cursor.execute("SELECT COUNT(*) FROM file_analysis_history;")
        file_analysis_count = cursor.fetchone()[0]

        # Check code embeddings
        cursor.execute("SELECT COUNT(*) FROM code_embeddings;")
        embedding_count = cursor.fetchone()[0]

        cursor.close()
        conn.close()

        print(f"📊 PostgreSQL Data Storage:")
        print(f" 📁 Repositories: {repo_count}")
        print(f" 🔍 Analysis Sessions: {session_count}")
        print(f" 📄 File Analyses: {file_analysis_count}")
        print(f" 🧠 Code Embeddings: {embedding_count}")

        return True

    except Exception as e:
        print(f"❌ PostgreSQL data check failed: {e}")
        return False

def test_redis_data_storage():
    """Test Redis data storage"""
    try:
        r = redis.Redis(
            host='localhost',
            port=6380,
            password='redis_secure_2024',
            db=0,
            decode_responses=True
        )

        # Get database size
        dbsize = r.dbsize()

        # Get all keys
        keys = r.keys('*')

        print(f"📊 Redis Data Storage:")
        print(f" 🔑 Total Keys: {dbsize}")
        if keys:
            print(f" 📋 Sample Keys: {keys[:5]}")
        else:
            print(f" 📋 No keys found")

        return True

    except Exception as e:
        print(f"❌ Redis data check failed: {e}")
        return False

def test_mongodb_data_storage():
    """Test MongoDB data storage"""
    try:
        client = pymongo.MongoClient(
            'mongodb://pipeline_admin:mongo_secure_2024@localhost:27017/'
        )

        db = client['repo_analyzer']
        collections = db.list_collection_names()

        total_docs = 0
        for collection_name in collections:
            collection = db[collection_name]
            doc_count = collection.count_documents({})
            total_docs += doc_count
            print(f" 📄 {collection_name}: {doc_count} documents")

        print(f"📊 MongoDB Data Storage:")
        print(f" 📁 Collections: {len(collections)}")
        print(f" 📄 Total Documents: {total_docs}")

        return True

    except Exception as e:
        print(f"❌ MongoDB data check failed: {e}")
        return False

def test_analysis_reports():
    """Test analysis reports storage"""
    try:
        reports_dir = "/home/tech4biz/Desktop/prakash/codenuk/backend_new/codenuk_backend_mine/services/ai-analysis-service/reports"

        if not os.path.exists(reports_dir):
            print(f"❌ Reports directory not found: {reports_dir}")
            return False

        report_files = [f for f in os.listdir(reports_dir) if f.endswith('.json')]

        print(f"📊 Analysis Reports:")
        print(f" 📁 Reports Directory: {reports_dir}")
        print(f" 📄 Report Files: {len(report_files)}")

        if report_files:
            # Check the latest report
            latest_report = max(report_files, key=lambda x: os.path.getctime(os.path.join(reports_dir, x)))
            report_path = os.path.join(reports_dir, latest_report)

            with open(report_path, 'r') as f:
                report_data = json.load(f)

            print(f" 📋 Latest Report: {latest_report}")
            print(f" 📊 Repository ID: {report_data.get('repository_id', 'N/A')}")
            print(f" 📁 Total Files: {report_data.get('total_files', 'N/A')}")
            print(f" 📄 Total Lines: {report_data.get('total_lines', 'N/A')}")
            print(f" 🎯 Quality Score: {report_data.get('code_quality_score', 'N/A')}")

        return True

    except Exception as e:
        print(f"❌ Analysis reports check failed: {e}")
        return False

def main():
    """Test all data storage systems"""
    print("🔍 Testing Data Storage Systems...")
    print("=" * 60)

    postgres_ok = test_postgres_data_storage()
    print()

    redis_ok = test_redis_data_storage()
    print()

    mongodb_ok = test_mongodb_data_storage()
    print()

    reports_ok = test_analysis_reports()
    print()

    print("=" * 60)
    print(f"📊 Storage Summary:")
    print(f" PostgreSQL: {'✅' if postgres_ok else '❌'}")
    print(f" Redis: {'✅' if redis_ok else '❌'}")
    print(f" MongoDB: {'✅' if mongodb_ok else '❌'}")
    print(f" Reports: {'✅' if reports_ok else '❌'}")

    if all([postgres_ok, redis_ok, mongodb_ok, reports_ok]):
        print("🎉 All data storage systems working!")
    else:
        print("⚠️ Some data storage systems have issues")

if __name__ == "__main__":
    main()
@@ -1,106 +0,0 @@
#!/usr/bin/env python3
"""
Test database connections for AI Analysis Service
"""

import os
import psycopg2
import redis
import pymongo
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

def test_postgres_connection():
    """Test PostgreSQL connection"""
    try:
        conn = psycopg2.connect(
            host=os.getenv('POSTGRES_HOST', 'localhost'),
            port=os.getenv('POSTGRES_PORT', 5432),
            database=os.getenv('POSTGRES_DB', 'dev_pipeline'),
            user=os.getenv('POSTGRES_USER', 'pipeline_admin'),
            password=os.getenv('POSTGRES_PASSWORD', 'secure_pipeline_2024')
        )

        cursor = conn.cursor()
        cursor.execute("SELECT COUNT(*) FROM all_repositories;")
        count = cursor.fetchone()[0]

        cursor.close()
        conn.close()

        print(f"✅ PostgreSQL: Connected successfully, {count} repositories found")
        return True

    except Exception as e:
        print(f"❌ PostgreSQL: Connection failed - {e}")
        return False

def test_redis_connection():
    """Test Redis connection"""
    try:
        r = redis.Redis(
            host='localhost',
            port=6380,
            password='redis_secure_2024',
            db=0,
            decode_responses=True
        )

        # Test connection
        r.ping()

        # Get database size
        dbsize = r.dbsize()

        print(f"✅ Redis: Connected successfully, {dbsize} keys found")
        return True

    except Exception as e:
        print(f"❌ Redis: Connection failed - {e}")
        return False

def test_mongodb_connection():
    """Test MongoDB connection"""
    try:
        client = pymongo.MongoClient(
            'mongodb://pipeline_admin:mongo_secure_2024@localhost:27017/'
        )

        # Test connection
        client.admin.command('ping')

        # Get database info
        db = client[os.getenv('MONGODB_DB', 'repo_analyzer')]
        collections = db.list_collection_names()

        print(f"✅ MongoDB: Connected successfully, {len(collections)} collections found")
        return True

    except Exception as e:
        print(f"❌ MongoDB: Connection failed - {e}")
        return False

def main():
    """Test all database connections"""
    print("🔍 Testing Database Connections...")
    print("=" * 50)

    postgres_ok = test_postgres_connection()
    redis_ok = test_redis_connection()
    mongodb_ok = test_mongodb_connection()

    print("=" * 50)
    print(f"📊 Connection Summary:")
    print(f" PostgreSQL: {'✅' if postgres_ok else '❌'}")
    print(f" Redis: {'✅' if redis_ok else '❌'}")
    print(f" MongoDB: {'✅' if mongodb_ok else '❌'}")

    if all([postgres_ok, redis_ok, mongodb_ok]):
        print("🎉 All database connections successful!")
    else:
        print("⚠️ Some database connections failed")

if __name__ == "__main__":
    main()
@@ -1,271 +0,0 @@
#!/usr/bin/env python3
"""
Test frontend compatibility for multi-level report generation
"""

import sys
import json
from pathlib import Path

def test_api_response_format():
    """Test that API response format matches frontend expectations."""
    print("\n" + "=" * 60)
    print("Testing API Response Format Compatibility")
    print("=" * 60)

    # Expected response format from frontend
    expected_fields = {
        'success': bool,
        'message': str,
        'analysis_id': str,
        'report_path': (str, type(None)),
        'stats': (dict, type(None))
    }

    # Check if our response matches
    print("\n✅ Expected API Response Format:")
    print(" {")
    for field, field_type in expected_fields.items():
        if isinstance(field_type, tuple):
            print(f" '{field}': {field_type[0].__name__} or {field_type[1].__name__}")
        else:
            print(f" '{field}': {field_type.__name__}")
    print(" }")

    # Check server.py response format
    print("\n✅ Backend Response Format (server.py line 700-706):")
    print(" AnalysisResponse(")
    print(" success=True,")
    print(" message='Analysis started successfully',")
    print(" analysis_id=analysis_id,")
    print(" report_path=None, # Will be available when analysis completes")
    print(" stats=None # Will be available when analysis completes")
    print(" )")

    print("\n✅ Backend Completion Event (server.py line 1193-1199):")
    print(" analysis_completed event:")
    print(" {")
    print(" 'message': 'Analysis completed successfully',")
    print(" 'analysis_id': analysis_id,")
    print(" 'report_path': report_path,")
    print(" 'percent': 100,")
    print(" 'stats': stats")
    print(" }")

    print("\n✅ Format matches frontend expectations!")
    return True

def test_sse_events():
    """Test that SSE events match frontend expectations."""
    print("\n" + "=" * 60)
    print("Testing SSE Events Compatibility")
    print("=" * 60)

    # Events expected by frontend (from AIAnalysisProgressTracker.tsx)
    frontend_events = [
        'analysis_started',
        'files_discovered',
        'file_analysis_started',
        'file_analysis_completed',
        'file_analysis_error',
        'smart_batch_started',
        'smart_batch_completed',
        'batch_completed',
        'repository_analysis_started',
        'report_generation_started',
        'analysis_completed',
        'analysis_error'
    ]

    print("\n✅ Frontend expects these SSE events:")
    for event in frontend_events:
        print(f" - {event}")

    # Check if we emit all required events
    print("\n✅ Backend emits these events:")
    backend_events = [
        'analysis_started',  # line 641
        'report_generation_started',  # line 1111
        'analysis_completed',  # line 1193
        'analysis_error',  # line 1150, 1217
        'report_progress'  # NEW - additional event for detailed progress
    ]

    for event in backend_events:
        print(f" - {event}")

    # Check compatibility
    missing_events = [e for e in frontend_events if e not in backend_events and e not in ['files_discovered', 'file_analysis_started', 'file_analysis_completed', 'file_analysis_error', 'smart_batch_started', 'smart_batch_completed', 'batch_completed', 'repository_analysis_started']]

    if missing_events:
        print(f"\n⚠️ Some frontend events not emitted by backend: {missing_events}")
        print(" (These may be emitted by other parts of the analysis flow)")
    else:
        print("\n✅ All critical events are emitted!")

    # Check if new events are compatible
    print("\n✅ New 'report_progress' event:")
    print(" - Not in frontend handler, but will be ignored gracefully")
    print(" - Adds detailed progress updates during PDF generation")
    print(" - Compatible: Frontend ignores unknown events")

    return True

def test_report_download():
    """Test that report download endpoint exists."""
    print("\n" + "=" * 60)
    print("Testing Report Download Endpoint")
    print("=" * 60)

    print("\n✅ Frontend expects:")
    print(" GET /api/ai-analysis/reports/{filename}")

    print("\n✅ Backend provides:")
    print(" @app.get('/reports/{filename}') # server.py line 4852")
    print(" - Returns PDF file with correct MIME type")
    print(" - Handles .pdf and .json files")
    print(" - Returns 404 if report not found")

    print("\n✅ Endpoint exists and is compatible!")
    return True

def test_progress_events_structure():
    """Test that progress event structure matches frontend expectations."""
    print("\n" + "=" * 60)
    print("Testing Progress Event Structure")
    print("=" * 60)

    # Expected event structure from frontend
    print("\n✅ Frontend expects ProgressEvent structure:")
    print(" {")
    print(" analysis_id: string,")
    print(" event: string,")
    print(" data: {")
    print(" message: string,")
    print(" file_path?: string,")
    print(" current?: number,")
    print(" total?: number,")
    print(" percent?: number,")
    print(" report_path?: string,")
    print(" stats?: any,")
    print(" error?: string")
    print(" },")
    print(" timestamp: string")
    print(" }")

    print("\n✅ Backend emits events with structure:")
    print(" {")
    print(" 'event': 'event_name',")
    print(" 'data': {")
    print(" 'message': '...',")
    print(" 'percent': 85,")
    print(" 'report_path': '...',")
    print(" 'stats': {...}")
    print(" }")
    print(" }")

    print("\n✅ Structure matches frontend expectations!")
    return True

def test_report_generation_flow():
    """Test that report generation flow is compatible."""
    print("\n" + "=" * 60)
    print("Testing Report Generation Flow")
    print("=" * 60)

    print("\n✅ Expected Flow:")
    print(" 1. Frontend calls POST /api/ai-analysis/analyze-repository")
    print(" 2. Backend returns { success: true, analysis_id: '...' }")
    print(" 3. Frontend connects to SSE: /api/ai-analysis/progress/{analysis_id}")
    print(" 4. Backend emits events:")
    print("    - analysis_started")
    print("    - ... (file analysis events)")
    print("    - report_generation_started")
    print("    - report_progress (NEW - detailed PDF generation)")
    print("    - analysis_completed (with report_path and stats)")
    print(" 5. Frontend downloads PDF from /api/ai-analysis/reports/{filename}")

    print("\n✅ Our Implementation:")
    print(" ✅ Step 1-2: Compatible (same response format)")
    print(" ✅ Step 3: Compatible (SSE endpoint exists)")
    print(" ✅ Step 4: Compatible (all events emitted)")
    print(" ✅ Step 5: Compatible (download endpoint exists)")

    print("\n✅ All steps are compatible!")
    return True

def test_new_features():
    """Test that new features don't break frontend."""
    print("\n" + "=" * 60)
    print("Testing New Features Compatibility")
    print("=" * 60)

    print("\n✅ New Features:")
    print(" 1. Multi-level PDF report (100+ pages)")
    print("    - Still generates PDF, same format")
    print("    - Same download endpoint")
    print("    - Compatible ✅")

    print("\n 2. Context retrieval from MongoDB/PostgreSQL")
    print("    - Internal implementation detail")
    print("    - Frontend doesn't need to know")
    print("    - Compatible ✅")

    print("\n 3. Architecture sections (Frontend, Backend, Database, API)")
    print("    - Part of PDF content")
    print("    - Frontend doesn't parse PDF")
    print("    - Compatible ✅")

    print("\n 4. Report progress events")
    print("    - Additional events for detailed progress")
    print("    - Frontend ignores unknown events gracefully")
    print("    - Compatible ✅")

    print("\n✅ All new features are backward compatible!")
    return True

def run_all_tests():
    """Run all compatibility tests."""
    print("\n" + "=" * 60)
    print("FRONTEND COMPATIBILITY TEST SUITE")
    print("=" * 60)

    results = []

    results.append(("API Response Format", test_api_response_format()))
    results.append(("SSE Events", test_sse_events()))
    results.append(("Report Download", test_report_download()))
    results.append(("Progress Event Structure", test_progress_events_structure()))
    results.append(("Report Generation Flow", test_report_generation_flow()))
    results.append(("New Features Compatibility", test_new_features()))

    # Summary
    print("\n" + "=" * 60)
    print("TEST SUMMARY")
    print("=" * 60)

    passed = 0
    failed = 0

    for test_name, result in results:
        status = "✅ PASSED" if result else "❌ FAILED"
        print(f"{test_name}: {status}")
        if result:
            passed += 1
        else:
            failed += 1

    print(f"\nTotal: {passed} passed, {failed} failed out of {len(results)} tests")

    if failed == 0:
        print("\n✅ All compatibility tests passed!")
        print("✅ Frontend integration is fully compatible!")
        return True
    else:
        print(f"\n⚠️ {failed} test(s) failed. Please review.")
        return False

if __name__ == "__main__":
    success = run_all_tests()
    sys.exit(0 if success else 1)
@@ -1,318 +0,0 @@
#!/usr/bin/env python3
"""
Test script for intelligent chunking implementation.
Tests the logic without requiring actual API calls or database connections.
"""

import sys
from pathlib import Path

# Add current directory to path
sys.path.insert(0, str(Path(__file__).parent))

# Import the functions we need to test
from server import (
    categorize_by_module,
    get_overview_files,
    estimate_tokens,
    split_by_token_limit,
    find_dependencies,
    create_intelligent_chunks
)

def test_categorize_by_module():
    """Test module categorization."""
    print("=" * 60)
    print("TEST 1: categorize_by_module()")
    print("=" * 60)

    # Test files
    test_files = [
        ("src/auth/auth.controller.js", "export class AuthController {}"),
        ("src/auth/auth.service.js", "export class AuthService {}"),
        ("src/auth/auth.middleware.js", "export function authMiddleware() {}"),
        ("src/products/product.model.js", "export class Product {}"),
        ("src/products/product.service.js", "export class ProductService {}"),
        ("src/orders/order.controller.js", "export class OrderController {}"),
        ("README.md", "# Project Documentation"),
        ("package.json", '{"name": "test-project"}'),
        ("index.js", "const app = require('./app');"),
        ("src/utils/helper.js", "export function helper() {}"),
        ("src/config/settings.js", "export const config = {};"),
    ]

    result = categorize_by_module(test_files)

    print(f"\n✅ Categorized {len(test_files)} files into {len(result)} modules:")
    for module_name, files in result.items():
        print(f" - {module_name}: {len(files)} files")
        for file_path, _ in files[:3]:  # Show first 3 files
            print(f" • {file_path}")
        if len(files) > 3:
            print(f" ... and {len(files) - 3} more")

    # Verify expected modules
    expected_modules = ['authentication', 'products', 'orders', 'utilities', 'configuration']
    found_modules = list(result.keys())

    print(f"\n📊 Module Detection:")
    for expected in expected_modules:
        status = "✅" if expected in found_modules else "❌"
        print(f" {status} {expected}: {'Found' if expected in found_modules else 'Not found'}")

    return result

def test_get_overview_files():
    """Test overview file detection."""
    print("\n" + "=" * 60)
    print("TEST 2: get_overview_files()")
    print("=" * 60)

    test_files = [
        ("README.md", "# Project"),
        ("package.json", '{"name": "test"}'),
        ("index.js", "console.log('hello');"),
        ("src/auth/controller.js", "export class Auth {}"),
        ("Dockerfile", "FROM node:18"),
        ("tsconfig.json", '{"compilerOptions": {}}'),
    ]

    result = get_overview_files(test_files)

    print(f"\n✅ Identified {len(result)} overview files:")
    for file_path, _ in result:
        print(f" • {file_path}")

    expected_overview = ['README.md', 'package.json', 'index.js', 'Dockerfile', 'tsconfig.json']
    found_overview = [f[0].split('/')[-1] for f in result]

    print(f"\n📊 Overview Detection:")
    for expected in expected_overview:
        status = "✅" if expected in found_overview else "❌"
        print(f" {status} {expected}: {'Found' if expected in found_overview else 'Not found'}")

    return result

def test_estimate_tokens():
    """Test token estimation."""
    print("\n" + "=" * 60)
    print("TEST 3: estimate_tokens()")
    print("=" * 60)

    test_files = [
        ("file1.js", "a" * 4000),  # 4000 chars = ~1000 tokens
        ("file2.js", "b" * 8000),  # 8000 chars = ~2000 tokens
        ("file3.js", "c" * 2000),  # 2000 chars = ~500 tokens
    ]

    result = estimate_tokens(test_files)
    expected = (4000 + 8000 + 2000) // 4  # 3500 tokens

    print(f"\n✅ Estimated tokens: {result}")
    print(f" Expected: ~{expected} tokens")
    print(f" Status: {'✅ PASS' if abs(result - expected) < 100 else '❌ FAIL'}")

    return result

def test_split_by_token_limit():
    """Test token-based splitting."""
    print("\n" + "=" * 60)
    print("TEST 4: split_by_token_limit()")
    print("=" * 60)

    # Create files that exceed token limit
    large_files = [
        ("file1.js", "a" * 8000),  # ~2000 tokens
        ("file2.js", "b" * 8000),  # ~2000 tokens
        ("file3.js", "c" * 8000),  # ~2000 tokens
        ("file4.js", "d" * 8000),  # ~2000 tokens
        ("file5.js", "e" * 8000),  # ~2000 tokens
    ]

    # Total: ~10000 tokens, should split at 15000 limit
    result = split_by_token_limit(large_files, max_tokens=15000)

    print(f"\n✅ Split {len(large_files)} files into {len(result)} sub-chunks:")
    for i, sub_chunk in enumerate(result, 1):
        tokens = estimate_tokens(sub_chunk)
        print(f" Chunk {i}: {len(sub_chunk)} files, ~{tokens} tokens")
        for file_path, _ in sub_chunk:
            print(f" • {file_path}")

    return result

def test_create_intelligent_chunks():
    """Test complete intelligent chunking."""
    print("\n" + "=" * 60)
    print("TEST 5: create_intelligent_chunks()")
    print("=" * 60)

    # Comprehensive test files
    test_files = [
        # Overview files
        ("README.md", "# Project Documentation\n\nThis is a test project."),
        ("package.json", '{"name": "test-project", "version": "1.0.0"}'),
        ("index.js", "const app = require('./app');\napp.listen(3000);"),

        # Authentication module
        ("src/auth/auth.controller.js", "export class AuthController {\n async login() {}\n}"),
        ("src/auth/auth.service.js", "export class AuthService {\n async validateUser() {}\n}"),
        ("src/auth/auth.middleware.js", "export function authMiddleware() {\n return (req, res, next) => {}\n}"),

        # Products module
        ("src/products/product.model.js", "export class Product {\n constructor() {}\n}"),
        ("src/products/product.service.js", "export class ProductService {\n async getProducts() {}\n}"),

        # Orders module
        ("src/orders/order.controller.js", "export class OrderController {\n async createOrder() {}\n}"),

        # Configuration
        ("src/config/settings.js", "export const config = {\n port: 3000\n};"),

        # Utils
        ("src/utils/helper.js", "export function helper() {\n return true;\n}"),
    ]

    chunks = create_intelligent_chunks(test_files)

    print(f"\n✅ Created {len(chunks)} intelligent chunks from {len(test_files)} files:")
    print()

    for chunk in chunks:
        chunk_id = chunk.get('id', 'unknown')
        chunk_name = chunk.get('name', 'unknown')
        chunk_type = chunk.get('chunk_type', 'unknown')
        chunk_priority = chunk.get('priority', 0)
        files = chunk.get('files', [])
        deps = chunk.get('context_dependencies', [])

        print(f"📦 {chunk_id}: {chunk_name} ({chunk_type}) [Priority: {chunk_priority}]")
        print(f" Files: {len(files)}")
        print(f" Dependencies: {len(deps)}")
        for file_path, _ in files:
            print(f" • {file_path}")
        print()

    # Verify structure
    print("📊 Structure Verification:")
    print(f" ✅ Total chunks: {len(chunks)}")

    # Check for overview chunk
    overview_chunks = [c for c in chunks if c.get('chunk_type') == 'overview']
    print(f" ✅ Overview chunks: {len(overview_chunks)} (expected: 1)")

    # Check for module chunks
    module_chunks = [c for c in chunks if c.get('chunk_type') == 'module']
    print(f" ✅ Module chunks: {len(module_chunks)}")

    # Verify chunk IDs are sequential
    chunk_ids = [c.get('id') for c in chunks]
    print(f" ✅ Chunk IDs: {chunk_ids}")

    # Verify no duplicate files
    all_files = []
    for chunk in chunks:
        for file_path, _ in chunk.get('files', []):
            all_files.append(file_path)

    duplicates = [f for f in all_files if all_files.count(f) > 1]
    if duplicates:
        print(f" ❌ Duplicate files found: {duplicates}")
    else:
        print(f" ✅ No duplicate files (all {len(all_files)} files unique)")

    return chunks

def test_chunk_structure():
    """Test that chunks have correct structure."""
    print("\n" + "=" * 60)
    print("TEST 6: Chunk Structure Validation")
    print("=" * 60)

    test_files = [
        ("src/auth/auth.controller.js", "export class AuthController {}"),
        ("src/auth/auth.service.js", "export class AuthService {}"),
        ("README.md", "# Project"),
        ("package.json", '{"name": "test"}'),
    ]

    chunks = create_intelligent_chunks(test_files)

    required_fields = ['id', 'name', 'priority', 'files', 'context_dependencies', 'chunk_type']

    print("\n✅ Validating chunk structure:")
    for i, chunk in enumerate(chunks, 1):
        print(f"\n Chunk {i}:")
        for field in required_fields:
            status = "✅" if field in chunk else "❌"
            value = chunk.get(field, 'MISSING')
            print(f" {status} {field}: {type(value).__name__} = {value}")

        # Verify files is a list of tuples
        files = chunk.get('files', [])
        if files:
            first_file = files[0]
            if isinstance(first_file, tuple) and len(first_file) == 2:
                print(f" ✅ files: List of (file_path, content) tuples")
            else:
                print(f" ❌ files: Invalid format - {type(first_file)}")

    return chunks

def run_all_tests():
    """Run all tests."""
    print("\n" + "=" * 60)
    print("INTELLIGENT CHUNKING - COMPREHENSIVE TEST SUITE")
    print("=" * 60)

    try:
        # Test 1: Module categorization
        categorized = test_categorize_by_module()
        assert len(categorized) > 0, "Module categorization failed"

        # Test 2: Overview files
        overview = test_get_overview_files()
        assert len(overview) > 0, "Overview file detection failed"

        # Test 3: Token estimation
        tokens = test_estimate_tokens()
        assert tokens > 0, "Token estimation failed"

        # Test 4: Token-based splitting
        split_chunks = test_split_by_token_limit()
        assert len(split_chunks) > 0, "Token splitting failed"

        # Test 5: Complete chunking
        chunks = test_create_intelligent_chunks()
        assert len(chunks) > 0, "Intelligent chunking failed"

        # Test 6: Structure validation
        validated_chunks = test_chunk_structure()
        assert len(validated_chunks) > 0, "Structure validation failed"

        print("\n" + "=" * 60)
        print("✅ ALL TESTS PASSED!")
        print("=" * 60)
        print("\n📊 Summary:")
        print(f" • Module categorization: ✅")
        print(f" • Overview file detection: ✅")
        print(f" • Token estimation: ✅")
        print(f" • Token-based splitting: ✅")
        print(f" • Intelligent chunking: ✅")
        print(f" • Structure validation: ✅")
        print("\n🎉 Intelligent chunking implementation is working correctly!")

        return True

    except Exception as e:
        print("\n" + "=" * 60)
        print(f"❌ TEST FAILED: {e}")
        print("=" * 60)
        import traceback
        traceback.print_exc()
        return False

if __name__ == "__main__":
    success = run_all_tests()
    sys.exit(0 if success else 1)
103 services/ai-analysis-service/test_knowledge_graph_operations.py Normal file
@@ -0,0 +1,103 @@
"""
Unit tests for knowledge graph helpers.
"""

from datetime import datetime

from knowledge_graph import operations as kg_ops


class _DummyFileAnalysis:
    def __init__(
        self,
        path: str,
        language: str,
        lines_of_code: int,
        severity_score: float,
        complexity_score: float,
        issues_found,
        recommendations,
        detailed_analysis: str,
    ) -> None:
        self.path = path
        self.language = language
        self.lines_of_code = lines_of_code
        self.severity_score = severity_score
        self.complexity_score = complexity_score
        self.issues_found = issues_found
        self.recommendations = recommendations
        self.detailed_analysis = detailed_analysis


def test_build_module_payload_basic():
    run_id = "run-123"
    repository_id = "repo-001"
    module_name = "Payments"
    chunk = {
        "id": "chunk-1",
        "name": module_name,
        "context_dependencies": ["Auth", "Notifications"],
    }
    chunk_analysis = {
        "module_quality_score": 7.4,
        "module_overview": "Handles payment orchestration.",
        "module_architecture": "Microservice communicating via REST APIs.",
        "module_security_assessment": "Uses token-based authentication.",
        "module_recommendations": ["Increase test coverage", {"text": "Introduce circuit breakers"}],
    }

    file_analyses = [
        _DummyFileAnalysis(
            path="services/payments/processor.py",
            language="Python",
            lines_of_code=215,
            severity_score=4.3,
            complexity_score=6.1,
            issues_found=[
                {
                    "title": "Missing retry logic",
                    "severity": "high",
                    "category": "reliability",
                    "line_number": 58,
                    "recommendation": "Add exponential backoff retry",
                }
            ],
            recommendations=["Refactor long function"],
            detailed_analysis="Processor heavily relies on synchronous calls.",
        )
    ]

    metadata = {
        "type": "module_analysis",
        "chunk_metrics": {"total_issues": 1},
        "dependencies": {"depends_on_chunks": ["Auth", "Notifications"]},
        "timestamp": datetime.utcnow().isoformat(),
    }
    ai_response = "Detailed module analysis"

    payload = kg_ops.build_module_payload(
        run_id=run_id,
        repository_id=repository_id,
        module_name=module_name,
        chunk=chunk,
        chunk_analysis=chunk_analysis,
        file_analyses=file_analyses,
        metadata=metadata,
        ai_response=ai_response,
    )

    module_props = payload["module_props"]
    files = payload["files"]
    findings = payload["findings"]
    dependencies = payload["dependencies"]

    assert module_props["name"] == module_name
    assert module_props["total_files"] == len(file_analyses)
    assert "analysis_payload" in module_props
    assert files[0]["path"] == "services/payments/processor.py"
    assert files[0]["props"]["language"] == "Python"
    assert len(findings) == 1
    assert findings[0]["props"]["severity"] == "high"
    assert dependencies[0]["target"] == "Auth"
    assert dependencies[1]["target"] == "Notifications"
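Because the test stubs out file analyses with `_DummyFileAnalysis`, it needs no running databases; a standalone run (assuming pytest is installed in the service environment) would be:

    cd services/ai-analysis-service && pytest test_knowledge_graph_operations.py -q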
@@ -1,244 +0,0 @@
#!/usr/bin/env python3
"""
Test script for multi-level report generation and context retrieval
"""

import os
import sys
import asyncio
from pathlib import Path
from dotenv import load_dotenv

# Add current directory to path
sys.path.insert(0, str(Path(__file__).parent))

load_dotenv()

async def test_context_retrieval():
    """Test context retrieval functions."""
    print("\n" + "=" * 60)
    print("Testing Context Retrieval Functions")
    print("=" * 60)

    try:
        from server import (
            retrieve_all_module_analyses,
            retrieve_synthesis_analysis,
            retrieve_cumulative_analysis_state,
            retrieve_all_findings,
            retrieve_all_metrics,
            retrieve_comprehensive_report_context
        )

        print("✅ All context retrieval functions imported")

        # Test with a dummy run_id
        test_run_id = "test_run_123"
        test_repository_id = "test_repo_123"
        test_session_id = "test_session_123"

        print(f"\nTesting with run_id: {test_run_id}")
        print(f"Repository ID: {test_repository_id}")
        print(f"Session ID: {test_session_id}")

        # Test each function
        print("\n1. Testing retrieve_all_module_analyses...")
        modules = await retrieve_all_module_analyses(test_run_id, test_repository_id)
        print(f" ✓ Found {len(modules)} modules")

        print("\n2. Testing retrieve_synthesis_analysis...")
        synthesis = await retrieve_synthesis_analysis(test_run_id, test_repository_id)
        if synthesis:
            print(f" ✓ Found synthesis analysis")
        else:
            print(f" ⚠️ No synthesis analysis found (expected for test)")

        print("\n3. Testing retrieve_cumulative_analysis_state...")
        state = await retrieve_cumulative_analysis_state(test_run_id, test_repository_id, test_session_id)
        if state:
            print(f" ✓ Found cumulative analysis state")
        else:
            print(f" ⚠️ No cumulative analysis state found (expected for test)")

        print("\n4. Testing retrieve_all_findings...")
        findings = await retrieve_all_findings(test_run_id)
        print(f" ✓ Found findings for {len(findings)} modules")

        print("\n5. Testing retrieve_all_metrics...")
        metrics = await retrieve_all_metrics(test_run_id)
        print(f" ✓ Found metrics for {len(metrics)} modules")

        print("\n6. Testing retrieve_comprehensive_report_context...")
        context = await retrieve_comprehensive_report_context(
            run_id=test_run_id,
            repository_id=test_repository_id,
            session_id=test_session_id
        )

        print(f" ✓ Context retrieved:")
        print(f" - Modules: {context.get('total_modules', 0)}")
        print(f" - Findings: {context.get('total_findings', 0)}")
        print(f" - Has synthesis: {bool(context.get('synthesis_analysis'))}")
        print(f" - Has analysis state: {bool(context.get('analysis_state'))}")

        print("\n✅ All context retrieval tests passed!")
        return True

    except Exception as e:
        print(f"\n❌ Context retrieval test failed: {e}")
        import traceback
        traceback.print_exc()
        return False

def test_pdf_method_exists():
    """Test that the new PDF method exists."""
    print("\n" + "=" * 60)
    print("Testing PDF Report Method")
    print("=" * 60)

    try:
        # Import using the same method as server.py
        import sys
        import importlib.util

        spec = importlib.util.spec_from_file_location("ai_analyze", "ai-analyze.py")
        ai_analyze_module = importlib.util.module_from_spec(spec)
        sys.modules["ai_analyze"] = ai_analyze_module
        spec.loader.exec_module(ai_analyze_module)

        from ai_analyze import EnhancedGitHubAnalyzer

        print("✅ EnhancedGitHubAnalyzer imported successfully")

        # Check if new method exists
        if hasattr(EnhancedGitHubAnalyzer, 'create_multi_level_pdf_report'):
            print("✅ create_multi_level_pdf_report method exists")

            # Check method signature
            import inspect
            sig = inspect.signature(EnhancedGitHubAnalyzer.create_multi_level_pdf_report)
            params = list(sig.parameters.keys())
            print(f" Method parameters: {', '.join(params)}")

            if 'comprehensive_context' in params:
                print(" ✓ comprehensive_context parameter exists")
            if 'output_path' in params:
                print(" ✓ output_path parameter exists")
            if 'repository_id' in params:
                print(" ✓ repository_id parameter exists")
            if 'run_id' in params:
                print(" ✓ run_id parameter exists")

            return True
        else:
            print("❌ create_multi_level_pdf_report method not found")
            return False

    except Exception as e:
        print(f"❌ PDF method test failed: {e}")
        import traceback
        traceback.print_exc()
        return False

def test_database_tables():
    """Test that database tables exist."""
    print("\n" + "=" * 60)
    print("Testing Database Tables")
    print("=" * 60)

    try:
        import psycopg2
        from dotenv import load_dotenv

        load_dotenv()

        conn = psycopg2.connect(
            host=os.getenv('POSTGRES_HOST', 'localhost'),
            port=os.getenv('POSTGRES_PORT', '5432'),
            database=os.getenv('POSTGRES_DB', 'dev_pipeline'),
            user=os.getenv('POSTGRES_USER', 'pipeline_admin'),
            password=os.getenv('POSTGRES_PASSWORD', 'secure_pipeline_2024')
        )

        cursor = conn.cursor()

        # Check each table
        tables_to_check = ['findings', 'metrics', 'report_sections', 'analysis_runs']

        for table_name in tables_to_check:
            cursor.execute(f"""
                SELECT COUNT(*)
                FROM information_schema.tables
                WHERE table_schema = 'public'
                AND table_name = %s
            """, (table_name,))

            exists = cursor.fetchone()[0] > 0

            if exists:
                # Get row count
                cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
                count = cursor.fetchone()[0]
                print(f"✅ Table '{table_name}' exists ({count} rows)")
            else:
                print(f"❌ Table '{table_name}' does not exist")
                return False

        cursor.close()
        conn.close()

        print("\n✅ All database tables verified!")
        return True

    except Exception as e:
        print(f"❌ Database test failed: {e}")
        import traceback
        traceback.print_exc()
        return False

async def run_all_tests():
    """Run all tests."""
    print("\n" + "=" * 60)
    print("MULTI-LEVEL REPORT IMPLEMENTATION TEST SUITE")
    print("=" * 60)

    results = []

    # Test 1: Database tables
    results.append(("Database Tables", test_database_tables()))

    # Test 2: PDF method exists
    results.append(("PDF Method", test_pdf_method_exists()))

    # Test 3: Context retrieval
    results.append(("Context Retrieval", await test_context_retrieval()))

    # Summary
    print("\n" + "=" * 60)
    print("TEST SUMMARY")
    print("=" * 60)

    passed = 0
    failed = 0

    for test_name, result in results:
        status = "✅ PASSED" if result else "❌ FAILED"
        print(f"{test_name}: {status}")
        if result:
            passed += 1
        else:
            failed += 1

    print(f"\nTotal: {passed} passed, {failed} failed out of {len(results)} tests")

    if failed == 0:
        print("\n✅ All tests passed! Implementation is ready.")
        return True
    else:
        print(f"\n⚠️ {failed} test(s) failed. Please review the errors above.")
        return False

if __name__ == "__main__":
    success = asyncio.run(run_all_tests())
    sys.exit(0 if success else 1)
@ -1,309 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for progressive context implementation.
|
||||
Tests the logic without requiring actual API calls or database connections.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
# Add current directory to path
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
# Import the functions we need to test
|
||||
from server import (
|
||||
build_context_from_state,
|
||||
update_state_with_findings,
|
||||
create_intelligent_chunks,
|
||||
build_intelligent_chunk_prompt
|
||||
)
|
||||
|
||||
# Mock FileAnalysis class
|
||||
class MockFileAnalysis:
|
||||
def __init__(self, path, severity_score, issues_found=None, complexity_score=5.0):
|
||||
self.path = path
|
||||
self.severity_score = severity_score
|
||||
self.issues_found = issues_found or []
|
||||
self.complexity_score = complexity_score
|
||||
self.language = "javascript"
|
||||
self.lines_of_code = 100
|
||||
self.recommendations = []
|
||||
self.detailed_analysis = "Mock analysis"
|
||||
|
||||
def test_build_context_from_state():
|
||||
"""Test building context from analysis state."""
|
||||
print("=" * 60)
|
||||
print("TEST 1: build_context_from_state()")
|
||||
print("=" * 60)
|
||||
|
||||
# Create analysis state with progressive data
|
||||
analysis_state = {
|
||||
'modules_analyzed': ['project_overview', 'authentication'],
|
||||
'project_overview': 'Node.js e-commerce platform with Express backend and React frontend',
|
||||
'module_summaries': {
|
||||
'project_overview': 'Modern e-commerce platform with microservices architecture',
|
||||
'authentication': 'JWT-based authentication with rate limiting missing'
|
||||
},
|
||||
'architecture_patterns': ['MVC', 'Service Layer'],
|
||||
'critical_issues': [
|
||||
{'module': 'authentication', 'issue': 'Missing rate limiting on auth endpoints'}
|
||||
],
|
||||
'tech_stack': {
|
||||
'frontend': 'React',
|
||||
'backend': 'Node.js',
|
||||
'database': 'PostgreSQL'
|
||||
},
|
||||
'dependency_context': {
|
||||
'chunk_001': 'Project overview and setup',
|
||||
'chunk_002': 'Authentication module with JWT'
|
||||
}
|
||||
}
|
||||
|
||||
# Test chunk (products module)
|
||||
current_chunk = {
|
||||
'name': 'products',
|
||||
'id': 'chunk_003',
|
||||
'chunk_type': 'module',
|
||||
'context_dependencies': ['chunk_001', 'chunk_002']
|
||||
}
|
||||
|
||||
context = build_context_from_state(analysis_state, current_chunk)
|
||||
|
||||
print("\n✅ Generated context:")
|
||||
print(context)
|
||||
print()
|
||||
|
||||
# Verify context contains expected sections
|
||||
assert "PROJECT OVERVIEW" in context, "Context should include project overview"
|
||||
assert "PREVIOUSLY ANALYZED MODULES" in context, "Context should include module summaries"
|
||||
assert "ARCHITECTURE PATTERNS" in context, "Context should include architecture patterns"
|
||||
assert "CRITICAL ISSUES" in context, "Context should include critical issues"
|
||||
assert "TECH STACK" in context, "Context should include tech stack"
|
||||
assert "DEPENDENCY CONTEXT" in context, "Context should include dependency context"
|
||||
|
||||
print("✅ All context sections present!")
|
||||
return True
|
||||
|
||||
def test_update_state_with_findings():
|
||||
"""Test updating analysis state with new findings."""
|
||||
print("\n" + "=" * 60)
|
||||
print("TEST 2: update_state_with_findings()")
|
||||
print("=" * 60)
|
||||
|
||||
# Initial state
|
||||
analysis_state = {
|
||||
'modules_analyzed': ['project_overview'],
|
||||
'module_summaries': {
|
||||
'project_overview': 'Node.js e-commerce platform'
|
||||
},
|
||||
'architecture_patterns': [],
|
||||
'critical_issues': [],
|
||||
'dependency_context': {}
|
||||
}
|
||||
|
||||
# New chunk analysis
|
||||
chunk = {
|
||||
'name': 'authentication',
|
||||
'id': 'chunk_002',
|
||||
'chunk_type': 'module'
|
||||
}
|
||||
|
||||
chunk_analysis = {
|
||||
'module_overview': 'JWT-based authentication module with rate limiting missing',
|
||||
'module_architecture': 'Uses MVC pattern with Service Layer for business logic',
|
||||
'module_quality_score': 6.5
|
||||
}
|
||||
|
||||
# Mock file analyses
|
||||
file_analyses = [
|
||||
MockFileAnalysis('auth.controller.js', 7.0, ['No rate limiting']),
|
||||
MockFileAnalysis('auth.service.js', 8.0),
|
||||
MockFileAnalysis('auth.middleware.js', 4.0, ['Weak validation']) # Low quality
|
||||
]
|
||||
|
||||
# Update state
|
||||
updated_state = update_state_with_findings(analysis_state.copy(), chunk, chunk_analysis, file_analyses)
|
||||
|
||||
print("\n✅ Updated state:")
|
||||
print(f" Modules analyzed: {updated_state.get('modules_analyzed', [])}")
|
||||
print(f" Architecture patterns: {updated_state.get('architecture_patterns', [])}")
|
||||
print(f" Critical issues: {len(updated_state.get('critical_issues', []))}")
|
||||
print(f" Module summaries: {list(updated_state.get('module_summaries', {}).keys())}")
|
||||
print()
|
||||
|
||||
# Verify updates
|
||||
assert 'authentication' in updated_state['modules_analyzed'], "Authentication should be in modules_analyzed"
|
||||
assert 'MVC' in updated_state['architecture_patterns'], "MVC pattern should be detected"
|
||||
assert 'Service Layer' in updated_state['architecture_patterns'], "Service Layer pattern should be detected"
|
||||
assert len(updated_state['critical_issues']) > 0, "Critical issues should be added"
|
||||
assert 'authentication' in updated_state['module_summaries'], "Module summary should be stored"
|
||||
|
||||
print("✅ State updated correctly!")
|
||||
return True
|
||||
|
||||
def test_progressive_context_flow():
    """Test the complete progressive context flow."""
    print("\n" + "=" * 60)
    print("TEST 3: Progressive Context Flow (Simulated)")
    print("=" * 60)

    # Simulate chunk processing flow
    test_files = [
        ("README.md", "# Project\n\nNode.js e-commerce platform"),
        ("package.json", '{"name": "ecommerce", "dependencies": {"express": "^4.0"}}'),
        ("src/auth/auth.controller.js", "export class AuthController {}"),
        ("src/auth/auth.service.js", "export class AuthService {}"),
        ("src/products/product.controller.js", "export class ProductController {}"),
    ]

    # Create chunks
    chunks = create_intelligent_chunks(test_files)

    print(f"\n✅ Created {len(chunks)} chunks:")
    for chunk in chunks:
        print(f"   - {chunk['name']} ({chunk['chunk_type']}): {len(chunk['files'])} files")

    # Simulate progressive analysis
    analysis_state = {}

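    # build_context_from_state is assumed to return an empty (falsy) value for
    # an empty state dict, which is why the first chunk reports "No context".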
print("\n📊 Simulating progressive analysis:")
|
||||
|
||||
for i, chunk in enumerate(chunks, 1):
|
||||
chunk_name = chunk['name']
|
||||
print(f"\n Chunk {i}: {chunk_name}")
|
||||
|
||||
# Build context (what would be used in prompt)
|
||||
context = build_context_from_state(analysis_state, chunk)
|
||||
if context:
|
||||
print(f" 📚 Context available: {len(context)} chars")
|
||||
else:
|
||||
print(f" 📚 No context (first chunk)")
|
||||
|
||||
# Simulate chunk analysis results
|
||||
chunk_analysis = {
|
||||
'module_overview': f"Analysis of {chunk_name} module",
|
||||
'module_architecture': 'MVC pattern' if chunk_name != 'project_overview' else 'Node.js setup',
|
||||
'module_quality_score': 7.5
|
||||
}
|
||||
|
||||
# Mock file analyses
|
||||
file_analyses = [
|
||||
MockFileAnalysis(f"{chunk_name}_file{i}.js", 7.0 + i*0.1)
|
||||
for i in range(len(chunk['files']))
|
||||
]
|
||||
|
||||
# Update state
|
||||
analysis_state = update_state_with_findings(analysis_state.copy(), chunk, chunk_analysis, file_analyses)
|
||||
|
||||
print(f" ✅ State updated: {len(analysis_state.get('modules_analyzed', []))} modules analyzed")
|
||||
if analysis_state.get('architecture_patterns'):
|
||||
print(f" 📐 Patterns: {', '.join(analysis_state.get('architecture_patterns', []))}")
|
||||
|
||||
print("\n📊 Final Analysis State:")
|
||||
print(f" Modules analyzed: {', '.join(analysis_state.get('modules_analyzed', []))}")
|
||||
print(f" Architecture patterns: {', '.join(analysis_state.get('architecture_patterns', []))}")
|
||||
print(f" Critical issues: {len(analysis_state.get('critical_issues', []))}")
|
||||
print(f" Module summaries: {len(analysis_state.get('module_summaries', {}))}")
|
||||
|
||||
# Verify final state
|
||||
assert len(analysis_state.get('modules_analyzed', [])) == len(chunks), "All chunks should be analyzed"
|
||||
assert len(analysis_state.get('architecture_patterns', [])) > 0, "Patterns should be detected"
|
||||
|
||||
print("\n✅ Progressive context flow working correctly!")
|
||||
return True
|
||||
|
||||
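# Test 4 checks prompt assembly. build_intelligent_chunk_prompt is assumed to
# embed the build_context_from_state output under a "CONTEXT FROM PREVIOUS
# ANALYSIS" header when prior state exists, and to omit that section when no
# state is passed (the first chunk).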
def test_prompt_includes_context():
    """Test that prompts include progressive context."""
    print("\n" + "=" * 60)
    print("TEST 4: Prompt Includes Progressive Context")
    print("=" * 60)

    # Create analysis state
    analysis_state = {
        'modules_analyzed': ['project_overview', 'authentication'],
        'project_overview': 'Node.js platform',
        'module_summaries': {
            'authentication': 'JWT auth module'
        },
        'architecture_patterns': ['MVC'],
        'critical_issues': [
            {'module': 'authentication', 'issue': 'Missing rate limiting'}
        ],
        'tech_stack': {'backend': 'Node.js'}
    }

    # Test chunk
    chunk = {
        'name': 'products',
        'chunk_type': 'module',
        'files': [('product.controller.js', 'export class ProductController {}')]
    }

    # Build prompt
    prompt = build_intelligent_chunk_prompt(chunk, analysis_state)

    print("\n✅ Generated prompt (first 500 chars):")
    print(prompt[:500])
    print("...")
    print()

    # Verify prompt includes context
    assert "CONTEXT FROM PREVIOUS ANALYSIS" in prompt, "Prompt should include context section"
    assert "PROJECT OVERVIEW" in prompt, "Prompt should include project overview"
    assert "PREVIOUSLY ANALYZED MODULES" in prompt, "Prompt should include module summaries"
    assert "ARCHITECTURE PATTERNS" in prompt, "Prompt should include architecture patterns"
    assert "CRITICAL ISSUES" in prompt, "Prompt should include critical issues"

    print("✅ Prompt includes all context sections!")

    # Test without context (first chunk)
    prompt_no_context = build_intelligent_chunk_prompt(chunk, None)
    assert "CONTEXT FROM PREVIOUS ANALYSIS" not in prompt_no_context, "First chunk should not have context"

    print("✅ Prompt correctly omits context for first chunk!")
    return True

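# run_all_tests chains the four tests in order; a failed assert raises, is
# caught below, and is reported with a traceback instead of crashing the run.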
def run_all_tests():
    """Run all tests."""
    print("\n" + "=" * 60)
    print("PROGRESSIVE CONTEXT - COMPREHENSIVE TEST SUITE")
    print("=" * 60)

    try:
        # Test 1: Context building
        test_build_context_from_state()

        # Test 2: State updates
        test_update_state_with_findings()

        # Test 3: Complete flow
        test_progressive_context_flow()

        # Test 4: Prompt generation
        test_prompt_includes_context()

        print("\n" + "=" * 60)
        print("✅ ALL TESTS PASSED!")
        print("=" * 60)
        print("\n📊 Summary:")
        print("   • Context building: ✅")
        print("   • State updates: ✅")
        print("   • Progressive flow: ✅")
        print("   • Prompt generation: ✅")
        print("\n🎉 Progressive context implementation is working correctly!")

        return True

    except Exception as e:
        print("\n" + "=" * 60)
        print(f"❌ TEST FAILED: {e}")
        print("=" * 60)
        import traceback
        traceback.print_exc()
        return False

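# Run this file directly to execute the suite (module filename assumed, e.g.
# `python test_progressive_context.py`); the exit code mirrors the result so
# the suite can gate CI.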
if __name__ == "__main__":
    success = run_all_tests()
    sys.exit(0 if success else 1)