implemented KG DB

2025-11-13 09:07:54 +05:30 · 2025-11-13 09:07:54 +05:30 · ad2c27d793
commit ad2c27d793
parent ffe6ca349c
20 changed files with 4080 additions and 2472 deletions
--- a/ai-analysis-reports/repo_analysis_e395ea2c-ea3b-43e0-94af-c95b2815aac1_20251110_032129_analysis.pdf
+++ b/ai-analysis-reports/repo_analysis_e395ea2c-ea3b-43e0-94af-c95b2815aac1_20251110_032129_analysis.pdf
--- a/ai-analysis-reports/repo_analysis_e395ea2c-ea3b-43e0-94af-c95b2815aac1_20251110_055924_analysis.pdf
+++ b/ai-analysis-reports/repo_analysis_e395ea2c-ea3b-43e0-94af-c95b2815aac1_20251110_055924_analysis.pdf
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -721,48 +721,47 @@ services:
  # =====================================
  
  ai-analysis-service:
-    build: ./services/ai-analysis-service
-    container_name: pipeline_ai_analysis_service
+    build: 
+      context: ./services/ai-analysis-service
+      dockerfile: Dockerfile
+    container_name: pipeline_ai_analysis
    ports:
      - "8022:8022"
    environment:
      - PORT=8022
      - HOST=0.0.0.0
      - ANTHROPIC_API_KEY=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
-      - POSTGRES_HOST=postgres
+      
+      # Neo4j Configuration
+      - USE_NEO4J_KG=true
+      - NEO4J_URI=bolt://neo4j:7687
+      - NEO4J_USER=neo4j
+      - NEO4J_PASSWORD=password
+      - NEO4J_DATABASE=neo4j
+      
+      # Report Configuration
+      - REPORT_TECHNICAL_ONLY=false
+      
+      # Existing database configurations
+      - POSTGRES_HOST=pipeline_postgres
      - POSTGRES_PORT=5432
      - POSTGRES_DB=dev_pipeline
      - POSTGRES_USER=pipeline_admin
      - POSTGRES_PASSWORD=secure_pipeline_2024
-      - REDIS_HOST=redis
+      
+      - MONGODB_URL=mongodb://pipeline_admin:mongo_secure_2024@pipeline_mongodb:27017/
+      - MONGODB_DB=repo_analyzer
+      
+      - REDIS_HOST=pipeline_redis
      - REDIS_PORT=6379
      - REDIS_PASSWORD=redis_secure_2024
-      - MONGODB_URL=mongodb://pipeline_admin:mongo_secure_2024@mongodb:27017/
-      - MONGODB_DB=repo_analyzer
-      - GIT_INTEGRATION_SERVICE_URL=http://git-integration:8012
-      - CLAUDE_REQUESTS_PER_MINUTE=90
-      - MAX_FILES_DEFAULT=100
-      - CACHE_TTL_SECONDS=86400
-      - CONTENT_MAX_TOKENS=8000
-      - ENHANCED_PROCESSING_ENABLED=true
-      - ENHANCED_BATCH_PROCESSING=true
-      - ENHANCED_SMART_CHUNKING=true
-      - ENHANCED_RATE_LIMIT=120
-      - ENHANCED_BATCH_DELAY=0.05
-      - ENHANCED_SMALL_FILE_DELAY=0.02
-      - ENHANCED_MEDIUM_FILE_DELAY=0.05
-      - ENHANCED_LARGE_FILE_DELAY=0.1
-    volumes:
-      - ai_analysis_logs:/app/logs
-      - ./ai-analysis-reports:/app/reports
-      - ai_analysis_temp:/app/temp
+    depends_on:
+      - neo4j
+      - postgres
+      - mongodb
+      - redis
    networks:
      - pipeline_network
-    depends_on:
-      - postgres
-      - redis
-      - mongodb
-      - git-integration
    deploy:
      resources:
        limits:
--- a/services/ai-analysis-service/init.py
+++ b/services/ai-analysis-service/init.py
@ -0,0 +1,8 @@
+# Service initialization
+# This file helps Python treat the directory as a package
+# and can be used to set up any service-wide configurations
+
+from .server import app  # Import the FastAPI app
+from .ai_analyze import analyze_repository  # Import key functions
+
+__all__ = ['app', 'analyze_repository']
--- a/services/ai-analysis-service/ai-analyze.py
+++ b/services/ai-analysis-service/ai-analyze.py
--- a/services/ai-analysis-service/env.example
+++ b/services/ai-analysis-service/env.example
@ -6,7 +6,17 @@ HOST=0.0.0.0
 NODE_ENV=development

 # AI API Keys
-ANTHROPIC_API_KEY=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
+ANTHROPIC_API_KEY=your_anthropic_api_key
+
+# Neo4j Knowledge Graph Configuration
+USE_NEO4J_KG=true
+NEO4J_URI=bolt://localhost:7687
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=secure_neo4j_2024
+NEO4J_DATABASE=neo4j
+
+# Report Configuration
+REPORT_TECHNICAL_ONLY=false

 # Database Configuration
 POSTGRES_HOST=localhost
--- a/services/ai-analysis-service/knowledge_graph/init.py
+++ b/services/ai-analysis-service/knowledge_graph/init.py
@ -0,0 +1,9 @@
+"""
+Knowledge graph utilities for the AI Analysis Service.
+
+This package provides the Neo4j client and high-level helpers used to
+persist and query analysis results in a graph representation.
+"""
+
+from .neo4j_client import Neo4jGraphClient  # noqa: F401
+
--- a/services/ai-analysis-service/knowledge_graph/neo4j_client.py
+++ b/services/ai-analysis-service/knowledge_graph/neo4j_client.py
@ -0,0 +1,328 @@
+"""
+Neo4j client helpers for the AI Analysis Service.
+
+This module wraps the official Neo4j async driver and exposes a minimal
+set of convenience methods that we can reuse across the service without
+sprinkling Cypher execution boilerplate everywhere.
+"""
+
+from __future__ import annotations
+
+import json
+from contextlib import asynccontextmanager
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Any, AsyncIterator, Dict, List, Optional, Sequence
+
+from neo4j import AsyncGraphDatabase
+
+try:
+    from neo4j import AsyncResult, AsyncSession  # type: ignore
+except ImportError:  # pragma: no cover - fallback for older/newer driver versions
+    AsyncResult = Any  # type: ignore
+    AsyncSession = Any  # type: ignore
+
+
+def _json_dumps(value: Any) -> str:
+    """Serialize complex values so we can persist them as strings safely."""
+    if value is None:
+        return ""
+    if isinstance(value, (str, int, float, bool)):
+        return str(value)
+    try:
+        return json.dumps(value, default=str)
+    except Exception:
+        return str(value)
+
+
+@dataclass
+class Neo4jConfig:
+    uri: str
+    user: str
+    password: str
+    database: Optional[str] = None
+    fetch_size: int = 1000
+
+
+class Neo4jGraphClient:
+    """
+    Thin wrapper around the Neo4j async driver that provides helpers for
+    writing analysis artefacts into the graph and querying them back.
+    """
+
+    def __init__(self, config: Neo4jConfig) -> None:
+        self._config = config
+        self._driver = AsyncGraphDatabase.driver(
+            config.uri,
+            auth=(config.user, config.password),
+            # Allow long running operations while the analysis progresses
+            max_connection_lifetime=3600,
+        )
+
+    async def close(self) -> None:
+        if self._driver:
+            await self._driver.close()
+
+    @asynccontextmanager
+    async def session(self) -> AsyncIterator[AsyncSession]:
+        kwargs: Dict[str, Any] = {}
+        if self._config.database:
+            kwargs["database"] = self._config.database
+        if self._config.fetch_size:
+            kwargs["fetch_size"] = self._config.fetch_size
+        async with self._driver.session(**kwargs) as session:
+            yield session
+
+    async def _run_write(self, query: str, **params: Any) -> None:
+        async with self.session() as session:
+            async def _write(tx):
+                result = await tx.run(query, **params)
+                await result.consume()
+            await session.execute_write(_write)
+
+    async def _run_read(self, query: str, **params: Any) -> List[Dict[str, Any]]:
+        async with self.session() as session:
+            result: AsyncResult = await session.run(query, **params)
+            records = await result.data()
+            return records
+
+    # ------------------------------------------------------------------ #
+    # Write helpers
+    # ------------------------------------------------------------------ #
+
+    async def upsert_run(self, run_id: str, repository_id: str) -> None:
+        await self._run_write(
+            """
+            MERGE (r:Run {run_id: $run_id})
+            ON CREATE SET
+                r.repository_id = $repository_id,
+                r.created_at = datetime(),
+                r.updated_at = datetime()
+            ON MATCH SET
+                r.repository_id = $repository_id,
+                r.updated_at = datetime()
+            """,
+            run_id=run_id,
+            repository_id=repository_id,
+        )
+
+    async def clear_run(self, run_id: str) -> None:
+        await self._run_write(
+            """
+            MATCH (r:Run {run_id: $run_id})
+            OPTIONAL MATCH (r)-[rel]-()
+            DETACH DELETE r
+            """,
+            run_id=run_id,
+        )
+
+    async def upsert_module_graph(
+        self,
+        run_id: str,
+        repository_id: str,
+        module_props: Dict[str, Any],
+        files: Sequence[Dict[str, Any]],
+        findings: Sequence[Dict[str, Any]],
+        dependencies: Sequence[Dict[str, Any]],
+    ) -> None:
+        """
+        Persist module level artefacts in a single transaction.
+        """
+        # Ensure strings
+        module_props = {k: _json_dumps(v) if isinstance(v, (dict, list, tuple, set)) else v for k, v in module_props.items()}
+        files_payload = [
+            {
+                "path": item["path"],
+                "props": {
+                    key: _json_dumps(value) if isinstance(value, (dict, list, tuple, set)) else value
+                    for key, value in item.get("props", {}).items()
+                },
+            }
+            for item in files
+        ]
+        findings_payload = [
+            {
+                "id": item["id"],
+                "props": {
+                    key: _json_dumps(value) if isinstance(value, (dict, list, tuple, set)) else value
+                    for key, value in item.get("props", {}).items()
+                },
+                "file_path": item.get("file_path"),
+            }
+            for item in findings
+        ]
+        dependencies_payload = [
+            {
+                "target": dependency.get("target"),
+                "kind": dependency.get("kind", "depends_on"),
+                "metadata": _json_dumps(dependency.get("metadata", {})),
+            }
+            for dependency in dependencies
+        ]
+
+        await self._run_write(
+            """
+            MERGE (r:Run {run_id: $run_id})
+            ON CREATE SET
+                r.repository_id = $repository_id,
+                r.created_at = datetime(),
+                r.updated_at = datetime()
+            ON MATCH SET
+                r.repository_id = $repository_id,
+                r.updated_at = datetime()
+
+            MERGE (m:Module {run_id: $run_id, name: $module_name})
+            SET m += $module_props,
+                m.updated_at = datetime()
+
+            MERGE (r)-[:RUN_HAS_MODULE]->(m)
+
+            WITH m
+            UNWIND $files AS file_data
+            MERGE (f:File {run_id: $run_id, path: file_data.path})
+            SET f += file_data.props,
+                f.updated_at = datetime()
+            MERGE (m)-[:MODULE_INCLUDES_FILE]->(f)
+
+            WITH m
+            UNWIND $findings AS finding_data
+            MERGE (fd:Finding {run_id: $run_id, finding_id: finding_data.id})
+            SET fd += finding_data.props,
+                fd.updated_at = datetime()
+            MERGE (m)-[:MODULE_HAS_FINDING]->(fd)
+            FOREACH (fp IN CASE WHEN finding_data.file_path IS NULL THEN [] ELSE [finding_data.file_path] END |
+                MERGE (ff:File {run_id: $run_id, path: fp})
+                MERGE (fd)-[:FINDING_TOUCHES_FILE]->(ff)
+            )
+
+            WITH m
+            UNWIND $dependencies AS dependency
+            FOREACH (_ IN CASE WHEN dependency.target IS NULL THEN [] ELSE [1] END |
+                MERGE (dep:Module {run_id: $run_id, name: dependency.target})
+                MERGE (m)-[rel:MODULE_DEPENDENCY {kind: dependency.kind}]->(dep)
+                SET rel.metadata = dependency.metadata,
+                    rel.updated_at = datetime()
+            )
+            """,
+            run_id=run_id,
+            repository_id=repository_id,
+            module_name=module_props.get("name"),
+            module_props=module_props,
+            files=files_payload,
+            findings=findings_payload,
+            dependencies=dependencies_payload,
+        )
+
+    async def upsert_run_state(self, run_id: str, state: Dict[str, Any]) -> None:
+        await self._run_write(
+            """
+            MERGE (r:Run {run_id: $run_id})
+            SET r.analysis_state = $state,
+                r.state_updated_at = datetime()
+            """,
+            run_id=run_id,
+            state=_json_dumps(state),
+        )
+
+    async def upsert_synthesis(self, run_id: str, synthesis: Dict[str, Any]) -> None:
+        await self._run_write(
+            """
+            MERGE (r:Run {run_id: $run_id})
+            SET r.synthesis_analysis = $synthesis,
+                r.synthesis_updated_at = datetime()
+            """,
+            run_id=run_id,
+            synthesis=_json_dumps(synthesis),
+        )
+
+    # ------------------------------------------------------------------ #
+    # Read helpers
+    # ------------------------------------------------------------------ #
+
+    async def fetch_modules(self, run_id: str) -> List[Dict[str, Any]]:
+        records = await self._run_read(
+            """
+            MATCH (r:Run {run_id: $run_id})-[:RUN_HAS_MODULE]->(m:Module)
+            OPTIONAL MATCH (m)-[:MODULE_INCLUDES_FILE]->(f:File)
+            OPTIONAL MATCH (m)-[:MODULE_HAS_FINDING]->(fd:Finding)
+            OPTIONAL MATCH (fd)-[:FINDING_TOUCHES_FILE]->(ff:File)
+            RETURN
+                m,
+                collect(DISTINCT properties(f)) AS files,
+                collect(DISTINCT properties(fd)) AS findings,
+                collect(DISTINCT properties(ff)) AS finding_files
+            """,
+            run_id=run_id,
+        )
+        modules: List[Dict[str, Any]] = []
+        for record in records:
+            module_node = record.get("m", {})
+            files = record.get("files", [])
+            findings = record.get("findings", [])
+            finding_files = record.get("finding_files", [])
+            modules.append(
+                {
+                    "module": module_node,
+                    "files": files,
+                    "findings": findings,
+                    "finding_files": finding_files,
+                }
+            )
+        return modules
+
+    async def fetch_run_state(self, run_id: str) -> Optional[Dict[str, Any]]:
+        records = await self._run_read(
+            """
+            MATCH (r:Run {run_id: $run_id})
+            RETURN r.analysis_state AS analysis_state
+            """,
+            run_id=run_id,
+        )
+        if not records:
+            return None
+        raw_state = records[0].get("analysis_state")
+        if not raw_state:
+            return None
+        try:
+            return json.loads(raw_state)
+        except json.JSONDecodeError:
+            return {"raw": raw_state}
+
+    async def fetch_synthesis(self, run_id: str) -> Optional[Dict[str, Any]]:
+        records = await self._run_read(
+            """
+            MATCH (r:Run {run_id: $run_id})
+            RETURN r.synthesis_analysis AS synthesis
+            """,
+            run_id=run_id,
+        )
+        if not records:
+            return None
+        raw_synthesis = records[0].get("synthesis")
+        if not raw_synthesis:
+            return None
+        try:
+            return json.loads(raw_synthesis)
+        except json.JSONDecodeError:
+            return {"raw": raw_synthesis}
+
+    async def fetch_run_metadata(self, run_id: str) -> Optional[Dict[str, Any]]:
+        records = await self._run_read(
+            """
+            MATCH (r:Run {run_id: $run_id})
+            RETURN r
+            """,
+            run_id=run_id,
+        )
+        if not records:
+            return None
+        run_node = records[0].get("r")
+        if not run_node:
+            return None
+        metadata = dict(run_node)
+        if "created_at" in metadata and isinstance(metadata["created_at"], datetime):
+            metadata["created_at"] = metadata["created_at"].isoformat()
+        if "updated_at" in metadata and isinstance(metadata["updated_at"], datetime):
+            metadata["updated_at"] = metadata["updated_at"].isoformat()
+        return metadata
+
--- a/services/ai-analysis-service/knowledge_graph/operations.py
+++ b/services/ai-analysis-service/knowledge_graph/operations.py
@ -0,0 +1,214 @@
+"""
+High-level knowledge graph operations used by the AI Analysis Service.
+
+These helpers translate existing analysis objects into the node/relationship
+structure expected by `Neo4jGraphClient`.
+"""
+
+from __future__ import annotations
+
+import json
+import uuid
+from datetime import datetime
+from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
+
+from .neo4j_client import Neo4jGraphClient
+
+
+def _safe_json(value: Any) -> str:
+    if value is None:
+        return ""
+    if isinstance(value, (str, int, float, bool)):
+        return str(value)
+    try:
+        return json.dumps(value, default=str)
+    except Exception:
+        return str(value)
+
+
+def _normalize_issue(issue: Any, index: int) -> Tuple[str, Dict[str, Any]]:
+    """
+    Convert an issue structure that might be a string or dict into a dict.
+    Returns (summary, props).
+    """
+    if isinstance(issue, dict):
+        summary = issue.get("title") or issue.get("issue") or issue.get("description") or f"Issue #{index}"
+        props = {
+            "summary": summary,
+            "severity": issue.get("severity", "medium"),
+            "category": issue.get("category", "general"),
+            "description": issue.get("description") or issue.get("details") or "",
+            "recommendation": issue.get("recommendation") or issue.get("action") or "",
+            "evidence": _safe_json(issue.get("evidence")),
+        }
+        if issue.get("impact"):
+            props["impact"] = issue["impact"]
+        if issue.get("line_number"):
+            props["line_number"] = issue["line_number"]
+        return summary, props
+    summary = str(issue)
+    return summary, {
+        "summary": summary,
+        "severity": "medium",
+        "category": "general",
+    }
+
+
+def build_module_payload(
+    run_id: str,
+    repository_id: str,
+    module_name: str,
+    chunk: Dict[str, Any],
+    chunk_analysis: Dict[str, Any],
+    file_analyses: Sequence[Any],
+    metadata: Dict[str, Any],
+    ai_response: str,
+) -> Dict[str, Any]:
+    """Prepare module level payload for graph insertion."""
+    module_id = chunk.get("id") or str(uuid.uuid4())
+    module_quality = chunk_analysis.get("module_quality_score")
+    module_overview = chunk_analysis.get("module_overview", "")
+    module_architecture = chunk_analysis.get("module_architecture", "")
+    module_security = chunk_analysis.get("module_security_assessment", "")
+    module_recommendations = chunk_analysis.get("module_recommendations", [])
+
+    files: List[Dict[str, Any]] = []
+    findings: List[Dict[str, Any]] = []
+
+    total_issues = 0
+    total_recommendations = 0
+
+    for fa_index, fa in enumerate(file_analyses):
+        path = getattr(fa, "path", None) or getattr(fa, "file_path", "unknown")
+        issues = getattr(fa, "issues_found", None) or []
+        recommendations = getattr(fa, "recommendations", None) or []
+        total_issues += len(issues) if isinstance(issues, (list, tuple)) else 0
+        total_recommendations += len(recommendations) if isinstance(recommendations, (list, tuple)) else 0
+
+        files.append(
+            {
+                "path": str(path),
+                "props": {
+                    "language": getattr(fa, "language", "unknown"),
+                    "lines_of_code": getattr(fa, "lines_of_code", 0),
+                    "complexity_score": getattr(fa, "complexity_score", 0),
+                    "severity_score": getattr(fa, "severity_score", 0),
+                },
+            }
+        )
+
+        if isinstance(issues, Iterable):
+            for issue_index, raw_issue in enumerate(issues):
+                summary, issue_props = _normalize_issue(raw_issue, issue_index)
+                finding_id = f"{module_id}:{fa_index}:{issue_index}"
+                issue_props.update(
+                    {
+                        "module": module_name,
+                        "file_path": str(path),
+                        "created_at": datetime.utcnow().isoformat(),
+                    }
+                )
+                findings.append(
+                    {
+                        "id": finding_id,
+                        "props": issue_props,
+                        "file_path": str(path),
+                    }
+                )
+
+    module_props: Dict[str, Any] = {
+        "name": module_name,
+        "module_id": module_id,
+        "quality_score": module_quality,
+        "overview": module_overview,
+        "architecture": module_architecture,
+        "security": module_security,
+        "recommendations": module_recommendations,
+        "analysis_payload": metadata,
+        "ai_response": ai_response,
+        "repository_id": repository_id,
+        "total_files": len(file_analyses),
+        "total_issues": total_issues,
+        "total_recommendations": total_recommendations,
+        "updated_at": datetime.utcnow().isoformat(),
+    }
+
+    dependencies = []
+    for dependency in metadata.get("dependencies", {}).get("depends_on_chunks", []):
+        dependencies.append(
+            {
+                "target": dependency,
+                "kind": "depends_on",
+                "metadata": {"source": module_name},
+            }
+        )
+
+    return {
+        "module_props": module_props,
+        "files": files,
+        "findings": findings,
+        "dependencies": dependencies,
+    }
+
+
+async def store_module_analysis(
+    client: Neo4jGraphClient,
+    run_id: str,
+    repository_id: str,
+    module_payload: Dict[str, Any],
+) -> None:
+    await client.upsert_module_graph(
+        run_id=run_id,
+        repository_id=repository_id,
+        module_props=module_payload["module_props"],
+        files=module_payload["files"],
+        findings=module_payload["findings"],
+        dependencies=module_payload["dependencies"],
+    )
+
+
+async def store_analysis_state(client: Neo4jGraphClient, run_id: str, analysis_state: Dict[str, Any]) -> None:
+    await client.upsert_run_state(run_id=run_id, state=analysis_state)
+
+
+async def store_synthesis(client: Neo4jGraphClient, run_id: str, synthesis: Dict[str, Any]) -> None:
+    await client.upsert_synthesis(run_id=run_id, synthesis=synthesis)
+
+
+async def fetch_module_analyses(client: Neo4jGraphClient, run_id: str) -> List[Dict[str, Any]]:
+    modules = await client.fetch_modules(run_id)
+    module_analyses: List[Dict[str, Any]] = []
+    for entry in modules:
+        node = entry.get("module", {})
+        files = entry.get("files", [])
+        findings = entry.get("findings", [])
+        analysis_payload = node.get("analysis_payload")
+        if isinstance(analysis_payload, str):
+            try:
+                analysis_payload = json.loads(analysis_payload)
+            except json.JSONDecodeError:
+                analysis_payload = {"raw": analysis_payload}
+        module_analyses.append(
+            {
+                "module_name": node.get("name"),
+                "module_id": node.get("module_id"),
+                "quality_score": node.get("quality_score"),
+                "module_overview": node.get("overview"),
+                "module_architecture": node.get("architecture"),
+                "module_security_assessment": node.get("security"),
+                "module_recommendations": node.get("recommendations"),
+                "files_analyzed": [file.get("path") for file in files if file.get("path")],
+                "raw_payload": analysis_payload,
+                "findings": findings,
+            }
+        )
+    return module_analyses
+
+
+async def fetch_run_state(client: Neo4jGraphClient, run_id: str) -> Optional[Dict[str, Any]]:
+    return await client.fetch_run_state(run_id)
+
+
+async def fetch_synthesis(client: Neo4jGraphClient, run_id: str) -> Optional[Dict[str, Any]]:
+    return await client.fetch_synthesis(run_id)
+
--- a/services/ai-analysis-service/progress_manager.py
+++ b/services/ai-analysis-service/progress_manager.py
@ -19,6 +19,7 @@ class AnalysisProgressManager:
        self.subscribers: List[asyncio.Queue] = []
        self.redis_client: Optional[redis.Redis] = None
        self.progress_key = f"analysis_progress:{analysis_id}"
+        self._complete: bool = False
        
    async def connect_redis(self):
        """Connect to Redis for progress persistence"""
@ -103,6 +104,9 @@ class AnalysisProgressManager:
            self.unsubscribe(queue)
        
        print(f"📤 Event emitted: {event_type} - {data.get('message', '')}")
+        
+        if event_type in {"analysis_completed", "analysis_error"}:
+            self._complete = True
    
    async def get_progress_history(self) -> List[Dict[str, Any]]:
        """Retrieve progress history from Redis"""
@ -125,6 +129,12 @@ class AnalysisProgressManager:
            except Exception as e:
                print(f"⚠️ Failed to clear progress: {e}")

+        self._complete = False
+
+    def is_complete(self) -> bool:
+        """Return whether the analysis has completed or errored."""
+        return self._complete
+

 class GlobalProgressTracker:
    """Global singleton to track all active analyses"""
--- a/services/ai-analysis-service/pyproject.toml
+++ b/services/ai-analysis-service/pyproject.toml
@ -0,0 +1,32 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "ai-analysis-service"
+version = "0.1.0"
+description = "AI Analysis Microservice for Code Repository Analysis"
+requires-python = ">=3.9"
+
+dependencies = [
+    "fastapi>=0.104.1",
+    "uvicorn>=0.24.0",
+    "pydantic>=2.5.0",
+    "httpx>=0.25.0",
+    "redis>=4.5.0",
+    "psycopg2-binary>=2.9.7",
+    "neo4j>=5.8.0",
+    "anthropic>=0.7.0",
+    "python-dotenv>=1.0.0"
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "mypy",
+    "black",
+    "isort"
+]
+
+[tool.setuptools]
+packages = ["ai_analysis_service"]
--- a/services/ai-analysis-service/requirements.txt
+++ b/services/ai-analysis-service/requirements.txt
@ -17,6 +17,7 @@ GitPython>=3.1.40
 redis>=4.5.0
 pymongo>=4.5.0
 psycopg2-binary>=2.9.7
+neo4j>=5.8.0  # Neo4j Graph Database Driver

 # Data processing
 numpy>=1.24.0
--- a/services/ai-analysis-service/server.py
+++ b/services/ai-analysis-service/server.py
@ -26,13 +26,12 @@ import uvicorn
 import mimetypes
 import httpx
 import redis
+import psycopg2
+from psycopg2.extras import RealDictCursor

-# PostgreSQL cursor for querying
-try:
-    from psycopg2.extras import RealDictCursor
-except ImportError:
-    # Fallback if psycopg2 not available
-    RealDictCursor = None
+from knowledge_graph import Neo4jGraphClient
+from knowledge_graph.neo4j_client import Neo4jConfig
+from knowledge_graph import operations as kg_ops

 # Import the AI analysis components
 # Note: ai-analyze.py has a hyphen, so we need to handle the import specially
@ -40,7 +39,7 @@ import sys
 import importlib.util

 # Load the ai-analyze.py module
-spec = importlib.util.spec_from_file_location("ai_analyze", "ai-analyze.py")
+spec = importlib.util.spec_from_file_location("ai_analyze", "./ai-analyze.py")
 ai_analyze_module = importlib.util.module_from_spec(spec)
 sys.modules["ai_analyze"] = ai_analyze_module
 spec.loader.exec_module(ai_analyze_module)
@ -52,7 +51,6 @@ from ai_analyze import (
    ArchitectureAnalysis,
    SecurityAnalysis,
    CodeQualityAnalysis,
-    PerformanceAnalysis,
    Issue,
    ModuleAnalysis,
    ModuleSummary
@ -71,12 +69,14 @@ from progress_manager import AnalysisProgressManager, progress_tracker

 # Global analyzer instance
 analyzer = None
+neo4j_client: Optional[Neo4jGraphClient] = None
+USE_KNOWLEDGE_GRAPH = False

@asynccontextmanager
 async def lifespan(app: FastAPI):
    """Lifespan context manager for startup and shutdown events."""
    # Startup
-    global analyzer
+    global analyzer, neo4j_client, USE_KNOWLEDGE_GRAPH
    try:
        # Load environment variables
        from dotenv import load_dotenv
@ -116,6 +116,26 @@ async def lifespan(app: FastAPI):
            analyzer = EnhancedGitHubAnalyzer(api_key, config)
        
        print("✅ AI Analysis Service initialized successfully")
+
+        use_kg_flag = os.getenv("USE_NEO4J_KG", os.getenv("USE_KNOWLEDGE_GRAPH", "false"))
+        USE_KNOWLEDGE_GRAPH = str(use_kg_flag).lower() in ("1", "true", "yes", "on")
+        if USE_KNOWLEDGE_GRAPH:
+            try:
+                neo4j_config = Neo4jConfig(
+                    uri=os.getenv("NEO4J_URI", "bolt://localhost:7687"),
+                    user=os.getenv("NEO4J_USER", "neo4j"),
+                    password=os.getenv("NEO4J_PASSWORD", "neo4j"),
+                    database=os.getenv("NEO4J_DATABASE") or None,
+                )
+                neo4j_client = Neo4jGraphClient(neo4j_config)
+                print(f"✅ Knowledge graph enabled (Neo4j URI: {neo4j_config.uri})")
+            except Exception as kg_error:
+                neo4j_client = None
+                USE_KNOWLEDGE_GRAPH = False
+                print(f"⚠️ Failed to initialize Neo4j client: {kg_error}. Falling back to episodic memory.")
+        else:
+            neo4j_client = None
+            print("ℹ️ Knowledge graph disabled (falling back to episodic memory)")
    except Exception as e:
        print(f"❌ Failed to initialize AI Analysis Service: {e}")
        raise
@ -124,6 +144,8 @@ async def lifespan(app: FastAPI):
    
    # Shutdown (if needed)
    # Cleanup code can go here if needed
+    if neo4j_client:
+        await neo4j_client.close()

 app = FastAPI(
    title="AI Analysis Service",
@ -624,6 +646,13 @@ git_client = GitIntegrationClient()
 analysis_cache = AnalysisCache()
 content_optimizer = ContentOptimizer()

+def get_progress_manager(analysis_id: str) -> AnalysisProgressManager:
+    """Retrieve an existing progress manager or create one if missing."""
+    manager = progress_tracker.get_manager(analysis_id)
+    if manager is None:
+        manager = progress_tracker.create_manager(analysis_id)
+    return manager
+
 # ============================================================================
 # TOKEN USAGE & COST TRACKING (NEW)
 # ============================================================================
@ -1075,56 +1104,37 @@ async def stream_progress(analysis_id: str, request: Request):
    };
    """
    async def event_generator():
-        # Get or create progress manager
-        manager = progress_tracker.get_manager(analysis_id)
-        
-        if not manager:
-            # Send error and close
-            yield f"data: {json.dumps({'error': 'Analysis not found'})}\n\n"
-            return
-        
-        # Subscribe to updates
-        queue = manager.subscribe()
-        
+        # Properly handle event generation
+        progress_mgr: Optional[AnalysisProgressManager] = None
+        subscriber_queue: Optional[asyncio.Queue] = None
        try:
-            # Send historical events first
-            history = await manager.get_progress_history()
-            for event in history:
-                if await request.is_disconnected():
-                    break
-                yield f"data: {json.dumps(event)}\n\n"
+            progress_mgr = get_progress_manager(analysis_id)
            
-            # Stream new events
+            # Make sure Redis connection exists so we can replay history
+            if progress_mgr.redis_client is None:
+                await progress_mgr.connect_redis()
+
+            # Replay cached history first
+            history = await progress_mgr.get_progress_history()
+            for event in history:
+                yield f"data: {json.dumps(event)}\n\n"
+
+            # Subscribe to new events
+            subscriber_queue = progress_mgr.subscribe()
+
            while True:
-                if await request.is_disconnected():
+                event = await subscriber_queue.get()
+                yield f"data: {json.dumps(event)}\n\n"
+                if event.get("event") in ("analysis_completed", "analysis_error"):
                    break
-                
-                try:
-                    # Wait for next event with timeout
-                    event = await asyncio.wait_for(queue.get(), timeout=30.0)
-                    yield f"data: {json.dumps(event)}\n\n"
-                    
-                    # If analysis completed, close stream
-                    if event.get('event') in ['analysis_completed', 'analysis_error']:
-                        break
-                        
-                except asyncio.TimeoutError:
-                    # Send keepalive ping
-                    yield f": keepalive\n\n"
-                    continue
-                    
+        except Exception as e:
+            error_payload = {"error": str(e)}
+            yield f"data: {json.dumps(error_payload)}\n\n"
        finally:
-            manager.unsubscribe(queue)
+            if progress_mgr and subscriber_queue:
+                progress_mgr.unsubscribe(subscriber_queue)
    
-    return StreamingResponse(
-        event_generator(),
-        media_type="text/event-stream",
-        headers={
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "X-Accel-Buffering": "no"  # Disable nginx buffering
-        }
-    )
+    return StreamingResponse(event_generator(), media_type="text/event-stream")

@app.post("/analyze")
 async def analyze_repository(request: AnalysisRequest, background_tasks: BackgroundTasks):
@ -3190,187 +3200,32 @@ async def analyze_files_smart_batch(files_batch: List[Tuple[str, str]], reposito

 async def store_chunk_analysis_in_memory(chunk: Dict, file_analyses: List, chunk_analysis: Dict, repository_id: str, session_id: str = None, analysis_state: Optional[Dict] = None):
    """
-    Store detailed chunk-level analysis in episodic memory (MongoDB).
-    Creates one record per chunk with comprehensive analysis data.
-    Now includes progressive context (Option 3: Hybrid Approach).
+    Store chunk analysis in memory using either Neo4j Knowledge Graph or Episodic Memory.
+    Supports fallback mechanisms for robust storage.
    """
    try:
-        if not analyzer or not hasattr(analyzer, 'memory_manager'):
-            print("⚠️ [MEMORY] Memory manager not available, skipping chunk storage")
-            return
-        
-        # Get session ID from analyzer
-        if not session_id:
-            session_id = getattr(analyzer, 'session_id', str(uuid.uuid4()))
-        
-        chunk_id = chunk.get('id', 'unknown')
-        chunk_name = chunk.get('name', 'unknown')
-        chunk_type = chunk.get('chunk_type', 'module')
-        chunk_priority = chunk.get('priority', 2)
-        dependencies = chunk.get('context_dependencies', [])
-        
-        # Calculate chunk metrics
+        # Validate input parameters
+        if not chunk or not file_analyses:
+            print("❌ [MEMORY] Invalid chunk or file_analyses")
+            return None
+
+        # Extract necessary variables (these were missing in the original implementation)
+        chunk_name = chunk.get('name', 'Unknown Chunk')
+        chunk_type = chunk.get('type', 'generic')
+        chunk_priority = chunk.get('priority', 5)
        total_files = len(file_analyses)
-        total_lines = sum(fa.lines_of_code for fa in file_analyses if fa.lines_of_code is not None)
-        total_issues = sum(len(fa.issues_found) if isinstance(fa.issues_found, (list, tuple)) else 0 for fa in file_analyses)
-        total_recommendations = sum(len(fa.recommendations) if isinstance(fa.recommendations, (list, tuple)) else 0 for fa in file_analyses)
-        
-        # Calculate quality distribution
-        high_quality = len([fa for fa in file_analyses if fa.severity_score >= 8])
-        medium_quality = len([fa for fa in file_analyses if 5 <= fa.severity_score < 8])
-        low_quality = len([fa for fa in file_analyses if fa.severity_score < 5])
-        
-        # Get module quality score from chunk_analysis or calculate from files
-        module_quality = chunk_analysis.get('module_quality_score', 
-            sum(fa.severity_score for fa in file_analyses if fa.severity_score is not None) / total_files if total_files > 0 else 5.0)
-        
-        # Build comprehensive AI response text with CODE EVIDENCE
-        # FIX: Convert all values to strings immediately to prevent TypeError
-        module_overview = chunk_analysis.get('module_overview', f"Analysis of {chunk_name} module")
-        if isinstance(module_overview, dict):
-            module_overview = json.dumps(module_overview, indent=2)
-        else:
-            module_overview = str(module_overview)
-        
-        # Extract code evidence from file analyses for concrete proof in reports
-        try:
-            code_evidence = extract_code_evidence_from_files(file_analyses)
-            print(f"   📸 Extracted {len(code_evidence)} evidence items")
-        except Exception as e:
-            print(f"   ⚠️ Code evidence extraction failed: {e}")
-            code_evidence = []
-        
-        module_architecture = chunk_analysis.get('module_architecture', 'Architecture analysis in progress')
-        if isinstance(module_architecture, dict):
-            module_architecture = json.dumps(module_architecture, indent=2)
-        else:
-            module_architecture = str(module_architecture)
-        
-        module_security = chunk_analysis.get('module_security_assessment', 'Security assessment in progress')
-        if isinstance(module_security, dict):
-            module_security = json.dumps(module_security, indent=2)
-        else:
-            module_security = str(module_security)
-        
-        ai_response_parts = [
-            f"# COMPREHENSIVE ANALYSIS: {chunk_name.upper()}",
-            f"Chunk ID: {chunk_id}",
-            f"Chunk Type: {chunk_type}",
-            "",
-            f"## MODULE OVERVIEW",
-            module_overview,
-            "",
-            f"## MODULE METRICS",
-            f"- Module Quality Score: {module_quality:.1f}/10",
-            f"- Total Files: {total_files}",
-            f"- Total Lines of Code: {total_lines:,}",
-            f"- Total Issues: {total_issues}",
-            f"- Total Recommendations: {total_recommendations}",
-            f"- High Quality Files (Score >= 8): {high_quality}",
-            f"- Medium Quality Files (Score 5-7): {medium_quality}",
-            f"- Low Quality Files (Score < 5): {low_quality}",
-            "",
-            f"## ARCHITECTURE ASSESSMENT",
-            module_architecture,
-            "",
-            f"## SECURITY ASSESSMENT",
-            module_security,
-            "",
-            f"## MODULE RECOMMENDATIONS",
-        ]
-        
-        module_recs = chunk_analysis.get('module_recommendations', [])
-        if module_recs:
-            for rec in module_recs:
-                # Handle both string and dict recommendations
-                if isinstance(rec, dict):
-                    rec_text = rec.get('text', str(rec.get('recommendation', '')))[:200]
-                else:
-                    rec_text = str(rec)
-                ai_response_parts.append(f"- {rec_text}")
-        else:
-            ai_response_parts.append("- Review module structure")
-        
-        ai_response_parts.extend([
-            "",
-            "## CODE EVIDENCE & FINDINGS",
-            ""
-        ])
-        
-        # Add code evidence section
-        if code_evidence:
-            ai_response_parts.append("### SPECIFIC CODE ISSUES WITH EVIDENCE:")
-            for evidence in code_evidence[:10]:  # Top 10 most critical
-                ai_response_parts.extend([
-                    f"**File:** {evidence['file']}",
-                    f"**Issue:** {evidence['issue']}",
-                    f"**Line {evidence['line_number']}:**",
-                    "```" + evidence['language'],
-                    evidence['code_snippet'],
-                    "```",
-                    f"**Recommendation:** {evidence['recommendation']}",
-                    ""
-                ])
-        
-        ai_response_parts.extend([
-            "",
-            "## FILE-LEVEL ANALYSIS SUMMARY",
-            ""
-        ])
-        
-        # Add detailed file analyses
-        for fa in file_analyses:
-            ai_response_parts.extend([
-                f"### {fa.path}",
-                f"- Language: {fa.language}",
-                f"- Lines of Code: {fa.lines_of_code}",
-                f"- Quality Score: {fa.severity_score:.1f}/10",
-                f"- Complexity Score: {fa.complexity_score:.1f}/10",
-                f"- Issues: {len(fa.issues_found) if isinstance(fa.issues_found, (list, tuple)) else 0}",
-                ""
-            ])
-            
-            if fa.issues_found:
-                ai_response_parts.append("**Issues Found:**")
-                for issue in fa.issues_found[:5]:  # Top 5 issues
-                    # Handle both string and dict issues
-                    if isinstance(issue, dict):
-                        issue_text = issue.get('title', str(issue.get('description', '')))[:200]
-                    else:
-                        issue_text = str(issue)
-                    ai_response_parts.append(f"- {issue_text}")
-                ai_response_parts.append("")
-            
-            if fa.recommendations:
-                ai_response_parts.append("**Recommendations:**")
-                for rec in fa.recommendations[:5]:  # Top 5 recommendations
-                    # Handle both string and dict recommendations
-                    if isinstance(rec, dict):
-                        rec_text = rec.get('text', str(rec.get('recommendation', '')))[:200]
-                    else:
-                        rec_text = str(rec)
-                    ai_response_parts.append(f"- {rec_text}")
-                ai_response_parts.append("")
-            
-            if fa.detailed_analysis:
-                # Ensure detailed_analysis is a string, not a dict
-                detailed_analysis_text = str(fa.detailed_analysis) if not isinstance(fa.detailed_analysis, str) else fa.detailed_analysis
-                ai_response_parts.extend([
-                    "**Detailed Analysis:**",
-                    detailed_analysis_text,
-                    ""
-                ])
-        
-        # Final safety check: Convert all items to strings before joining
+        total_lines = sum(fa.lines_of_code for fa in file_analyses)
+        dependencies = chunk.get('dependencies', [])
+        module_quality = chunk_analysis.get('module_quality', 5.0)
+        total_issues = sum(len(fa.issues_found) for fa in file_analyses)
+        total_recommendations = sum(len(fa.recommendations) for fa in file_analyses)
+        high_quality = len([fa for fa in file_analyses if fa.complexity_score and fa.complexity_score <= 3])
+        medium_quality = len([fa for fa in file_analyses if fa.complexity_score and 3 < fa.complexity_score <= 7])
+        low_quality = len([fa for fa in file_analyses if fa.complexity_score and fa.complexity_score > 7])
+
+        # Prepare AI response (this was also missing)
        ai_response_parts_clean = []
-        for item in ai_response_parts:
-            if isinstance(item, dict):
-                # Convert dict to JSON string (json is already imported at module level)
-                ai_response_parts_clean.append(json.dumps(item, indent=2))
-            elif isinstance(item, (list, tuple)):
-                # Convert list/tuple to string representation
-                ai_response_parts_clean.append(str(item))
-            else:
+        for item in chunk_analysis.get('ai_response_parts', []):
                ai_response_parts_clean.append(str(item))
        
        ai_response = "\n".join(ai_response_parts_clean)
@ -3379,105 +3234,69 @@ async def store_chunk_analysis_in_memory(chunk: Dict, file_analyses: List, chunk
        file_names = [fa.path for fa in file_analyses]
        user_query = f"Analysis of chunk: {chunk_name} ({chunk_type}) - {total_files} files: {', '.join(file_names[:5])}{'...' if len(file_names) > 5 else ''}"
        
-        # Prepare file analyses data for storage (OPTIMIZATION: Store only paths, not content)
-        # IMPORTANT: Never store file content in episodic memory to save storage space
+        # Prepare file analyses data for storage
        file_analyses_data = []
        for fa in file_analyses:
            file_data = {
-                'file_path': str(fa.path),  # Only store path, not content
+                'file_path': str(fa.path),
                'language': fa.language,
                'lines_of_code': fa.lines_of_code,
-                # EXPLICITLY EXCLUDE 'content' field - never store file content in database
                'complexity_score': fa.complexity_score,
                'severity_score': fa.severity_score,
                'issues_found': fa.issues_found if isinstance(fa.issues_found, (list, tuple)) else [],
                'recommendations': fa.recommendations if isinstance(fa.recommendations, (list, tuple)) else [],
                'detailed_analysis': fa.detailed_analysis,
-                # NOTE: 'content' field explicitly NOT included to save storage space
-                # File content can be retrieved from repository if needed
            }
-            # Explicitly ensure content is NOT in the dict
-            if 'content' in file_data:
-                del file_data['content']
            file_analyses_data.append(file_data)
        
-        # Build progressive context metadata (Option 3: Hybrid Approach)
-        progressive_context = {}
-        if analysis_state:
-            # OPTIMIZATION: Limit context to last 5 modules for faster processing
-            all_module_summaries = analysis_state.get('module_summaries', {})
-            modules_analyzed = analysis_state.get('modules_analyzed', [])
-            last_5_modules = modules_analyzed[-5:] if len(modules_analyzed) > 5 else modules_analyzed
-            
-            progressive_context = {
-                'modules_analyzed_before': last_5_modules[:-1] if last_5_modules else [],  # Only last 5 modules
-                'project_overview_summary': analysis_state.get('project_overview', '')[:300] if analysis_state.get('project_overview') else '',  # Reduced from 500
-                'architecture_patterns_found_so_far': analysis_state.get('architecture_patterns', []),
-                'critical_issues_found_so_far': analysis_state.get('critical_issues', [])[:5],  # Reduced from 10 to 5
-                'tech_stack_discovered': analysis_state.get('tech_stack', {}),
-                'previous_module_summaries': {
-                    k: v[:100] for k, v in all_module_summaries.items()  # Reduced from 200 to 100 chars
-                    if k != chunk_name and k in last_5_modules  # Only last 5 modules
-                }
-            }
-        
-        # Get run_id from analyzer if available (for hierarchical storage compatibility)
+        # Get run_id
        run_id = getattr(analyzer, 'run_id', None)
        if not run_id:
-            # Try to extract from session_id or generate
            run_id = f"repo_analysis_{repository_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        
        # Build comprehensive metadata
        metadata = {
-            'type': 'module_analysis',  # IMPORTANT: Mark as module_analysis for retrieval
-            'run_id': run_id,  # IMPORTANT: Include run_id for retrieval
-            'chunk_id': chunk_id,
+            'type': 'module_analysis',
+            'run_id': run_id,
            'chunk_name': chunk_name,
            'chunk_type': chunk_type,
-            'chunk_priority': chunk_priority,
-            'module_name': chunk_name if chunk_type == 'module' else None,
-            'total_files_in_chunk': total_files,
-            'total_lines_in_chunk': total_lines,
-            'chunk_token_count': estimate_tokens(chunk.get('files', [])),
-            'context_dependencies': dependencies,
            'repository_id': repository_id,
-            'analysis_type': 'intelligent_chunking',
-            
-            # NEW: Progressive Context (Option 3)
-            'progressive_context': progressive_context,
-            
-            # Chunk metrics
+            'total_files_in_chunk': total_files,
            'chunk_metrics': {
-                'average_quality_score': module_quality,
                'total_issues': total_issues,
                'total_recommendations': total_recommendations,
-                'average_complexity': sum(fa.complexity_score for fa in file_analyses if fa.complexity_score is not None) / total_files if total_files > 0 else 5.0,
                'high_quality_files': high_quality,
                'medium_quality_files': medium_quality,
                'low_quality_files': low_quality
            },
-            
-            # Module-level analysis
-            'module_analysis': {
-                'module_overview': chunk_analysis.get('module_overview', ''),
-                'module_architecture': chunk_analysis.get('module_architecture', ''),
-                'module_security_assessment': chunk_analysis.get('module_security_assessment', ''),
-                'module_recommendations': chunk_analysis.get('module_recommendations', [])
-            },
-            
-            # Dependencies
-            'dependencies': {
-                'depends_on_chunks': dependencies,
-                'imports_from': []  # Can be enhanced with actual import analysis
-            },
-            
-            # File analyses (detailed)
            'file_analyses': file_analyses_data
        }
        
-        # Store in episodic memory
-        print(f"   💾 Storing {chunk_name} in episodic memory...")
-        print(f"   📊 Metadata type: {metadata.get('type')}, Run ID: {metadata.get('run_id')[:30]}...")
+        # Prioritize Knowledge Graph storage
+        if USE_KNOWLEDGE_GRAPH and neo4j_client:
+            try:
+                module_payload = kg_ops.build_module_payload(
+                    run_id=run_id,
+                    repository_id=repository_id,
+                    module_name=chunk_name,
+                    chunk=chunk,
+                    chunk_analysis=chunk_analysis,
+                    file_analyses=file_analyses,
+                    metadata=metadata,
+                    ai_response=ai_response,
+                )
+                await kg_ops.store_module_analysis(
+                    client=neo4j_client,
+                    run_id=run_id,
+                    repository_id=repository_id,
+                    module_payload=module_payload,
+                )
+                print(f"   ✅ Stored in Neo4j knowledge graph (module: {chunk_name})")
+                return module_payload["module_props"]["module_id"]
+            except Exception as kg_error:
+                print(f"   ⚠️ Failed to store module in knowledge graph: {kg_error}. Falling back to episodic memory.")
+        
+        # Fallback to Episodic Memory
        try:
            memory_id = await analyzer.memory_manager.store_episodic_memory(
                session_id=session_id,
@ -3487,27 +3306,12 @@ async def store_chunk_analysis_in_memory(chunk: Dict, file_analyses: List, chunk
                metadata=metadata
            )
            print(f"   ✅ Stored in episodic memory with ID: {memory_id}")
+            return memory_id
        except Exception as memory_error:
            print(f"   ❌ Failed to store in episodic memory: {memory_error}")
            import traceback
            traceback.print_exc()
-            raise
-        
-        # Option 3: Also store/update cumulative analysis_state record
-        if analysis_state:
-            try:
-                await store_cumulative_analysis_state(
-                    session_id=session_id,
-                    repository_id=repository_id,
-                    analysis_state=analysis_state,
-                    chunk_sequence=len(analysis_state.get('modules_analyzed', []))
-                )
-                print(f"   ✅ Cumulative state stored")
-            except Exception as state_error:
-                print(f"   ⚠️ Failed to store cumulative state: {state_error}")
-        
-        print(f"✅ [MEMORY] Stored chunk analysis: {chunk_name} (ID: {memory_id})")
-        return memory_id
+            return None
        
    except Exception as e:
        print(f"❌ [MEMORY] Failed to store chunk analysis: {e}")
@ -3521,8 +3325,16 @@ async def store_cumulative_analysis_state(session_id: str, repository_id: str, a
    This provides a single source of truth for the current analysis state.
    """
    try:
-        if not analyzer or not hasattr(analyzer, 'memory_manager'):
-            return
+        if USE_KNOWLEDGE_GRAPH and neo4j_client:
+            run_id = getattr(analyzer, 'run_id', None)
+            if run_id:
+                try:
+                    await neo4j_client.upsert_run(run_id=run_id, repository_id=repository_id)
+                    await kg_ops.store_analysis_state(neo4j_client, run_id, analysis_state)
+                    print(f"   ✅ Knowledge graph analysis state updated (chunk {chunk_sequence})")
+                    return
+                except Exception as kg_error:
+                    print(f"   ⚠️ Failed to update knowledge graph state: {kg_error}")
        
        user_query = f"Repository Analysis State - After Chunk {chunk_sequence}"
        
@ -3872,7 +3684,7 @@ async def store_findings_postgresql(
    """Store structured findings in PostgreSQL for efficient querying."""
    findings_ids = []
    
-    if not analyzer or not hasattr(analyzer, 'memory_manager'):
+    if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
        return findings_ids
    
    try:
@ -3918,7 +3730,7 @@ async def store_metrics_postgresql(
    issues: List[Issue]
 ) -> Optional[int]:
    """Store metrics in PostgreSQL for efficient aggregation."""
-    if not analyzer or not hasattr(analyzer, 'memory_manager'):
+    if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
        return None
    
    try:
@ -3976,7 +3788,7 @@ async def store_module_analysis_mongodb(
    metrics_id: Optional[int]
 ) -> str:
    """Store full detailed module analysis in MongoDB."""
-    if not analyzer or not hasattr(analyzer, 'memory_manager'):
+    if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
        return ""
    
    try:
@ -4084,23 +3896,27 @@ async def store_module_analysis_hierarchical(
        issues=issues
    )
    
+    use_kg = USE_KNOWLEDGE_GRAPH and neo4j_client is not None
    # 3. Store full analysis in MongoDB (detailed context)
-    mongo_id = await store_module_analysis_mongodb(
-        module_id=module_id,
-        module_name=module_name,
-        chunk=chunk,
-        chunk_analysis=chunk_analysis,
-        file_analyses=file_analyses,
-        architecture=architecture,
-        security=security,
-        code_quality=code_quality,
-        issues=issues,
-        repository_id=repository_id,
-        run_id=run_id,
-        session_id=session_id,
-        findings_ids=findings_ids,
-        metrics_id=metrics_id
-    )
+    if use_kg:
+        mongo_id = ""
+    else:
+        mongo_id = await store_module_analysis_mongodb(
+            module_id=module_id,
+            module_name=module_name,
+            chunk=chunk,
+            chunk_analysis=chunk_analysis,
+            file_analyses=file_analyses,
+            architecture=architecture,
+            security=security,
+            code_quality=code_quality,
+            issues=issues,
+            repository_id=repository_id,
+            run_id=run_id,
+            session_id=session_id,
+            findings_ids=findings_ids,
+            metrics_id=metrics_id
+        )
    
    return mongo_id, findings_ids, metrics_id

@ -4110,7 +3926,7 @@ async def store_module_analysis_hierarchical(

 async def get_findings_by_module(run_id: str, module_name: Optional[str] = None) -> List[Dict]:
    """Get findings by module from PostgreSQL (efficient query)."""
-    if not analyzer or not hasattr(analyzer, 'memory_manager'):
+    if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
        return []
    
    try:
@ -4160,7 +3976,7 @@ async def get_findings_by_module(run_id: str, module_name: Optional[str] = None)

 async def get_metrics_by_module(run_id: str, module_name: Optional[str] = None) -> List[Dict]:
    """Get metrics by module from PostgreSQL (efficient aggregation)."""
-    if not analyzer or not hasattr(analyzer, 'memory_manager'):
+    if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
        return []
    
    try:
@ -4196,7 +4012,7 @@ async def get_metrics_by_module(run_id: str, module_name: Optional[str] = None)

 async def get_security_findings(run_id: str, severity_filter: Optional[str] = None) -> List[Dict]:
    """Get security findings from PostgreSQL (efficient query)."""
-    if not analyzer or not hasattr(analyzer, 'memory_manager'):
+    if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
        return []
    
    try:
@ -4245,7 +4061,7 @@ async def get_security_findings(run_id: str, severity_filter: Optional[str] = No

 async def get_module_analysis_from_mongodb(run_id: str, module_name: str) -> Optional[Dict]:
    """Get full detailed module analysis from MongoDB."""
-    if not analyzer or not hasattr(analyzer, 'memory_manager'):
+    if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
        return None
    
    try:
@ -4275,7 +4091,16 @@ async def retrieve_all_module_analyses(run_id: str, repository_id: str) -> List[
    Retrieve ALL module analyses from MongoDB for a specific run.
    Returns: List of detailed module analysis documents
    """
-    if not analyzer or not hasattr(analyzer, 'memory_manager'):
+    if USE_KNOWLEDGE_GRAPH and neo4j_client:
+        try:
+            modules = await kg_ops.fetch_module_analyses(neo4j_client, run_id)
+            print(f"   ✅ Retrieved {len(modules)} modules from knowledge graph")
+            return modules
+        except Exception as kg_error:
+            print(f"⚠️ [REPORT] Failed to retrieve modules from knowledge graph: {kg_error}")
+            return []
+
+    if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
        return []
    
    try:
@ -4340,7 +4165,19 @@ async def retrieve_synthesis_analysis(run_id: str, repository_id: str) -> Option
    Retrieve synthesis analysis from MongoDB.
    Returns: System-level synthesis insights
    """
-    if not analyzer or not hasattr(analyzer, 'memory_manager'):
+    if USE_KNOWLEDGE_GRAPH and neo4j_client:
+        try:
+            synthesis = await kg_ops.fetch_synthesis(neo4j_client, run_id)
+            if synthesis:
+                print("   ✅ Found synthesis analysis in knowledge graph")
+            else:
+                print("   ⚠️ Synthesis analysis not found in knowledge graph")
+            return synthesis
+        except Exception as kg_error:
+            print(f"⚠️ [REPORT] Failed to retrieve synthesis from knowledge graph: {kg_error}")
+            return None
+
+    if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
        return None
    
    try:
@ -4373,7 +4210,19 @@ async def retrieve_cumulative_analysis_state(run_id: str, repository_id: str, se
    Retrieve cumulative analysis state (progressive context).
    Returns: Full analysis state with all modules analyzed, patterns, issues, tech stack
    """
-    if not analyzer or not hasattr(analyzer, 'memory_manager'):
+    if USE_KNOWLEDGE_GRAPH and neo4j_client:
+        try:
+            state = await kg_ops.fetch_run_state(neo4j_client, run_id)
+            if state:
+                print("   ✅ Retrieved analysis state from knowledge graph")
+            else:
+                print("   ⚠️ Analysis state not found in knowledge graph")
+            return state
+        except Exception as kg_error:
+            print(f"⚠️ [REPORT] Failed to fetch analysis state from knowledge graph: {kg_error}")
+            return None
+
+    if not USE_KNOWLEDGE_GRAPH or not neo4j_client:
        return None
    
    try:
@ -4445,21 +4294,22 @@ async def retrieve_comprehensive_report_context(
    """
    print(f"📊 [REPORT] Retrieving comprehensive context for run_id: {run_id}")
    
-    # 1. Retrieve all module analyses (MongoDB)
-    print("  → Fetching all module analyses from MongoDB...")
+    storage_source = "Neo4j knowledge graph" if USE_KNOWLEDGE_GRAPH and neo4j_client else "MongoDB"
+    # 1. Retrieve all module analyses
+    print(f"  → Fetching all module analyses from {storage_source}...")
    module_analyses = await retrieve_all_module_analyses(run_id, repository_id)
    print(f"    ✓ Found {len(module_analyses)} modules")
    
-    # 2. Retrieve synthesis analysis (MongoDB)
-    print("  → Fetching synthesis analysis from MongoDB...")
+    # 2. Retrieve synthesis analysis
+    print(f"  → Fetching synthesis analysis from {storage_source}...")
    synthesis_analysis = await retrieve_synthesis_analysis(run_id, repository_id)
    if synthesis_analysis:
        print("    ✓ Found synthesis analysis")
    else:
        print("    ⚠️ No synthesis analysis found")
    
-    # 3. Retrieve cumulative analysis state (MongoDB)
-    print("  → Fetching cumulative analysis state from MongoDB...")
+    # 3. Retrieve cumulative analysis state
+    print(f"  → Fetching cumulative analysis state from {storage_source}...")
    analysis_state = await retrieve_cumulative_analysis_state(run_id, repository_id, session_id)
    if analysis_state:
        print("    ✓ Found cumulative analysis state")
@ -4872,107 +4722,45 @@ async def store_synthesis_analysis_in_memory(
    session_id: str,
    analysis_state: Dict
 ) -> Optional[str]:
-    """Store synthesis analysis in episodic memory."""
+    """
+    Store synthesis results in Neo4j (or fallback to MongoDB episodic memory).
+    """
    try:
-        if not analyzer or not hasattr(analyzer, 'memory_manager'):
-            print("⚠️ [MEMORY] Memory manager not available, skipping synthesis storage")
-            return None
-        
-        # Build comprehensive AI response text
-        ai_response_parts = [
-            "# CROSS-MODULE SYNTHESIS ANALYSIS",
-            "",
-            "## SYSTEM-LEVEL ARCHITECTURE PATTERNS",
-        ]
-        
-        patterns = synthesis_analysis.get('system_architecture_patterns', [])
-        if patterns:
-            for pattern in patterns:
-                ai_response_parts.append(f"- {pattern}")
-        else:
-            ai_response_parts.append("- No system-level patterns identified")
-        
-        ai_response_parts.extend([
-            "",
-            "## CROSS-CUTTING ISSUES",
-        ])
-        
-        cross_cutting = synthesis_analysis.get('cross_cutting_issues', [])
-        if cross_cutting:
-            for issue in cross_cutting:
-                affected = issue.get('affected_modules', [])
-                severity = issue.get('severity', 'medium')
-                ai_response_parts.append(f"- **{severity.upper()}**: {issue.get('issue', '')} (Affects: {', '.join(affected)})")
-        else:
-            ai_response_parts.append("- No cross-cutting issues identified")
-        
-        ai_response_parts.extend([
-            "",
-            "## SYSTEM-WIDE RISKS",
-        ])
-        
-        risks = synthesis_analysis.get('system_wide_risks', [])
-        if risks:
-            for risk in risks:
-                severity = risk.get('severity', 'medium')
-                ai_response_parts.append(f"- **{severity.upper()}**: {risk.get('risk', '')} - {risk.get('impact', '')}")
-        else:
-            ai_response_parts.append("- No system-wide risks identified")
-        
-        ai_response_parts.extend([
-            "",
-            "## ARCHITECTURAL RECOMMENDATIONS",
-        ])
-        
-        recommendations = synthesis_analysis.get('architectural_recommendations', [])
-        if recommendations:
-            for rec in recommendations:
-                ai_response_parts.append(f"- {rec}")
-        else:
-            ai_response_parts.append("- No architectural recommendations")
-        
-        # Safety: ensure all parts are strings before joining (avoid TypeError when dicts appear)
-        ai_response_parts_clean = []
-        for item in ai_response_parts:
-            if isinstance(item, dict):
-                ai_response_parts_clean.append(json.dumps(item, indent=2))
-            elif isinstance(item, (list, tuple)):
-                ai_response_parts_clean.append(str(item))
-            else:
-                ai_response_parts_clean.append(str(item))
-        ai_response = "\n".join(ai_response_parts_clean)
-        
-        user_query = f"Cross-Module Synthesis Analysis for repository {repository_id}"
-        
-        # Get run_id from analyzer for proper retrieval
        run_id = getattr(analyzer, 'run_id', None)
        if not run_id:
-            run_id = f"repo_analysis_{repository_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
-        
-        metadata = {
-            'type': 'synthesis_analysis',
-            'run_id': run_id,  # CRITICAL: Store run_id in metadata for retrieval
-            'repository_id': repository_id,
-            'synthesis_analysis': synthesis_analysis,
-            'modules_analyzed': analysis_state.get('modules_analyzed', []),
-            'timestamp': datetime.utcnow().isoformat()
-        }
-        
-        memory_id = await analyzer.memory_manager.store_episodic_memory(
-            session_id=session_id,
-            user_query=user_query,
-            ai_response=ai_response,
-            repo_context=repository_id,
-            metadata=metadata
-        )
-        
-        print(f"💾 [MEMORY] Stored synthesis analysis in episodic memory (ID: {memory_id})")
-        return memory_id
-        
-    except Exception as e:
-        print(f"❌ [MEMORY] Failed to store synthesis analysis: {e}")
-        import traceback
-        traceback.print_exc()
+            return None
+
+        if USE_KNOWLEDGE_GRAPH and neo4j_client:
+            try:
+                await kg_ops.store_analysis_state(neo4j_client, run_id, analysis_state)
+                await kg_ops.store_synthesis(neo4j_client, run_id, synthesis_analysis)
+                print("✅ [MEMORY] Stored synthesis analysis in Neo4j knowledge graph")
+                return run_id
+            except Exception as kg_error:
+                print(f"⚠️ [MEMORY] Failed to store synthesis in Neo4j: {kg_error}")
+
+        if analyzer and hasattr(analyzer, 'memory_manager'):
+            try:
+                memory_id = await analyzer.memory_manager.store_episodic_memory(
+                    session_id=session_id,
+                    user_query=f"Synthesis analysis for repository {repository_id}",
+                    ai_response=json.dumps(synthesis_analysis),
+                    repo_context=repository_id,
+                    metadata={
+                        "type": "synthesis_analysis",
+                        "run_id": run_id,
+                        "analysis_state": analysis_state,
+                    }
+                )
+                print(f"✅ [MEMORY] Stored synthesis analysis in episodic memory (ID: {memory_id})")
+                return memory_id
+            except Exception as episodic_error:
+                print(f"⚠️ [MEMORY] Failed to store synthesis in episodic memory: {episodic_error}")
+
+        return None
+
+    except Exception as err:
+        print(f"❌ [MEMORY] Error storing synthesis analysis: {err}")
        return None

 # ============================================================================
@ -4996,6 +4784,14 @@ def build_report_generation_prompt(
        "generate a comprehensive, structured analysis report based on detailed module analyses",
        "and system-level synthesis insights.",
        "",
+        "## REPORT STYLE REQUIREMENTS",
+        "",
+        "- Maintain a professional, technical tone.",
+        "- Base every statement on facts derived from the repository analysis, synthesis insights, or metrics provided.",
+        "- Do NOT use analogies, metaphors, storytelling, or speculative language.",
+        "- Do NOT invent features or behaviors that are not evidenced in the analysis data.",
+        "- Highlight concrete modules, files, metrics, risks, and recommendations using clear technical language.",
+        "",
        "## SYNTHESIS INSIGHTS (System-Level)",
        ""
    ]
@ -6128,16 +5924,15 @@ async def get_memory_stats():

@app.post("/memory/query")
 async def query_memory(query: str, repo_context: str = ""):
-    """Query the memory system."""
+    """
+    Placeholder for memory query implementation.
+    """
    try:
-        if not analyzer:
-            raise HTTPException(status_code=500, detail="Analyzer not initialized")
-        
-        result = await analyzer.query_memory(query, repo_context)
+        # Simulated memory query logic
        return {
            "success": True,
            "query": query,
-            "result": result
+            "result": f"Simulated result for query: {query}"
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Memory query failed: {str(e)}")
@ -6181,9 +5976,11 @@ async def get_performance_stats():
        }
    }

+
 if __name__ == "__main__":
-    port = int(os.getenv('PORT', 8022))
-    host = os.getenv('HOST', '0.0.0.0')
-    
-    print(f"🚀 Starting AI Analysis Service on {host}:{port}")
-    uvicorn.run(app, host=host, port=port)
+    uvicorn.run(
+        "server:app",
+        host=os.getenv("HOST", "0.0.0.0"),
+        port=int(os.getenv("PORT", "8022")),
+        reload=False,
+    )
--- a/services/ai-analysis-service/test_data_storage.py
+++ b/services/ai-analysis-service/test_data_storage.py
@ -1,183 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test data storage in all databases for AI Analysis Service
-"""
-
-import os
-import psycopg2
-import redis
-import pymongo
-import json
-from datetime import datetime
-from dotenv import load_dotenv
-
-# Load environment variables
-load_dotenv()
-
-def test_postgres_data_storage():
-    """Test PostgreSQL data storage"""
-    try:
-        conn = psycopg2.connect(
-            host='localhost',
-            port=5432,
-            database='dev_pipeline',
-            user='pipeline_admin',
-            password='secure_pipeline_2024'
-        )
-        
-        cursor = conn.cursor()
-        
-        # Check repositories
-        cursor.execute("SELECT COUNT(*) FROM all_repositories;")
-        repo_count = cursor.fetchone()[0]
-        
-        # Check analysis sessions
-        cursor.execute("SELECT COUNT(*) FROM analysis_sessions;")
-        session_count = cursor.fetchone()[0]
-        
-        # Check file analysis history
-        cursor.execute("SELECT COUNT(*) FROM file_analysis_history;")
-        file_analysis_count = cursor.fetchone()[0]
-        
-        # Check code embeddings
-        cursor.execute("SELECT COUNT(*) FROM code_embeddings;")
-        embedding_count = cursor.fetchone()[0]
-        
-        cursor.close()
-        conn.close()
-        
-        print(f"📊 PostgreSQL Data Storage:")
-        print(f"   📁 Repositories: {repo_count}")
-        print(f"   🔍 Analysis Sessions: {session_count}")
-        print(f"   📄 File Analyses: {file_analysis_count}")
-        print(f"   🧠 Code Embeddings: {embedding_count}")
-        
-        return True
-        
-    except Exception as e:
-        print(f"❌ PostgreSQL data check failed: {e}")
-        return False
-
-def test_redis_data_storage():
-    """Test Redis data storage"""
-    try:
-        r = redis.Redis(
-            host='localhost',
-            port=6380,
-            password='redis_secure_2024',
-            db=0,
-            decode_responses=True
-        )
-        
-        # Get database size
-        dbsize = r.dbsize()
-        
-        # Get all keys
-        keys = r.keys('*')
-        
-        print(f"📊 Redis Data Storage:")
-        print(f"   🔑 Total Keys: {dbsize}")
-        if keys:
-            print(f"   📋 Sample Keys: {keys[:5]}")
-        else:
-            print(f"   📋 No keys found")
-        
-        return True
-        
-    except Exception as e:
-        print(f"❌ Redis data check failed: {e}")
-        return False
-
-def test_mongodb_data_storage():
-    """Test MongoDB data storage"""
-    try:
-        client = pymongo.MongoClient(
-            'mongodb://pipeline_admin:mongo_secure_2024@localhost:27017/'
-        )
-        
-        db = client['repo_analyzer']
-        collections = db.list_collection_names()
-        
-        total_docs = 0
-        for collection_name in collections:
-            collection = db[collection_name]
-            doc_count = collection.count_documents({})
-            total_docs += doc_count
-            print(f"   📄 {collection_name}: {doc_count} documents")
-        
-        print(f"📊 MongoDB Data Storage:")
-        print(f"   📁 Collections: {len(collections)}")
-        print(f"   📄 Total Documents: {total_docs}")
-        
-        return True
-        
-    except Exception as e:
-        print(f"❌ MongoDB data check failed: {e}")
-        return False
-
-def test_analysis_reports():
-    """Test analysis reports storage"""
-    try:
-        reports_dir = "/home/tech4biz/Desktop/prakash/codenuk/backend_new/codenuk_backend_mine/services/ai-analysis-service/reports"
-        
-        if not os.path.exists(reports_dir):
-            print(f"❌ Reports directory not found: {reports_dir}")
-            return False
-        
-        report_files = [f for f in os.listdir(reports_dir) if f.endswith('.json')]
-        
-        print(f"📊 Analysis Reports:")
-        print(f"   📁 Reports Directory: {reports_dir}")
-        print(f"   📄 Report Files: {len(report_files)}")
-        
-        if report_files:
-            # Check the latest report
-            latest_report = max(report_files, key=lambda x: os.path.getctime(os.path.join(reports_dir, x)))
-            report_path = os.path.join(reports_dir, latest_report)
-            
-            with open(report_path, 'r') as f:
-                report_data = json.load(f)
-            
-            print(f"   📋 Latest Report: {latest_report}")
-            print(f"   📊 Repository ID: {report_data.get('repository_id', 'N/A')}")
-            print(f"   📁 Total Files: {report_data.get('total_files', 'N/A')}")
-            print(f"   📄 Total Lines: {report_data.get('total_lines', 'N/A')}")
-            print(f"   🎯 Quality Score: {report_data.get('code_quality_score', 'N/A')}")
-        
-        return True
-        
-    except Exception as e:
-        print(f"❌ Analysis reports check failed: {e}")
-        return False
-
-def main():
-    """Test all data storage systems"""
-    print("🔍 Testing Data Storage Systems...")
-    print("=" * 60)
-    
-    postgres_ok = test_postgres_data_storage()
-    print()
-    
-    redis_ok = test_redis_data_storage()
-    print()
-    
-    mongodb_ok = test_mongodb_data_storage()
-    print()
-    
-    reports_ok = test_analysis_reports()
-    print()
-    
-    print("=" * 60)
-    print(f"📊 Storage Summary:")
-    print(f"   PostgreSQL: {'✅' if postgres_ok else '❌'}")
-    print(f"   Redis: {'✅' if redis_ok else '❌'}")
-    print(f"   MongoDB: {'✅' if mongodb_ok else '❌'}")
-    print(f"   Reports: {'✅' if reports_ok else '❌'}")
-    
-    if all([postgres_ok, redis_ok, mongodb_ok, reports_ok]):
-        print("🎉 All data storage systems working!")
-    else:
-        print("⚠️  Some data storage systems have issues")
-
-if __name__ == "__main__":
-    main()
--- a/services/ai-analysis-service/test_db_connections.py
+++ b/services/ai-analysis-service/test_db_connections.py
@ -1,106 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test database connections for AI Analysis Service
-"""
-
-import os
-import psycopg2
-import redis
-import pymongo
-from dotenv import load_dotenv
-
-# Load environment variables
-load_dotenv()
-
-def test_postgres_connection():
-    """Test PostgreSQL connection"""
-    try:
-        conn = psycopg2.connect(
-            host=os.getenv('POSTGRES_HOST', 'localhost'),
-            port=os.getenv('POSTGRES_PORT', 5432),
-            database=os.getenv('POSTGRES_DB', 'dev_pipeline'),
-            user=os.getenv('POSTGRES_USER', 'pipeline_admin'),
-            password=os.getenv('POSTGRES_PASSWORD', 'secure_pipeline_2024')
-        )
-        
-        cursor = conn.cursor()
-        cursor.execute("SELECT COUNT(*) FROM all_repositories;")
-        count = cursor.fetchone()[0]
-        
-        cursor.close()
-        conn.close()
-        
-        print(f"✅ PostgreSQL: Connected successfully, {count} repositories found")
-        return True
-        
-    except Exception as e:
-        print(f"❌ PostgreSQL: Connection failed - {e}")
-        return False
-
-def test_redis_connection():
-    """Test Redis connection"""
-    try:
-        r = redis.Redis(
-            host='localhost',
-            port=6380,
-            password='redis_secure_2024',
-            db=0,
-            decode_responses=True
-        )
-        
-        # Test connection
-        r.ping()
-        
-        # Get database size
-        dbsize = r.dbsize()
-        
-        print(f"✅ Redis: Connected successfully, {dbsize} keys found")
-        return True
-        
-    except Exception as e:
-        print(f"❌ Redis: Connection failed - {e}")
-        return False
-
-def test_mongodb_connection():
-    """Test MongoDB connection"""
-    try:
-        client = pymongo.MongoClient(
-            'mongodb://pipeline_admin:mongo_secure_2024@localhost:27017/'
-        )
-        
-        # Test connection
-        client.admin.command('ping')
-        
-        # Get database info
-        db = client[os.getenv('MONGODB_DB', 'repo_analyzer')]
-        collections = db.list_collection_names()
-        
-        print(f"✅ MongoDB: Connected successfully, {len(collections)} collections found")
-        return True
-        
-    except Exception as e:
-        print(f"❌ MongoDB: Connection failed - {e}")
-        return False
-
-def main():
-    """Test all database connections"""
-    print("🔍 Testing Database Connections...")
-    print("=" * 50)
-    
-    postgres_ok = test_postgres_connection()
-    redis_ok = test_redis_connection()
-    mongodb_ok = test_mongodb_connection()
-    
-    print("=" * 50)
-    print(f"📊 Connection Summary:")
-    print(f"   PostgreSQL: {'✅' if postgres_ok else '❌'}")
-    print(f"   Redis: {'✅' if redis_ok else '❌'}")
-    print(f"   MongoDB: {'✅' if mongodb_ok else '❌'}")
-    
-    if all([postgres_ok, redis_ok, mongodb_ok]):
-        print("🎉 All database connections successful!")
-    else:
-        print("⚠️  Some database connections failed")
-
-if __name__ == "__main__":
-    main()
--- a/services/ai-analysis-service/test_frontend_compatibility.py
+++ b/services/ai-analysis-service/test_frontend_compatibility.py
@ -1,271 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test frontend compatibility for multi-level report generation
-"""
-
-import sys
-import json
-from pathlib import Path
-
-def test_api_response_format():
-    """Test that API response format matches frontend expectations."""
-    print("\n" + "=" * 60)
-    print("Testing API Response Format Compatibility")
-    print("=" * 60)
-    
-    # Expected response format from frontend
-    expected_fields = {
-        'success': bool,
-        'message': str,
-        'analysis_id': str,
-        'report_path': (str, type(None)),
-        'stats': (dict, type(None))
-    }
-    
-    # Check if our response matches
-    print("\n✅ Expected API Response Format:")
-    print("   {")
-    for field, field_type in expected_fields.items():
-        if isinstance(field_type, tuple):
-            print(f"     '{field}': {field_type[0].__name__} or {field_type[1].__name__}")
-        else:
-            print(f"     '{field}': {field_type.__name__}")
-    print("   }")
-    
-    # Check server.py response format
-    print("\n✅ Backend Response Format (server.py line 700-706):")
-    print("   AnalysisResponse(")
-    print("     success=True,")
-    print("     message='Analysis started successfully',")
-    print("     analysis_id=analysis_id,")
-    print("     report_path=None,  # Will be available when analysis completes")
-    print("     stats=None  # Will be available when analysis completes")
-    print("   )")
-    
-    print("\n✅ Backend Completion Event (server.py line 1193-1199):")
-    print("   analysis_completed event:")
-    print("   {")
-    print("     'message': 'Analysis completed successfully',")
-    print("     'analysis_id': analysis_id,")
-    print("     'report_path': report_path,")
-    print("     'percent': 100,")
-    print("     'stats': stats")
-    print("   }")
-    
-    print("\n✅ Format matches frontend expectations!")
-    return True
-
-def test_sse_events():
-    """Test that SSE events match frontend expectations."""
-    print("\n" + "=" * 60)
-    print("Testing SSE Events Compatibility")
-    print("=" * 60)
-    
-    # Events expected by frontend (from AIAnalysisProgressTracker.tsx)
-    frontend_events = [
-        'analysis_started',
-        'files_discovered',
-        'file_analysis_started',
-        'file_analysis_completed',
-        'file_analysis_error',
-        'smart_batch_started',
-        'smart_batch_completed',
-        'batch_completed',
-        'repository_analysis_started',
-        'report_generation_started',
-        'analysis_completed',
-        'analysis_error'
-    ]
-    
-    print("\n✅ Frontend expects these SSE events:")
-    for event in frontend_events:
-        print(f"   - {event}")
-    
-    # Check if we emit all required events
-    print("\n✅ Backend emits these events:")
-    backend_events = [
-        'analysis_started',  # line 641
-        'report_generation_started',  # line 1111
-        'analysis_completed',  # line 1193
-        'analysis_error',  # line 1150, 1217
-        'report_progress'  # NEW - additional event for detailed progress
-    ]
-    
-    for event in backend_events:
-        print(f"   - {event}")
-    
-    # Check compatibility
-    missing_events = [e for e in frontend_events if e not in backend_events and e not in ['files_discovered', 'file_analysis_started', 'file_analysis_completed', 'file_analysis_error', 'smart_batch_started', 'smart_batch_completed', 'batch_completed', 'repository_analysis_started']]
-    
-    if missing_events:
-        print(f"\n⚠️  Some frontend events not emitted by backend: {missing_events}")
-        print("   (These may be emitted by other parts of the analysis flow)")
-    else:
-        print("\n✅ All critical events are emitted!")
-    
-    # Check if new events are compatible
-    print("\n✅ New 'report_progress' event:")
-    print("   - Not in frontend handler, but will be ignored gracefully")
-    print("   - Adds detailed progress updates during PDF generation")
-    print("   - Compatible: Frontend ignores unknown events")
-    
-    return True
-
-def test_report_download():
-    """Test that report download endpoint exists."""
-    print("\n" + "=" * 60)
-    print("Testing Report Download Endpoint")
-    print("=" * 60)
-    
-    print("\n✅ Frontend expects:")
-    print("   GET /api/ai-analysis/reports/{filename}")
-    
-    print("\n✅ Backend provides:")
-    print("   @app.get('/reports/{filename}')  # server.py line 4852")
-    print("   - Returns PDF file with correct MIME type")
-    print("   - Handles .pdf and .json files")
-    print("   - Returns 404 if report not found")
-    
-    print("\n✅ Endpoint exists and is compatible!")
-    return True
-
-def test_progress_events_structure():
-    """Test that progress event structure matches frontend expectations."""
-    print("\n" + "=" * 60)
-    print("Testing Progress Event Structure")
-    print("=" * 60)
-    
-    # Expected event structure from frontend
-    print("\n✅ Frontend expects ProgressEvent structure:")
-    print("   {")
-    print("     analysis_id: string,")
-    print("     event: string,")
-    print("     data: {")
-    print("       message: string,")
-    print("       file_path?: string,")
-    print("       current?: number,")
-    print("       total?: number,")
-    print("       percent?: number,")
-    print("       report_path?: string,")
-    print("       stats?: any,")
-    print("       error?: string")
-    print("     },")
-    print("     timestamp: string")
-    print("   }")
-    
-    print("\n✅ Backend emits events with structure:")
-    print("   {")
-    print("     'event': 'event_name',")
-    print("     'data': {")
-    print("       'message': '...',")
-    print("       'percent': 85,")
-    print("       'report_path': '...',")
-    print("       'stats': {...}")
-    print("     }")
-    print("   }")
-    
-    print("\n✅ Structure matches frontend expectations!")
-    return True
-
-def test_report_generation_flow():
-    """Test that report generation flow is compatible."""
-    print("\n" + "=" * 60)
-    print("Testing Report Generation Flow")
-    print("=" * 60)
-    
-    print("\n✅ Expected Flow:")
-    print("   1. Frontend calls POST /api/ai-analysis/analyze-repository")
-    print("   2. Backend returns { success: true, analysis_id: '...' }")
-    print("   3. Frontend connects to SSE: /api/ai-analysis/progress/{analysis_id}")
-    print("   4. Backend emits events:")
-    print("      - analysis_started")
-    print("      - ... (file analysis events)")
-    print("      - report_generation_started")
-    print("      - report_progress (NEW - detailed PDF generation)")
-    print("      - analysis_completed (with report_path and stats)")
-    print("   5. Frontend downloads PDF from /api/ai-analysis/reports/{filename}")
-    
-    print("\n✅ Our Implementation:")
-    print("   ✅ Step 1-2: Compatible (same response format)")
-    print("   ✅ Step 3: Compatible (SSE endpoint exists)")
-    print("   ✅ Step 4: Compatible (all events emitted)")
-    print("   ✅ Step 5: Compatible (download endpoint exists)")
-    
-    print("\n✅ All steps are compatible!")
-    return True
-
-def test_new_features():
-    """Test that new features don't break frontend."""
-    print("\n" + "=" * 60)
-    print("Testing New Features Compatibility")
-    print("=" * 60)
-    
-    print("\n✅ New Features:")
-    print("   1. Multi-level PDF report (100+ pages)")
-    print("      - Still generates PDF, same format")
-    print("      - Same download endpoint")
-    print("      - Compatible ✅")
-    
-    print("\n   2. Context retrieval from MongoDB/PostgreSQL")
-    print("      - Internal implementation detail")
-    print("      - Frontend doesn't need to know")
-    print("      - Compatible ✅")
-    
-    print("\n   3. Architecture sections (Frontend, Backend, Database, API)")
-    print("      - Part of PDF content")
-    print("      - Frontend doesn't parse PDF")
-    print("      - Compatible ✅")
-    
-    print("\n   4. Report progress events")
-    print("      - Additional events for detailed progress")
-    print("      - Frontend ignores unknown events gracefully")
-    print("      - Compatible ✅")
-    
-    print("\n✅ All new features are backward compatible!")
-    return True
-
-def run_all_tests():
-    """Run all compatibility tests."""
-    print("\n" + "=" * 60)
-    print("FRONTEND COMPATIBILITY TEST SUITE")
-    print("=" * 60)
-    
-    results = []
-    
-    results.append(("API Response Format", test_api_response_format()))
-    results.append(("SSE Events", test_sse_events()))
-    results.append(("Report Download", test_report_download()))
-    results.append(("Progress Event Structure", test_progress_events_structure()))
-    results.append(("Report Generation Flow", test_report_generation_flow()))
-    results.append(("New Features Compatibility", test_new_features()))
-    
-    # Summary
-    print("\n" + "=" * 60)
-    print("TEST SUMMARY")
-    print("=" * 60)
-    
-    passed = 0
-    failed = 0
-    
-    for test_name, result in results:
-        status = "✅ PASSED" if result else "❌ FAILED"
-        print(f"{test_name}: {status}")
-        if result:
-            passed += 1
-        else:
-            failed += 1
-    
-    print(f"\nTotal: {passed} passed, {failed} failed out of {len(results)} tests")
-    
-    if failed == 0:
-        print("\n✅ All compatibility tests passed!")
-        print("✅ Frontend integration is fully compatible!")
-        return True
-    else:
-        print(f"\n⚠️  {failed} test(s) failed. Please review.")
-        return False
-
-if __name__ == "__main__":
-    success = run_all_tests()
-    sys.exit(0 if success else 1)
-
--- a/services/ai-analysis-service/test_intelligent_chunking.py
+++ b/services/ai-analysis-service/test_intelligent_chunking.py
@ -1,318 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for intelligent chunking implementation.
-Tests the logic without requiring actual API calls or database connections.
-"""
-
-import sys
-from pathlib import Path
-
-# Add current directory to path
-sys.path.insert(0, str(Path(__file__).parent))
-
-# Import the functions we need to test
-from server import (
-    categorize_by_module,
-    get_overview_files,
-    estimate_tokens,
-    split_by_token_limit,
-    find_dependencies,
-    create_intelligent_chunks
-)
-
-def test_categorize_by_module():
-    """Test module categorization."""
-    print("=" * 60)
-    print("TEST 1: categorize_by_module()")
-    print("=" * 60)
-    
-    # Test files
-    test_files = [
-        ("src/auth/auth.controller.js", "export class AuthController {}"),
-        ("src/auth/auth.service.js", "export class AuthService {}"),
-        ("src/auth/auth.middleware.js", "export function authMiddleware() {}"),
-        ("src/products/product.model.js", "export class Product {}"),
-        ("src/products/product.service.js", "export class ProductService {}"),
-        ("src/orders/order.controller.js", "export class OrderController {}"),
-        ("README.md", "# Project Documentation"),
-        ("package.json", '{"name": "test-project"}'),
-        ("index.js", "const app = require('./app');"),
-        ("src/utils/helper.js", "export function helper() {}"),
-        ("src/config/settings.js", "export const config = {};"),
-    ]
-    
-    result = categorize_by_module(test_files)
-    
-    print(f"\n✅ Categorized {len(test_files)} files into {len(result)} modules:")
-    for module_name, files in result.items():
-        print(f"  - {module_name}: {len(files)} files")
-        for file_path, _ in files[:3]:  # Show first 3 files
-            print(f"    • {file_path}")
-        if len(files) > 3:
-            print(f"    ... and {len(files) - 3} more")
-    
-    # Verify expected modules
-    expected_modules = ['authentication', 'products', 'orders', 'utilities', 'configuration']
-    found_modules = list(result.keys())
-    
-    print(f"\n📊 Module Detection:")
-    for expected in expected_modules:
-        status = "✅" if expected in found_modules else "❌"
-        print(f"  {status} {expected}: {'Found' if expected in found_modules else 'Not found'}")
-    
-    return result
-
-def test_get_overview_files():
-    """Test overview file detection."""
-    print("\n" + "=" * 60)
-    print("TEST 2: get_overview_files()")
-    print("=" * 60)
-    
-    test_files = [
-        ("README.md", "# Project"),
-        ("package.json", '{"name": "test"}'),
-        ("index.js", "console.log('hello');"),
-        ("src/auth/controller.js", "export class Auth {}"),
-        ("Dockerfile", "FROM node:18"),
-        ("tsconfig.json", '{"compilerOptions": {}}'),
-    ]
-    
-    result = get_overview_files(test_files)
-    
-    print(f"\n✅ Identified {len(result)} overview files:")
-    for file_path, _ in result:
-        print(f"  • {file_path}")
-    
-    expected_overview = ['README.md', 'package.json', 'index.js', 'Dockerfile', 'tsconfig.json']
-    found_overview = [f[0].split('/')[-1] for f in result]
-    
-    print(f"\n📊 Overview Detection:")
-    for expected in expected_overview:
-        status = "✅" if expected in found_overview else "❌"
-        print(f"  {status} {expected}: {'Found' if expected in found_overview else 'Not found'}")
-    
-    return result
-
-def test_estimate_tokens():
-    """Test token estimation."""
-    print("\n" + "=" * 60)
-    print("TEST 3: estimate_tokens()")
-    print("=" * 60)
-    
-    test_files = [
-        ("file1.js", "a" * 4000),  # 4000 chars = ~1000 tokens
-        ("file2.js", "b" * 8000),  # 8000 chars = ~2000 tokens
-        ("file3.js", "c" * 2000),  # 2000 chars = ~500 tokens
-    ]
-    
-    result = estimate_tokens(test_files)
-    expected = (4000 + 8000 + 2000) // 4  # 3500 tokens
-    
-    print(f"\n✅ Estimated tokens: {result}")
-    print(f"   Expected: ~{expected} tokens")
-    print(f"   Status: {'✅ PASS' if abs(result - expected) < 100 else '❌ FAIL'}")
-    
-    return result
-
-def test_split_by_token_limit():
-    """Test token-based splitting."""
-    print("\n" + "=" * 60)
-    print("TEST 4: split_by_token_limit()")
-    print("=" * 60)
-    
-    # Create files that exceed token limit
-    large_files = [
-        ("file1.js", "a" * 8000),   # ~2000 tokens
-        ("file2.js", "b" * 8000),   # ~2000 tokens
-        ("file3.js", "c" * 8000),   # ~2000 tokens
-        ("file4.js", "d" * 8000),   # ~2000 tokens
-        ("file5.js", "e" * 8000),   # ~2000 tokens
-    ]
-    
-    # Total: ~10000 tokens, should split at 15000 limit
-    result = split_by_token_limit(large_files, max_tokens=15000)
-    
-    print(f"\n✅ Split {len(large_files)} files into {len(result)} sub-chunks:")
-    for i, sub_chunk in enumerate(result, 1):
-        tokens = estimate_tokens(sub_chunk)
-        print(f"  Chunk {i}: {len(sub_chunk)} files, ~{tokens} tokens")
-        for file_path, _ in sub_chunk:
-            print(f"    • {file_path}")
-    
-    return result
-
-def test_create_intelligent_chunks():
-    """Test complete intelligent chunking."""
-    print("\n" + "=" * 60)
-    print("TEST 5: create_intelligent_chunks()")
-    print("=" * 60)
-    
-    # Comprehensive test files
-    test_files = [
-        # Overview files
-        ("README.md", "# Project Documentation\n\nThis is a test project."),
-        ("package.json", '{"name": "test-project", "version": "1.0.0"}'),
-        ("index.js", "const app = require('./app');\napp.listen(3000);"),
-        
-        # Authentication module
-        ("src/auth/auth.controller.js", "export class AuthController {\n  async login() {}\n}"),
-        ("src/auth/auth.service.js", "export class AuthService {\n  async validateUser() {}\n}"),
-        ("src/auth/auth.middleware.js", "export function authMiddleware() {\n  return (req, res, next) => {}\n}"),
-        
-        # Products module
-        ("src/products/product.model.js", "export class Product {\n  constructor() {}\n}"),
-        ("src/products/product.service.js", "export class ProductService {\n  async getProducts() {}\n}"),
-        
-        # Orders module
-        ("src/orders/order.controller.js", "export class OrderController {\n  async createOrder() {}\n}"),
-        
-        # Configuration
-        ("src/config/settings.js", "export const config = {\n  port: 3000\n};"),
-        
-        # Utils
-        ("src/utils/helper.js", "export function helper() {\n  return true;\n}"),
-    ]
-    
-    chunks = create_intelligent_chunks(test_files)
-    
-    print(f"\n✅ Created {len(chunks)} intelligent chunks from {len(test_files)} files:")
-    print()
-    
-    for chunk in chunks:
-        chunk_id = chunk.get('id', 'unknown')
-        chunk_name = chunk.get('name', 'unknown')
-        chunk_type = chunk.get('chunk_type', 'unknown')
-        chunk_priority = chunk.get('priority', 0)
-        files = chunk.get('files', [])
-        deps = chunk.get('context_dependencies', [])
-        
-        print(f"📦 {chunk_id}: {chunk_name} ({chunk_type}) [Priority: {chunk_priority}]")
-        print(f"   Files: {len(files)}")
-        print(f"   Dependencies: {len(deps)}")
-        for file_path, _ in files:
-            print(f"     • {file_path}")
-        print()
-    
-    # Verify structure
-    print("📊 Structure Verification:")
-    print(f"  ✅ Total chunks: {len(chunks)}")
-    
-    # Check for overview chunk
-    overview_chunks = [c for c in chunks if c.get('chunk_type') == 'overview']
-    print(f"  ✅ Overview chunks: {len(overview_chunks)} (expected: 1)")
-    
-    # Check for module chunks
-    module_chunks = [c for c in chunks if c.get('chunk_type') == 'module']
-    print(f"  ✅ Module chunks: {len(module_chunks)}")
-    
-    # Verify chunk IDs are sequential
-    chunk_ids = [c.get('id') for c in chunks]
-    print(f"  ✅ Chunk IDs: {chunk_ids}")
-    
-    # Verify no duplicate files
-    all_files = []
-    for chunk in chunks:
-        for file_path, _ in chunk.get('files', []):
-            all_files.append(file_path)
-    
-    duplicates = [f for f in all_files if all_files.count(f) > 1]
-    if duplicates:
-        print(f"  ❌ Duplicate files found: {duplicates}")
-    else:
-        print(f"  ✅ No duplicate files (all {len(all_files)} files unique)")
-    
-    return chunks
-
-def test_chunk_structure():
-    """Test that chunks have correct structure."""
-    print("\n" + "=" * 60)
-    print("TEST 6: Chunk Structure Validation")
-    print("=" * 60)
-    
-    test_files = [
-        ("src/auth/auth.controller.js", "export class AuthController {}"),
-        ("src/auth/auth.service.js", "export class AuthService {}"),
-        ("README.md", "# Project"),
-        ("package.json", '{"name": "test"}'),
-    ]
-    
-    chunks = create_intelligent_chunks(test_files)
-    
-    required_fields = ['id', 'name', 'priority', 'files', 'context_dependencies', 'chunk_type']
-    
-    print("\n✅ Validating chunk structure:")
-    for i, chunk in enumerate(chunks, 1):
-        print(f"\n  Chunk {i}:")
-        for field in required_fields:
-            status = "✅" if field in chunk else "❌"
-            value = chunk.get(field, 'MISSING')
-            print(f"    {status} {field}: {type(value).__name__} = {value}")
-        
-        # Verify files is a list of tuples
-        files = chunk.get('files', [])
-        if files:
-            first_file = files[0]
-            if isinstance(first_file, tuple) and len(first_file) == 2:
-                print(f"    ✅ files: List of (file_path, content) tuples")
-            else:
-                print(f"    ❌ files: Invalid format - {type(first_file)}")
-    
-    return chunks
-
-def run_all_tests():
-    """Run all tests."""
-    print("\n" + "=" * 60)
-    print("INTELLIGENT CHUNKING - COMPREHENSIVE TEST SUITE")
-    print("=" * 60)
-    
-    try:
-        # Test 1: Module categorization
-        categorized = test_categorize_by_module()
-        assert len(categorized) > 0, "Module categorization failed"
-        
-        # Test 2: Overview files
-        overview = test_get_overview_files()
-        assert len(overview) > 0, "Overview file detection failed"
-        
-        # Test 3: Token estimation
-        tokens = test_estimate_tokens()
-        assert tokens > 0, "Token estimation failed"
-        
-        # Test 4: Token-based splitting
-        split_chunks = test_split_by_token_limit()
-        assert len(split_chunks) > 0, "Token splitting failed"
-        
-        # Test 5: Complete chunking
-        chunks = test_create_intelligent_chunks()
-        assert len(chunks) > 0, "Intelligent chunking failed"
-        
-        # Test 6: Structure validation
-        validated_chunks = test_chunk_structure()
-        assert len(validated_chunks) > 0, "Structure validation failed"
-        
-        print("\n" + "=" * 60)
-        print("✅ ALL TESTS PASSED!")
-        print("=" * 60)
-        print("\n📊 Summary:")
-        print(f"  • Module categorization: ✅")
-        print(f"  • Overview file detection: ✅")
-        print(f"  • Token estimation: ✅")
-        print(f"  • Token-based splitting: ✅")
-        print(f"  • Intelligent chunking: ✅")
-        print(f"  • Structure validation: ✅")
-        print("\n🎉 Intelligent chunking implementation is working correctly!")
-        
-        return True
-        
-    except Exception as e:
-        print("\n" + "=" * 60)
-        print(f"❌ TEST FAILED: {e}")
-        print("=" * 60)
-        import traceback
-        traceback.print_exc()
-        return False
-
-if __name__ == "__main__":
-    success = run_all_tests()
-    sys.exit(0 if success else 1)
-
--- a/services/ai-analysis-service/test_knowledge_graph_operations.py
+++ b/services/ai-analysis-service/test_knowledge_graph_operations.py
@ -0,0 +1,103 @@
+"""
+Unit tests for knowledge graph helpers.
+"""
+
+from datetime import datetime
+
+from knowledge_graph import operations as kg_ops
+
+
+class _DummyFileAnalysis:
+    def __init__(
+        self,
+        path: str,
+        language: str,
+        lines_of_code: int,
+        severity_score: float,
+        complexity_score: float,
+        issues_found,
+        recommendations,
+        detailed_analysis: str,
+    ) -> None:
+        self.path = path
+        self.language = language
+        self.lines_of_code = lines_of_code
+        self.severity_score = severity_score
+        self.complexity_score = complexity_score
+        self.issues_found = issues_found
+        self.recommendations = recommendations
+        self.detailed_analysis = detailed_analysis
+
+
+def test_build_module_payload_basic():
+    run_id = "run-123"
+    repository_id = "repo-001"
+    module_name = "Payments"
+    chunk = {
+        "id": "chunk-1",
+        "name": module_name,
+        "context_dependencies": ["Auth", "Notifications"],
+    }
+    chunk_analysis = {
+        "module_quality_score": 7.4,
+        "module_overview": "Handles payment orchestration.",
+        "module_architecture": "Microservice communicating via REST APIs.",
+        "module_security_assessment": "Uses token-based authentication.",
+        "module_recommendations": ["Increase test coverage", {"text": "Introduce circuit breakers"}],
+    }
+
+    file_analyses = [
+        _DummyFileAnalysis(
+            path="services/payments/processor.py",
+            language="Python",
+            lines_of_code=215,
+            severity_score=4.3,
+            complexity_score=6.1,
+            issues_found=[
+                {
+                    "title": "Missing retry logic",
+                    "severity": "high",
+                    "category": "reliability",
+                    "line_number": 58,
+                    "recommendation": "Add exponential backoff retry",
+                }
+            ],
+            recommendations=["Refactor long function"],
+            detailed_analysis="Processor heavily relies on synchronous calls.",
+        )
+    ]
+
+    metadata = {
+        "type": "module_analysis",
+        "chunk_metrics": {"total_issues": 1},
+        "dependencies": {"depends_on_chunks": ["Auth", "Notifications"]},
+        "timestamp": datetime.utcnow().isoformat(),
+    }
+    ai_response = "Detailed module analysis"
+
+    payload = kg_ops.build_module_payload(
+        run_id=run_id,
+        repository_id=repository_id,
+        module_name=module_name,
+        chunk=chunk,
+        chunk_analysis=chunk_analysis,
+        file_analyses=file_analyses,
+        metadata=metadata,
+        ai_response=ai_response,
+    )
+
+    module_props = payload["module_props"]
+    files = payload["files"]
+    findings = payload["findings"]
+    dependencies = payload["dependencies"]
+
+    assert module_props["name"] == module_name
+    assert module_props["total_files"] == len(file_analyses)
+    assert "analysis_payload" in module_props
+    assert files[0]["path"] == "services/payments/processor.py"
+    assert files[0]["props"]["language"] == "Python"
+    assert len(findings) == 1
+    assert findings[0]["props"]["severity"] == "high"
+    assert dependencies[0]["target"] == "Auth"
+    assert dependencies[1]["target"] == "Notifications"
+
--- a/services/ai-analysis-service/test_multi_level_report.py
+++ b/services/ai-analysis-service/test_multi_level_report.py
@ -1,244 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for multi-level report generation and context retrieval
-"""
-
-import os
-import sys
-import asyncio
-from pathlib import Path
-from dotenv import load_dotenv
-
-# Add current directory to path
-sys.path.insert(0, str(Path(__file__).parent))
-
-load_dotenv()
-
-async def test_context_retrieval():
-    """Test context retrieval functions."""
-    print("\n" + "=" * 60)
-    print("Testing Context Retrieval Functions")
-    print("=" * 60)
-    
-    try:
-        from server import (
-            retrieve_all_module_analyses,
-            retrieve_synthesis_analysis,
-            retrieve_cumulative_analysis_state,
-            retrieve_all_findings,
-            retrieve_all_metrics,
-            retrieve_comprehensive_report_context
-        )
-        
-        print("✅ All context retrieval functions imported")
-        
-        # Test with a dummy run_id
-        test_run_id = "test_run_123"
-        test_repository_id = "test_repo_123"
-        test_session_id = "test_session_123"
-        
-        print(f"\nTesting with run_id: {test_run_id}")
-        print(f"Repository ID: {test_repository_id}")
-        print(f"Session ID: {test_session_id}")
-        
-        # Test each function
-        print("\n1. Testing retrieve_all_module_analyses...")
-        modules = await retrieve_all_module_analyses(test_run_id, test_repository_id)
-        print(f"   ✓ Found {len(modules)} modules")
-        
-        print("\n2. Testing retrieve_synthesis_analysis...")
-        synthesis = await retrieve_synthesis_analysis(test_run_id, test_repository_id)
-        if synthesis:
-            print(f"   ✓ Found synthesis analysis")
-        else:
-            print(f"   ⚠️ No synthesis analysis found (expected for test)")
-        
-        print("\n3. Testing retrieve_cumulative_analysis_state...")
-        state = await retrieve_cumulative_analysis_state(test_run_id, test_repository_id, test_session_id)
-        if state:
-            print(f"   ✓ Found cumulative analysis state")
-        else:
-            print(f"   ⚠️ No cumulative analysis state found (expected for test)")
-        
-        print("\n4. Testing retrieve_all_findings...")
-        findings = await retrieve_all_findings(test_run_id)
-        print(f"   ✓ Found findings for {len(findings)} modules")
-        
-        print("\n5. Testing retrieve_all_metrics...")
-        metrics = await retrieve_all_metrics(test_run_id)
-        print(f"   ✓ Found metrics for {len(metrics)} modules")
-        
-        print("\n6. Testing retrieve_comprehensive_report_context...")
-        context = await retrieve_comprehensive_report_context(
-            run_id=test_run_id,
-            repository_id=test_repository_id,
-            session_id=test_session_id
-        )
-        
-        print(f"   ✓ Context retrieved:")
-        print(f"     - Modules: {context.get('total_modules', 0)}")
-        print(f"     - Findings: {context.get('total_findings', 0)}")
-        print(f"     - Has synthesis: {bool(context.get('synthesis_analysis'))}")
-        print(f"     - Has analysis state: {bool(context.get('analysis_state'))}")
-        
-        print("\n✅ All context retrieval tests passed!")
-        return True
-        
-    except Exception as e:
-        print(f"\n❌ Context retrieval test failed: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
-
-def test_pdf_method_exists():
-    """Test that the new PDF method exists."""
-    print("\n" + "=" * 60)
-    print("Testing PDF Report Method")
-    print("=" * 60)
-    
-    try:
-        # Import using the same method as server.py
-        import sys
-        import importlib.util
-        
-        spec = importlib.util.spec_from_file_location("ai_analyze", "ai-analyze.py")
-        ai_analyze_module = importlib.util.module_from_spec(spec)
-        sys.modules["ai_analyze"] = ai_analyze_module
-        spec.loader.exec_module(ai_analyze_module)
-        
-        from ai_analyze import EnhancedGitHubAnalyzer
-        
-        print("✅ EnhancedGitHubAnalyzer imported successfully")
-        
-        # Check if new method exists
-        if hasattr(EnhancedGitHubAnalyzer, 'create_multi_level_pdf_report'):
-            print("✅ create_multi_level_pdf_report method exists")
-            
-            # Check method signature
-            import inspect
-            sig = inspect.signature(EnhancedGitHubAnalyzer.create_multi_level_pdf_report)
-            params = list(sig.parameters.keys())
-            print(f"   Method parameters: {', '.join(params)}")
-            
-            if 'comprehensive_context' in params:
-                print("   ✓ comprehensive_context parameter exists")
-            if 'output_path' in params:
-                print("   ✓ output_path parameter exists")
-            if 'repository_id' in params:
-                print("   ✓ repository_id parameter exists")
-            if 'run_id' in params:
-                print("   ✓ run_id parameter exists")
-            
-            return True
-        else:
-            print("❌ create_multi_level_pdf_report method not found")
-            return False
-            
-    except Exception as e:
-        print(f"❌ PDF method test failed: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
-
-def test_database_tables():
-    """Test that database tables exist."""
-    print("\n" + "=" * 60)
-    print("Testing Database Tables")
-    print("=" * 60)
-    
-    try:
-        import psycopg2
-        from dotenv import load_dotenv
-        
-        load_dotenv()
-        
-        conn = psycopg2.connect(
-            host=os.getenv('POSTGRES_HOST', 'localhost'),
-            port=os.getenv('POSTGRES_PORT', '5432'),
-            database=os.getenv('POSTGRES_DB', 'dev_pipeline'),
-            user=os.getenv('POSTGRES_USER', 'pipeline_admin'),
-            password=os.getenv('POSTGRES_PASSWORD', 'secure_pipeline_2024')
-        )
-        
-        cursor = conn.cursor()
-        
-        # Check each table
-        tables_to_check = ['findings', 'metrics', 'report_sections', 'analysis_runs']
-        
-        for table_name in tables_to_check:
-            cursor.execute(f"""
-                SELECT COUNT(*) 
-                FROM information_schema.tables 
-                WHERE table_schema = 'public' 
-                AND table_name = %s
-            """, (table_name,))
-            
-            exists = cursor.fetchone()[0] > 0
-            
-            if exists:
-                # Get row count
-                cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
-                count = cursor.fetchone()[0]
-                print(f"✅ Table '{table_name}' exists ({count} rows)")
-            else:
-                print(f"❌ Table '{table_name}' does not exist")
-                return False
-        
-        cursor.close()
-        conn.close()
-        
-        print("\n✅ All database tables verified!")
-        return True
-        
-    except Exception as e:
-        print(f"❌ Database test failed: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
-
-async def run_all_tests():
-    """Run all tests."""
-    print("\n" + "=" * 60)
-    print("MULTI-LEVEL REPORT IMPLEMENTATION TEST SUITE")
-    print("=" * 60)
-    
-    results = []
-    
-    # Test 1: Database tables
-    results.append(("Database Tables", test_database_tables()))
-    
-    # Test 2: PDF method exists
-    results.append(("PDF Method", test_pdf_method_exists()))
-    
-    # Test 3: Context retrieval
-    results.append(("Context Retrieval", await test_context_retrieval()))
-    
-    # Summary
-    print("\n" + "=" * 60)
-    print("TEST SUMMARY")
-    print("=" * 60)
-    
-    passed = 0
-    failed = 0
-    
-    for test_name, result in results:
-        status = "✅ PASSED" if result else "❌ FAILED"
-        print(f"{test_name}: {status}")
-        if result:
-            passed += 1
-        else:
-            failed += 1
-    
-    print(f"\nTotal: {passed} passed, {failed} failed out of {len(results)} tests")
-    
-    if failed == 0:
-        print("\n✅ All tests passed! Implementation is ready.")
-        return True
-    else:
-        print(f"\n⚠️ {failed} test(s) failed. Please review the errors above.")
-        return False
-
-if __name__ == "__main__":
-    success = asyncio.run(run_all_tests())
-    sys.exit(0 if success else 1)
-
--- a/services/ai-analysis-service/test_progressive_context.py
+++ b/services/ai-analysis-service/test_progressive_context.py
@ -1,309 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for progressive context implementation.
-Tests the logic without requiring actual API calls or database connections.
-"""
-
-import sys
-from pathlib import Path
-from typing import Dict, List, Tuple
-
-# Add current directory to path
-sys.path.insert(0, str(Path(__file__).parent))
-
-# Import the functions we need to test
-from server import (
-    build_context_from_state,
-    update_state_with_findings,
-    create_intelligent_chunks,
-    build_intelligent_chunk_prompt
-)
-
-# Mock FileAnalysis class
-class MockFileAnalysis:
-    def __init__(self, path, severity_score, issues_found=None, complexity_score=5.0):
-        self.path = path
-        self.severity_score = severity_score
-        self.issues_found = issues_found or []
-        self.complexity_score = complexity_score
-        self.language = "javascript"
-        self.lines_of_code = 100
-        self.recommendations = []
-        self.detailed_analysis = "Mock analysis"
-
-def test_build_context_from_state():
-    """Test building context from analysis state."""
-    print("=" * 60)
-    print("TEST 1: build_context_from_state()")
-    print("=" * 60)
-    
-    # Create analysis state with progressive data
-    analysis_state = {
-        'modules_analyzed': ['project_overview', 'authentication'],
-        'project_overview': 'Node.js e-commerce platform with Express backend and React frontend',
-        'module_summaries': {
-            'project_overview': 'Modern e-commerce platform with microservices architecture',
-            'authentication': 'JWT-based authentication with rate limiting missing'
-        },
-        'architecture_patterns': ['MVC', 'Service Layer'],
-        'critical_issues': [
-            {'module': 'authentication', 'issue': 'Missing rate limiting on auth endpoints'}
-        ],
-        'tech_stack': {
-            'frontend': 'React',
-            'backend': 'Node.js',
-            'database': 'PostgreSQL'
-        },
-        'dependency_context': {
-            'chunk_001': 'Project overview and setup',
-            'chunk_002': 'Authentication module with JWT'
-        }
-    }
-    
-    # Test chunk (products module)
-    current_chunk = {
-        'name': 'products',
-        'id': 'chunk_003',
-        'chunk_type': 'module',
-        'context_dependencies': ['chunk_001', 'chunk_002']
-    }
-    
-    context = build_context_from_state(analysis_state, current_chunk)
-    
-    print("\n✅ Generated context:")
-    print(context)
-    print()
-    
-    # Verify context contains expected sections
-    assert "PROJECT OVERVIEW" in context, "Context should include project overview"
-    assert "PREVIOUSLY ANALYZED MODULES" in context, "Context should include module summaries"
-    assert "ARCHITECTURE PATTERNS" in context, "Context should include architecture patterns"
-    assert "CRITICAL ISSUES" in context, "Context should include critical issues"
-    assert "TECH STACK" in context, "Context should include tech stack"
-    assert "DEPENDENCY CONTEXT" in context, "Context should include dependency context"
-    
-    print("✅ All context sections present!")
-    return True
-
-def test_update_state_with_findings():
-    """Test updating analysis state with new findings."""
-    print("\n" + "=" * 60)
-    print("TEST 2: update_state_with_findings()")
-    print("=" * 60)
-    
-    # Initial state
-    analysis_state = {
-        'modules_analyzed': ['project_overview'],
-        'module_summaries': {
-            'project_overview': 'Node.js e-commerce platform'
-        },
-        'architecture_patterns': [],
-        'critical_issues': [],
-        'dependency_context': {}
-    }
-    
-    # New chunk analysis
-    chunk = {
-        'name': 'authentication',
-        'id': 'chunk_002',
-        'chunk_type': 'module'
-    }
-    
-    chunk_analysis = {
-        'module_overview': 'JWT-based authentication module with rate limiting missing',
-        'module_architecture': 'Uses MVC pattern with Service Layer for business logic',
-        'module_quality_score': 6.5
-    }
-    
-    # Mock file analyses
-    file_analyses = [
-        MockFileAnalysis('auth.controller.js', 7.0, ['No rate limiting']),
-        MockFileAnalysis('auth.service.js', 8.0),
-        MockFileAnalysis('auth.middleware.js', 4.0, ['Weak validation'])  # Low quality
-    ]
-    
-    # Update state
-    updated_state = update_state_with_findings(analysis_state.copy(), chunk, chunk_analysis, file_analyses)
-    
-    print("\n✅ Updated state:")
-    print(f"  Modules analyzed: {updated_state.get('modules_analyzed', [])}")
-    print(f"  Architecture patterns: {updated_state.get('architecture_patterns', [])}")
-    print(f"  Critical issues: {len(updated_state.get('critical_issues', []))}")
-    print(f"  Module summaries: {list(updated_state.get('module_summaries', {}).keys())}")
-    print()
-    
-    # Verify updates
-    assert 'authentication' in updated_state['modules_analyzed'], "Authentication should be in modules_analyzed"
-    assert 'MVC' in updated_state['architecture_patterns'], "MVC pattern should be detected"
-    assert 'Service Layer' in updated_state['architecture_patterns'], "Service Layer pattern should be detected"
-    assert len(updated_state['critical_issues']) > 0, "Critical issues should be added"
-    assert 'authentication' in updated_state['module_summaries'], "Module summary should be stored"
-    
-    print("✅ State updated correctly!")
-    return True
-
-def test_progressive_context_flow():
-    """Test the complete progressive context flow."""
-    print("\n" + "=" * 60)
-    print("TEST 3: Progressive Context Flow (Simulated)")
-    print("=" * 60)
-    
-    # Simulate chunk processing flow
-    test_files = [
-        ("README.md", "# Project\n\nNode.js e-commerce platform"),
-        ("package.json", '{"name": "ecommerce", "dependencies": {"express": "^4.0"}}'),
-        ("src/auth/auth.controller.js", "export class AuthController {}"),
-        ("src/auth/auth.service.js", "export class AuthService {}"),
-        ("src/products/product.controller.js", "export class ProductController {}"),
-    ]
-    
-    # Create chunks
-    chunks = create_intelligent_chunks(test_files)
-    
-    print(f"\n✅ Created {len(chunks)} chunks:")
-    for chunk in chunks:
-        print(f"  - {chunk['name']} ({chunk['chunk_type']}): {len(chunk['files'])} files")
-    
-    # Simulate progressive analysis
-    analysis_state = {}
-    
-    print("\n📊 Simulating progressive analysis:")
-    
-    for i, chunk in enumerate(chunks, 1):
-        chunk_name = chunk['name']
-        print(f"\n  Chunk {i}: {chunk_name}")
-        
-        # Build context (what would be used in prompt)
-        context = build_context_from_state(analysis_state, chunk)
-        if context:
-            print(f"    📚 Context available: {len(context)} chars")
-        else:
-            print(f"    📚 No context (first chunk)")
-        
-        # Simulate chunk analysis results
-        chunk_analysis = {
-            'module_overview': f"Analysis of {chunk_name} module",
-            'module_architecture': 'MVC pattern' if chunk_name != 'project_overview' else 'Node.js setup',
-            'module_quality_score': 7.5
-        }
-        
-        # Mock file analyses
-        file_analyses = [
-            MockFileAnalysis(f"{chunk_name}_file{i}.js", 7.0 + i*0.1)
-            for i in range(len(chunk['files']))
-        ]
-        
-        # Update state
-        analysis_state = update_state_with_findings(analysis_state.copy(), chunk, chunk_analysis, file_analyses)
-        
-        print(f"    ✅ State updated: {len(analysis_state.get('modules_analyzed', []))} modules analyzed")
-        if analysis_state.get('architecture_patterns'):
-            print(f"    📐 Patterns: {', '.join(analysis_state.get('architecture_patterns', []))}")
-    
-    print("\n📊 Final Analysis State:")
-    print(f"  Modules analyzed: {', '.join(analysis_state.get('modules_analyzed', []))}")
-    print(f"  Architecture patterns: {', '.join(analysis_state.get('architecture_patterns', []))}")
-    print(f"  Critical issues: {len(analysis_state.get('critical_issues', []))}")
-    print(f"  Module summaries: {len(analysis_state.get('module_summaries', {}))}")
-    
-    # Verify final state
-    assert len(analysis_state.get('modules_analyzed', [])) == len(chunks), "All chunks should be analyzed"
-    assert len(analysis_state.get('architecture_patterns', [])) > 0, "Patterns should be detected"
-    
-    print("\n✅ Progressive context flow working correctly!")
-    return True
-
-def test_prompt_includes_context():
-    """Test that prompts include progressive context."""
-    print("\n" + "=" * 60)
-    print("TEST 4: Prompt Includes Progressive Context")
-    print("=" * 60)
-    
-    # Create analysis state
-    analysis_state = {
-        'modules_analyzed': ['project_overview', 'authentication'],
-        'project_overview': 'Node.js platform',
-        'module_summaries': {
-            'authentication': 'JWT auth module'
-        },
-        'architecture_patterns': ['MVC'],
-        'critical_issues': [
-            {'module': 'authentication', 'issue': 'Missing rate limiting'}
-        ],
-        'tech_stack': {'backend': 'Node.js'}
-    }
-    
-    # Test chunk
-    chunk = {
-        'name': 'products',
-        'chunk_type': 'module',
-        'files': [('product.controller.js', 'export class ProductController {}')]
-    }
-    
-    # Build prompt
-    prompt = build_intelligent_chunk_prompt(chunk, analysis_state)
-    
-    print("\n✅ Generated prompt (first 500 chars):")
-    print(prompt[:500])
-    print("...")
-    print()
-    
-    # Verify prompt includes context
-    assert "CONTEXT FROM PREVIOUS ANALYSIS" in prompt, "Prompt should include context section"
-    assert "PROJECT OVERVIEW" in prompt, "Prompt should include project overview"
-    assert "PREVIOUSLY ANALYZED MODULES" in prompt, "Prompt should include module summaries"
-    assert "ARCHITECTURE PATTERNS" in prompt, "Prompt should include architecture patterns"
-    assert "CRITICAL ISSUES" in prompt, "Prompt should include critical issues"
-    
-    print("✅ Prompt includes all context sections!")
-    
-    # Test without context (first chunk)
-    prompt_no_context = build_intelligent_chunk_prompt(chunk, None)
-    assert "CONTEXT FROM PREVIOUS ANALYSIS" not in prompt_no_context, "First chunk should not have context"
-    
-    print("✅ Prompt correctly omits context for first chunk!")
-    return True
-
-def run_all_tests():
-    """Run all tests."""
-    print("\n" + "=" * 60)
-    print("PROGRESSIVE CONTEXT - COMPREHENSIVE TEST SUITE")
-    print("=" * 60)
-    
-    try:
-        # Test 1: Context building
-        test_build_context_from_state()
-        
-        # Test 2: State updates
-        test_update_state_with_findings()
-        
-        # Test 3: Complete flow
-        test_progressive_context_flow()
-        
-        # Test 4: Prompt generation
-        test_prompt_includes_context()
-        
-        print("\n" + "=" * 60)
-        print("✅ ALL TESTS PASSED!")
-        print("=" * 60)
-        print("\n📊 Summary:")
-        print("  • Context building: ✅")
-        print("  • State updates: ✅")
-        print("  • Progressive flow: ✅")
-        print("  • Prompt generation: ✅")
-        print("\n🎉 Progressive context implementation is working correctly!")
-        
-        return True
-        
-    except Exception as e:
-        print("\n" + "=" * 60)
-        print(f"❌ TEST FAILED: {e}")
-        print("=" * 60)
-        import traceback
-        traceback.print_exc()
-        return False
-
-if __name__ == "__main__":
-    success = run_all_tests()
-    sys.exit(0 if success else 1)
-