Commit 841414df authored by Mahmoud Aglan

test

parent 37b9873f
...@@ -53,6 +53,20 @@ def _run_migrations(): ...@@ -53,6 +53,20 @@ def _run_migrations():
if table_name not in existing_tables: if table_name not in existing_tables:
print(f" Creating {table_name} table") print(f" Creating {table_name} table")
if "linked_repos" in existing_tables:
lr_columns = {c["name"] for c in inspector.get_columns("linked_repos")}
with engine.connect() as conn:
if "architecture_map" not in lr_columns:
conn.execute(text("ALTER TABLE linked_repos ADD COLUMN architecture_map TEXT"))
print(" Added linked_repos.architecture_map column")
if "map_status" not in lr_columns:
conn.execute(text("ALTER TABLE linked_repos ADD COLUMN map_status VARCHAR(20) DEFAULT 'none'"))
print(" Added linked_repos.map_status column")
if "map_generated_at" not in lr_columns:
conn.execute(text("ALTER TABLE linked_repos ADD COLUMN map_generated_at DATETIME"))
print(" Added linked_repos.map_generated_at column")
conn.commit()
except Exception as e: except Exception as e:
print(f" Migration note: {e}") print(f" Migration note: {e}")
......
...@@ -151,6 +151,9 @@ class LinkedRepo(Base): ...@@ -151,6 +151,9 @@ class LinkedRepo(Base):
default_branch = Column(String(100), default="main") default_branch = Column(String(100), default="main")
web_url = Column(String(500), default="") web_url = Column(String(500), default="")
description = Column(Text, default="") description = Column(Text, default="")
architecture_map = Column(Text, nullable=True)
map_status = Column(String(20), default="none")
map_generated_at = Column(DateTime, nullable=True)
created_at = Column(DateTime, default=datetime.utcnow) created_at = Column(DateTime, default=datetime.utcnow)
actions = relationship("PendingAction", back_populates="repo", cascade="all,delete-orphan") actions = relationship("PendingAction", back_populates="repo", cascade="all,delete-orphan")
......
...@@ -3,6 +3,7 @@ GitLab CE integration routes — superadmin only. ...@@ -3,6 +3,7 @@ GitLab CE integration routes — superadmin only.
Son of Anton v4.0.0 Son of Anton v4.0.0
""" """
import asyncio
import json import json
from datetime import datetime from datetime import datetime
from typing import Optional from typing import Optional
...@@ -14,7 +15,7 @@ from sqlalchemy.orm import Session ...@@ -14,7 +15,7 @@ from sqlalchemy.orm import Session
from backend.database import get_db from backend.database import get_db
from backend.models import User, GitLabSettings, LinkedRepo, PendingAction from backend.models import User, GitLabSettings, LinkedRepo, PendingAction
from backend.auth import require_superadmin from backend.auth import require_superadmin
from backend.services import gitlab_service from backend.services import gitlab_service, code_analyzer
router = APIRouter() router = APIRouter()
...@@ -181,10 +182,18 @@ async def link_repo(body: LinkRepoBody, admin: User = Depends(require_superadmin ...@@ -181,10 +182,18 @@ async def link_repo(body: LinkRepoBody, admin: User = Depends(require_superadmin
default_branch=project.get("default_branch", "main"), default_branch=project.get("default_branch", "main"),
web_url=project.get("web_url", ""), web_url=project.get("web_url", ""),
description=project.get("description", ""), description=project.get("description", ""),
map_status="analyzing",
) )
db.add(repo) db.add(repo)
db.commit() db.commit()
db.refresh(repo) db.refresh(repo)
# Start background analysis
asyncio.create_task(_analyze_repo_background(
repo.id, s.gitlab_url, s.gitlab_token,
project["id"], project.get("default_branch", "main"),
))
return _repo_dict(repo) return _repo_dict(repo)
...@@ -196,6 +205,84 @@ def unlink_repo(repo_id: str, admin: User = Depends(require_superadmin), db: Ses ...@@ -196,6 +205,84 @@ def unlink_repo(repo_id: str, admin: User = Depends(require_superadmin), db: Ses
return {"ok": True} return {"ok": True}
# ═══════════════════════════════════════════════════
# Architecture Map
# ═══════════════════════════════════════════════════
async def _analyze_repo_background(repo_id: str, gitlab_url: str, gitlab_token: str, project_id: int, branch: str):
    """Background task: load all files and generate the architecture map.

    Uses its own session created from ``SessionLocal``. The repo row is marked
    ``"analyzing"`` first, then ends as ``"ready"`` (map stored together with
    ``map_generated_at``) or ``"failed"`` (a bracketed explanation is stored in
    ``architecture_map``). Nothing is raised to the caller: every error is
    recorded on the row when possible and printed.

    Args:
        repo_id: Primary key of the ``LinkedRepo`` row to update.
        gitlab_url: GitLab base URL, passed to ``gitlab_service.load_project_files``.
        gitlab_token: GitLab access token, passed through unchanged.
        project_id: GitLab project id whose files are loaded.
        branch: Ref to load the files from.
    """
    from backend.database import SessionLocal as BgSession

    db = BgSession()
    try:
        repo = db.query(LinkedRepo).filter(LinkedRepo.id == repo_id).first()
        if not repo:
            return
        repo.map_status = "analyzing"
        db.commit()

        result = await gitlab_service.load_project_files(
            gitlab_url, gitlab_token, project_id, ref=branch,
        )
        files = result.get("files", [])
        if not files:
            repo.map_status = "failed"
            repo.architecture_map = "[No files could be loaded for analysis]"
            db.commit()
            return

        # analyze_codebase is synchronous regex work over every file; running it
        # in a worker thread keeps the event loop free for other requests.
        architecture_map = await asyncio.to_thread(code_analyzer.analyze_codebase, files)

        repo.architecture_map = architecture_map
        repo.map_status = "ready"
        repo.map_generated_at = datetime.utcnow()
        db.commit()
        print(f" ✅ Architecture map generated for {repo.name} ({len(architecture_map)} chars)")
    except Exception as e:
        try:
            # If the error came from a flush/commit the session must be rolled
            # back before it can be used to record the failure.
            db.rollback()
            repo = db.query(LinkedRepo).filter(LinkedRepo.id == repo_id).first()
            if repo:
                repo.map_status = "failed"
                repo.architecture_map = f"[Analysis failed: {str(e)[:200]}]"
                db.commit()
        except Exception as record_error:
            # Best effort only, but do not hide the secondary failure.
            print(f" Could not record analysis failure for repo {repo_id}: {record_error}")
        print(f" ❌ Architecture analysis failed for repo {repo_id}: {e}")
    finally:
        db.close()
# Strong references to in-flight re-analysis tasks. The event loop only keeps
# weak references to tasks, so a task nobody else references may be garbage
# collected before it finishes.
_reanalyze_tasks: set = set()


@router.post("/repos/{repo_id}/analyze")
async def reanalyze_repo(repo_id: str, admin: User = Depends(require_superadmin), db: Session = Depends(get_db)):
    """Re-generate the architecture map for a linked repo.

    Marks the repo as ``"analyzing"``, schedules ``_analyze_repo_background``
    on the running event loop and returns immediately; the result is written
    to the repo row by the background task.

    Returns:
        ``{"ok": True, "status": "analyzing"}``.
    """
    s = _get_settings(db)
    repo = _get_repo(repo_id, db)
    repo.map_status = "analyzing"
    db.commit()

    task = asyncio.create_task(_analyze_repo_background(
        repo.id, s.gitlab_url, s.gitlab_token,
        repo.gitlab_project_id, repo.default_branch,
    ))
    # Keep the task alive until it completes, then drop the reference.
    _reanalyze_tasks.add(task)
    task.add_done_callback(_reanalyze_tasks.discard)

    return {"ok": True, "status": "analyzing"}
@router.get("/repos/{repo_id}/map")
def get_repo_map(repo_id: str, admin: User = Depends(require_superadmin), db: Session = Depends(get_db)):
    """Return the stored architecture map of a linked repo with its status.

    Missing values are normalised: status falls back to ``"none"``, the map to
    an empty string and the generation time to ``None``.
    """
    linked = _get_repo(repo_id, db)
    map_text = linked.architecture_map or ""
    generated = linked.map_generated_at
    payload = {
        "map_status": linked.map_status or "none",
        "map_generated_at": str(generated) if generated else None,
        "architecture_map": map_text,
        "map_size": len(map_text),
    }
    return payload
# ═══════════════════════════════════════════════════ # ═══════════════════════════════════════════════════
# Repository Operations # Repository Operations
# ═══════════════════════════════════════════════════ # ═══════════════════════════════════════════════════
...@@ -300,23 +387,6 @@ async def create_mr(repo_id: str, body: MergeRequestBody, admin: User = Depends( ...@@ -300,23 +387,6 @@ async def create_mr(repo_id: str, body: MergeRequestBody, admin: User = Depends(
raise HTTPException(e.status_code, e.detail) raise HTTPException(e.status_code, e.detail)
@router.get("/repos/{repo_id}/analyze")
async def analyze_project(
    repo_id: str,
    ref: Optional[str] = Query(None),
    admin: User = Depends(require_superadmin),
    db: Session = Depends(get_db),
):
    """Load a linked repo's files from GitLab and return the loader result.

    Uses ``ref`` when given, otherwise the repo's default branch. A
    ``GitLabError`` is re-raised as an ``HTTPException`` carrying the same
    status code and detail.
    """
    settings = _get_settings(db)
    linked = _get_repo(repo_id, db)
    target_ref = ref if ref else linked.default_branch
    try:
        return await gitlab_service.load_project_files(
            settings.gitlab_url, settings.gitlab_token, linked.gitlab_project_id, ref=target_ref,
        )
    except gitlab_service.GitLabError as err:
        raise HTTPException(err.status_code, err.detail)
# ═══════════════════════════════════════════════════ # ═══════════════════════════════════════════════════
# Pending Actions # Pending Actions
# ═══════════════════════════════════════════════════ # ═══════════════════════════════════════════════════
...@@ -419,6 +489,8 @@ def _repo_dict(r: LinkedRepo) -> dict: ...@@ -419,6 +489,8 @@ def _repo_dict(r: LinkedRepo) -> dict:
"default_branch": r.default_branch, "default_branch": r.default_branch,
"web_url": r.web_url, "web_url": r.web_url,
"description": r.description, "description": r.description,
"map_status": r.map_status or "none",
"map_generated_at": str(r.map_generated_at) if r.map_generated_at else None,
"created_at": str(r.created_at), "created_at": str(r.created_at),
} }
......
"""
Codebase Architecture Analyzer — generates a structural mindmap
of any repository by analyzing imports, exports, routes, and
cross-boundary connections.
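Dedicated parsers: Python, JS/TS/JSX/TSX, C#. All other files (Go, Rust, Java, Ruby, PHP, ...) go through a generic fallback that only recognizes Go/Rust-style patterns.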
"""
import re
from collections import defaultdict
from typing import Optional
# ═══════════════════════════════════════════════════
# Language-specific parsers
# ═══════════════════════════════════════════════════
def _parse_python(path: str, content: str) -> dict:
"""Extract imports, definitions, and routes from Python files."""
imports = []
definitions = []
routes = []
decorators = []
for line in content.split("\n"):
stripped = line.strip()
# Imports
m = re.match(r'^from\s+([\w.]+)\s+import\s+(.+)', stripped)
if m:
module = m.group(1)
names = [n.strip().split(" as ")[0].strip() for n in m.group(2).split(",")]
imports.append({"module": module, "names": names})
continue
m = re.match(r'^import\s+([\w.]+)', stripped)
if m:
imports.append({"module": m.group(1), "names": []})
continue
# Decorators (accumulate for next def/class)
m = re.match(r'^@(\w+)\.(get|post|put|delete|patch|websocket)\s*\(\s*["\']([^"\']*)', stripped)
if m:
method = m.group(2).upper()
route_path = m.group(3)
decorators.append({"method": method, "path": route_path})
continue
m = re.match(r'^@(\w+)\.route\s*\(\s*["\']([^"\']*)', stripped)
if m:
decorators.append({"method": "ROUTE", "path": m.group(2)})
continue
# Function defs
m = re.match(r'^(?:async\s+)?def\s+(\w+)\s*\(', stripped)
if m:
name = m.group(1)
if not name.startswith("_") or name.startswith("__"):
defn = {"type": "function", "name": name}
definitions.append(defn)
if decorators:
for d in decorators:
routes.append({
"method": d["method"],
"path": d["path"],
"handler": name,
"file": path,
})
decorators = []
continue
# Class defs
m = re.match(r'^class\s+(\w+)\s*[\(:]', stripped)
if m:
definitions.append({"type": "class", "name": m.group(1)})
decorators = []
continue
# Clear decorators on non-matching lines
if stripped and not stripped.startswith("@") and not stripped.startswith("#"):
decorators = []
return {"imports": imports, "definitions": definitions, "routes": routes}
def _parse_javascript(path: str, content: str) -> dict:
"""Extract imports, exports, routes, and API calls from JS/TS files."""
imports = []
exports = []
api_calls = []
routes = []
for line in content.split("\n"):
stripped = line.strip()
# ES6 imports
m = re.match(r'^import\s+(?:{([^}]+)}|(\w+))\s+from\s+["\']([^"\']+)', stripped)
if m:
names = []
if m.group(1):
names = [n.strip().split(" as ")[0].strip() for n in m.group(1).split(",")]
elif m.group(2):
names = [m.group(2)]
imports.append({"module": m.group(3), "names": names})
continue
# import default + named
m = re.match(r'^import\s+(\w+)\s*,\s*{([^}]+)}\s+from\s+["\']([^"\']+)', stripped)
if m:
names = [m.group(1)] + [n.strip().split(" as ")[0].strip() for n in m.group(2).split(",")]
imports.append({"module": m.group(3), "names": names})
continue
# import * as
m = re.match(r'^import\s+\*\s+as\s+(\w+)\s+from\s+["\']([^"\']+)', stripped)
if m:
imports.append({"module": m.group(2), "names": [f"* as {m.group(1)}"]})
continue
# require
m = re.search(r'require\s*\(\s*["\']([^"\']+)', stripped)
if m and not stripped.startswith("//"):
imports.append({"module": m.group(1), "names": []})
# Exports
m = re.match(r'^export\s+(?:default\s+)?(?:async\s+)?function\s+(\w+)', stripped)
if m:
exports.append({"type": "function", "name": m.group(1)})
continue
m = re.match(r'^export\s+(?:default\s+)?(?:const|let|var)\s+(\w+)', stripped)
if m:
exports.append({"type": "const", "name": m.group(1)})
continue
m = re.match(r'^export\s+default\s+(?:class\s+)?(\w+)', stripped)
if m:
exports.append({"type": "default", "name": m.group(1)})
continue
# API calls (fetch)
for fm in re.finditer(r'fetch\s*\(\s*[`"\']([^`"\']*(?:/api/[^`"\']*)?)[`"\']', stripped):
url = fm.group(1)
if "/api/" in url or url.startswith("/"):
api_calls.append(url)
for fm in re.finditer(r'fetch\s*\(\s*`\$\{[^}]*\}(/[^`]*)`', stripped):
api_calls.append(fm.group(1))
# Express routes
m = re.match(r'(?:app|router)\.(get|post|put|delete|patch)\s*\(\s*["\']([^"\']+)', stripped)
if m:
routes.append({
"method": m.group(1).upper(),
"path": m.group(2),
"file": path,
})
return {"imports": imports, "exports": exports, "api_calls": api_calls, "routes": routes}
def _parse_csharp(path: str, content: str) -> dict:
"""Extract basic structure from C# files."""
imports = []
definitions = []
routes = []
for line in content.split("\n"):
stripped = line.strip()
m = re.match(r'^using\s+([\w.]+)\s*;', stripped)
if m:
imports.append({"module": m.group(1), "names": []})
continue
m = re.match(r'^(?:public|private|protected|internal|static|\s)*class\s+(\w+)', stripped)
if m:
definitions.append({"type": "class", "name": m.group(1)})
continue
m = re.match(r'^(?:public|private|protected|internal|static|\s)*interface\s+(\w+)', stripped)
if m:
definitions.append({"type": "interface", "name": m.group(1)})
continue
# ASP.NET routes
m = re.search(r'\[Http(Get|Post|Put|Delete|Patch)\s*\(\s*"([^"]*)"', stripped)
if m:
routes.append({"method": m.group(1).upper(), "path": m.group(2), "file": path})
continue
m = re.search(r'\[Route\s*\(\s*"([^"]*)"', stripped)
if m:
routes.append({"method": "ROUTE", "path": m.group(1), "file": path})
return {"imports": imports, "definitions": definitions, "routes": routes}
def _parse_generic(path: str, content: str) -> dict:
"""Fallback: extract function/class patterns from any language."""
definitions = []
imports = []
for line in content.split("\n"):
stripped = line.strip()
# Go imports
m = re.match(r'^import\s+"([^"]+)"', stripped)
if m:
imports.append({"module": m.group(1), "names": []})
# Go/Rust function defs
m = re.match(r'^(?:pub\s+)?fn\s+(\w+)', stripped)
if m:
definitions.append({"type": "function", "name": m.group(1)})
continue
m = re.match(r'^func\s+(?:\([^)]+\)\s+)?(\w+)', stripped)
if m:
definitions.append({"type": "function", "name": m.group(1)})
continue
# Go/Rust struct/type
m = re.match(r'^(?:pub\s+)?(?:type|struct)\s+(\w+)', stripped)
if m:
definitions.append({"type": "struct", "name": m.group(1)})
return {"imports": imports, "definitions": definitions}
# ═══════════════════════════════════════════════════
# File parser dispatcher
# ═══════════════════════════════════════════════════
def _parse_file(path: str, content: str) -> dict:
    """Pick a parser from the (case-insensitive) file extension and run it.

    ``.py`` goes to the Python parser, the JS/TS family (including ``.vue`` and
    ``.svelte``) to the JavaScript parser, ``.cs`` to the C# parser, and every
    other file to the generic fallback.
    """
    lowered = path.lower()
    dispatch = (
        ((".py",), _parse_python),
        ((".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs", ".vue", ".svelte"), _parse_javascript),
        ((".cs",), _parse_csharp),
    )
    for suffixes, parser in dispatch:
        if lowered.endswith(suffixes):
            return parser(path, content)
    return _parse_generic(path, content)
# ═══════════════════════════════════════════════════
# Resolve import paths to actual files
# ═══════════════════════════════════════════════════
def _resolve_import(importing_file: str, module: str, all_paths: set[str]) -> Optional[str]:
"""Try to resolve an import module string to an actual file path."""
if not module:
return None
# Direct path match
for ext in ["", ".py", ".js", ".ts", ".jsx", ".tsx", ".cs", ".go", ".rs"]:
candidate = module.replace(".", "/") + ext
if candidate in all_paths:
return candidate
# Relative imports for Python (from .module import ...)
if module.startswith("."):
dir_parts = importing_file.rsplit("/", 1)
base_dir = dir_parts[0] if len(dir_parts) > 1 else ""
rel = module.lstrip(".")
for ext in [".py", ""]:
candidate = f"{base_dir}/{rel.replace('.', '/')}{ext}"
if candidate in all_paths:
return candidate
# __init__.py
candidate = f"{base_dir}/{rel.replace('.', '/')}/__init__.py"
if candidate in all_paths:
return candidate
# JS relative imports
if module.startswith("./") or module.startswith("../"):
dir_parts = importing_file.rsplit("/", 1)
base_dir = dir_parts[0] if len(dir_parts) > 1 else ""
if module.startswith("./"):
rel = module[2:]
else:
# Go up directories
parts = base_dir.split("/")
ups = module.count("../")
base = "/".join(parts[:-ups]) if ups < len(parts) else ""
rel = module.replace("../", "")
base_dir = base
for ext in ["", ".js", ".jsx", ".ts", ".tsx", "/index.js", "/index.tsx", "/index.ts"]:
candidate = f"{base_dir}/{rel}{ext}" if base_dir else f"{rel}{ext}"
if candidate in all_paths:
return candidate
# Backend-style imports (backend.models → backend/models.py)
for ext in [".py"]:
candidate = module.replace(".", "/") + ext
if candidate in all_paths:
return candidate
return None
# ═══════════════════════════════════════════════════
# Main analyzer
# ═══════════════════════════════════════════════════
# Upper bound, in characters, on the rendered architecture map (roughly 10K
# tokens per the original estimate); longer maps are truncated by the formatter.
MAX_MAP_CHARS = 40_000
def analyze_codebase(files: list[dict]) -> str:
    """Analyze repository files and produce an architecture mindmap string.

    Args:
        files: Dicts with a ``"path"`` key and an optional ``"content"`` key.
            Files whose content is missing, empty, or starts with ``"["`` are
            counted in the overview but not parsed (presumably ``"["`` marks a
            loader placeholder; confirm against the loader).

    Returns:
        The formatted map produced by ``_format_architecture_map``.
    """
    all_paths = {f["path"] for f in files}
    file_data = {}  # path → parsed data
    all_routes = []
    all_api_calls = []
    import_graph = defaultdict(set)      # file → set of files it imports
    imported_by = defaultdict(set)       # file → set of files that import it
    definitions_map = defaultdict(list)  # file → list of definitions
    exports_map = defaultdict(list)      # file → list of exports

    # ── Pass 1: Parse all files ──
    for f in files:
        path = f["path"]
        content = f.get("content") or ""
        if not content or content.startswith("["):
            continue

        parsed = _parse_file(path, content)
        file_data[path] = parsed

        # Collect definitions and exports
        definitions_map[path].extend(parsed.get("definitions", []))
        exports_map[path].extend(parsed.get("exports", []))

        # Collect routes
        for r in parsed.get("routes", []):
            r["file"] = path
            all_routes.append(r)

        # Collect API calls
        for call in parsed.get("api_calls", []):
            all_api_calls.append({"file": path, "url": call})

    # ── Pass 2: Resolve imports → build dependency graph ──
    for path, parsed in file_data.items():
        for imp in parsed.get("imports", []):
            target = _resolve_import(path, imp["module"], all_paths)
            if target and target != path:
                import_graph[path].add(target)
                imported_by[target].add(path)

    # ── Pass 3: Detect project structure ──
    # Built once rather than on every loop iteration.
    lang_map = {
        ".py": "Python", ".js": "JavaScript", ".ts": "TypeScript",
        ".jsx": "React JSX", ".tsx": "React TSX", ".cs": "C#",
        ".java": "Java", ".go": "Go", ".rs": "Rust", ".rb": "Ruby",
        ".php": "PHP", ".vue": "Vue", ".svelte": "Svelte",
        ".html": "HTML", ".css": "CSS", ".scss": "SCSS",
        ".sql": "SQL", ".sh": "Shell",
    }
    lang_counts = defaultdict(int)
    dir_categories = defaultdict(set)

    for path in all_paths:
        ext = ""
        if "." in path.rsplit("/", 1)[-1]:
            ext = "." + path.rsplit(".", 1)[-1].lower()

        if ext in lang_map:
            lang_counts[lang_map[ext]] += 1

        top_dir = path.split("/")[0] if "/" in path else "(root)"
        dir_categories[top_dir].add(path)

    # Detect frameworks from the first 500 characters of the first 50 files.
    # "or ''" keeps a None content from raising, matching pass 1.
    frameworks = []
    all_content_lower = " ".join(
        (f.get("content") or "")[:500].lower() for f in files[:50]
    )

    framework_signals = {
        "FastAPI": ["fastapi", "from fastapi"],
        "Flask": ["from flask"],
        "Django": ["from django", "django.conf"],
        "Express": ["express()", "require('express')"],
        "React": ["from 'react'", 'from "react"', "react-dom"],
        "Vue": ["createapp", "vue"],
        "Next.js": ["next/", "getserversideprops"],
        "Unity": ["monobehaviour", "unityengine"],
        "ASP.NET": ["microsoft.aspnetcore", "iactionresult"],
        "SQLAlchemy": ["sqlalchemy", "declarative_base"],
        "Prisma": ["@prisma/client"],
    }
    for fw, signals in framework_signals.items():
        if any(sig in all_content_lower for sig in signals):
            frameworks.append(fw)

    # ── Pass 4: Identify hot files (most referenced) ──
    hot_files = sorted(imported_by.items(), key=lambda x: -len(x[1]))[:20]

    # ── Pass 5: Cross-boundary flow detection ──
    flows = _detect_cross_boundary_flows(all_routes, all_api_calls, import_graph, definitions_map)

    # ── Pass 6: Build the map ──
    return _format_architecture_map(
        all_paths=all_paths,
        lang_counts=lang_counts,
        frameworks=frameworks,
        dir_categories=dir_categories,
        all_routes=all_routes,
        all_api_calls=all_api_calls,
        import_graph=import_graph,
        imported_by=imported_by,
        definitions_map=definitions_map,
        exports_map=exports_map,
        hot_files=hot_files,
        flows=flows,
    )
def _detect_cross_boundary_flows(
routes: list, api_calls: list,
import_graph: dict, definitions_map: dict,
) -> list[str]:
"""Detect end-to-end flows across frontend/backend boundaries."""
flows = []
# Match API calls to routes
route_map = {}
for r in routes:
key = r.get("path", "").rstrip("/")
if key:
route_map[key] = r
for call in api_calls:
url = call["url"]
# Clean URL patterns
clean = re.sub(r'\$\{[^}]+\}', '{param}', url)
clean = clean.rstrip("/")
matched_route = None
for route_path, route_info in route_map.items():
# Simple match — exact or parameterized
route_clean = re.sub(r'\{[^}]+\}', '{param}', route_path)
if clean == route_clean or clean.endswith(route_clean):
matched_route = route_info
break
if matched_route:
flows.append(
f" {call['file']} → {matched_route.get('method', '?')} {matched_route.get('path', '?')} → {matched_route.get('file', '?')}:{matched_route.get('handler', '?')}()"
)
return flows[:50] # Cap flows
def _format_architecture_map(
    all_paths, lang_counts, frameworks, dir_categories,
    all_routes, all_api_calls, import_graph, imported_by,
    definitions_map, exports_map, hot_files, flows,
) -> str:
    """Format the analysis into a compact, AI-readable string.

    Sections are emitted in a fixed order (overview, directories, endpoints,
    flows, hot files, dependency graph, key definitions, cycles, widely-used
    definitions). A section is left out entirely when it has nothing to show.
    The result never exceeds ``MAX_MAP_CHARS`` characters; longer output is cut
    and ends with a truncation marker.

    ``all_api_calls`` and ``exports_map`` are accepted but not rendered.
    """
    lines = []
    lines.append("╔══════════════════════════════════════════╗")
    lines.append("║ CODEBASE ARCHITECTURE MINDMAP ║")
    lines.append("╚══════════════════════════════════════════╝")
    lines.append("")

    # ── Project Overview ──
    lines.append("PROJECT OVERVIEW:")
    lines.append(f" Total source files: {len(all_paths)}")
    if lang_counts:
        top_langs = sorted(lang_counts.items(), key=lambda x: -x[1])[:8]
        langs = ", ".join(f"{lang} ({count})" for lang, count in top_langs)
        lines.append(f" Languages: {langs}")
    if frameworks:
        lines.append(f" Frameworks: {', '.join(sorted(set(frameworks)))}")
    lines.append("")

    # ── Directory Structure ──
    lines.append("DIRECTORY STRUCTURE:")
    for dir_name in sorted(dir_categories.keys()):
        count = len(dir_categories[dir_name])
        lines.append(f" {dir_name}/ ({count} files)")
    lines.append("")

    # ── API Routes ──
    if all_routes:
        lines.append("API ENDPOINTS:")
        # Group by file
        routes_by_file = defaultdict(list)
        for r in all_routes:
            routes_by_file[r.get("file", "?")].append(r)

        for rfile in sorted(routes_by_file.keys()):
            lines.append(f" [{rfile}]")
            for r in routes_by_file[rfile]:
                handler = r.get("handler", "")
                handler_str = f" → {handler}()" if handler else ""
                method = r.get("method", "?")
                route_path = r.get("path", "?")
                lines.append(f" {method:6s} {route_path}{handler_str}")
        lines.append("")

    # ── Frontend → Backend Connections ──
    if flows:
        lines.append("CROSS-BOUNDARY FLOWS (Frontend → Backend):")
        lines.extend(flows)
        lines.append("")

    # ── Hot Files (most imported) ──
    # Only files with at least two importers are listed; the header is skipped
    # when none qualifies.
    hot_lines = [
        f" {path} ← imported by {len(importers)} files"
        for path, importers in hot_files
        if len(importers) >= 2
    ]
    if hot_lines:
        lines.append("HOT FILES (most referenced by other files):")
        lines.extend(hot_lines)
        lines.append("")

    # ── Dependency Graph (compact) ──
    if import_graph:
        lines.append("DEPENDENCY GRAPH (file → its dependencies):")
        # Up to 60 files, largest dependency count first; each shows at most
        # 15 dependencies by base name.
        sorted_deps = sorted(import_graph.items(), key=lambda x: -len(x[1]))
        for path, deps in sorted_deps[:60]:
            dep_names = sorted(deps)[:15]
            extra = f" (+{len(deps) - 15} more)" if len(deps) > 15 else ""
            short_deps = [d.rsplit("/", 1)[-1] for d in dep_names]
            lines.append(f" {path}")
            lines.append(f" → {', '.join(short_deps)}{extra}")
        lines.append("")

    # ── Key Definitions ──
    def_lines = []
    for path in sorted(definitions_map.keys()):
        defs = definitions_map[path]
        if not defs:
            continue

        classes = [d["name"] for d in defs if d["type"] == "class"]
        functions = [d["name"] for d in defs if d["type"] == "function"]
        structs = [d["name"] for d in defs if d["type"] in ("struct", "interface")]

        parts = []
        if classes:
            parts.append(f"classes: {', '.join(classes[:10])}")
        if structs:
            parts.append(f"types: {', '.join(structs[:10])}")
        if functions:
            # Only show non-private functions, cap at 8
            pub = [f for f in functions if not f.startswith("_")][:8]
            if pub:
                parts.append(f"functions: {', '.join(pub)}")

        if parts:
            def_lines.append(f" {path}")
            for p in parts:
                def_lines.append(f" {p}")
    if def_lines:
        lines.append("KEY DEFINITIONS:")
        lines.extend(def_lines)
        lines.append("")

    # ── Cycle/Pattern Detection ──
    cycles = _detect_cycles(import_graph)
    if cycles:
        lines.append("CIRCULAR DEPENDENCIES DETECTED:")
        for cycle in cycles[:10]:
            lines.append(f" ⟲ {' → '.join(cycle)}")
        lines.append("")

    # ── Shared Definitions ──
    # Classes/types defined in files that many other files import
    shared = _find_shared_definitions(definitions_map, imported_by)
    if shared:
        lines.append("WIDELY-USED DEFINITIONS:")
        for name, info in shared[:15]:
            lines.append(f" {name} (defined in {info['defined_in']}, used by {info['used_by_count']} files)")
        lines.append("")

    result = "\n".join(lines)

    # Truncate if too long, reserving room for the marker so the final string
    # stays within MAX_MAP_CHARS.
    if len(result) > MAX_MAP_CHARS:
        marker = "\n\n... [architecture map truncated]"
        result = result[:max(MAX_MAP_CHARS - len(marker), 0)] + marker

    return result
def _detect_cycles(import_graph: dict) -> list[list[str]]:
"""Detect import cycles using DFS."""
cycles = []
visited = set()
path_set = set()
path_list = []
def dfs(node):
if len(cycles) >= 10:
return
if node in path_set:
# Found cycle
idx = path_list.index(node)
cycle = path_list[idx:] + [node]
# Use short names
short = [p.rsplit("/", 1)[-1] for p in cycle]
cycles.append(short)
return
if node in visited:
return
visited.add(node)
path_set.add(node)
path_list.append(node)
for dep in import_graph.get(node, []):
dfs(dep)
path_set.discard(node)
path_list.pop()
for node in import_graph:
if node not in visited:
dfs(node)
return cycles
def _find_shared_definitions(
definitions_map: dict,
imported_by: dict,
) -> list[tuple[str, dict]]:
"""Find definitions that are used across many files."""
results = []
for path, defs in definitions_map.items():
importers_count = len(imported_by.get(path, set()))
if importers_count < 2:
continue
for d in defs:
if d["type"] in ("class", "struct", "interface"):
results.append((
d["name"],
{
"defined_in": path,
"type": d["type"],
"used_by_count": importers_count,
}
))
results.sort(key=lambda x: -x[1]["used_by_count"])
return results
\ No newline at end of file
""" """
Background generation manager — v4.1.0 Background generation manager — v4.1.0
Smart codebase loading for massive repos + persistent file context. Smart codebase loading for massive repos + persistent file context + architecture mindmap.
""" """
import asyncio import asyncio
...@@ -19,11 +19,9 @@ from backend.services import bedrock_service, memory_service, rag_service, attac ...@@ -19,11 +19,9 @@ from backend.services import bedrock_service, memory_service, rag_service, attac
# Caches # Caches
# ═══════════════════════════════════════════════════ # ═══════════════════════════════════════════════════
# Tree cache: repo_id:branch → (timestamp, tree_list)
_tree_cache: dict[str, tuple[float, list[dict]]] = {} _tree_cache: dict[str, tuple[float, list[dict]]] = {}
TREE_CACHE_TTL = 600 # 10 minutes TREE_CACHE_TTL = 600
# Tracks which files have been discussed per chat
_chat_file_history: dict[str, set[str]] = {} _chat_file_history: dict[str, set[str]] = {}
...@@ -103,7 +101,6 @@ class GenerationManager: ...@@ -103,7 +101,6 @@ class GenerationManager:
await asyncio.sleep(0.02) await asyncio.sleep(0.02)
def invalidate_repo_cache(self, repo_id: str): def invalidate_repo_cache(self, repo_id: str):
"""Call after a commit to force-refresh on next message."""
keys_to_remove = [k for k in _tree_cache if k.startswith(f"{repo_id}:")] keys_to_remove = [k for k in _tree_cache if k.startswith(f"{repo_id}:")]
for k in keys_to_remove: for k in keys_to_remove:
_tree_cache.pop(k, None) _tree_cache.pop(k, None)
...@@ -115,13 +112,6 @@ class GenerationManager: ...@@ -115,13 +112,6 @@ class GenerationManager:
async def _build_repo_context( async def _build_repo_context(
self, db, chat, user_query: str self, db, chat, user_query: str
) -> Optional[str]: ) -> Optional[str]:
"""
Build repo context using smart file selection.
For ANY size codebase:
1. Full file tree (paths only) — always included
2. Priority files (configs, entry points) — always loaded
3. Query-relevant files — loaded based on what user asked
"""
if not chat.linked_repo_id: if not chat.linked_repo_id:
return None return None
...@@ -138,7 +128,6 @@ class GenerationManager: ...@@ -138,7 +128,6 @@ class GenerationManager:
branch = repo.default_branch branch = repo.default_branch
try: try:
# 1. Get tree (cached)
tree = _get_tree_cache(repo.id, branch) tree = _get_tree_cache(repo.id, branch)
if tree is None: if tree is None:
tree = await gitlab_service.get_tree( tree = await gitlab_service.get_tree(
...@@ -147,10 +136,8 @@ class GenerationManager: ...@@ -147,10 +136,8 @@ class GenerationManager:
) )
_set_tree_cache(repo.id, branch, tree) _set_tree_cache(repo.id, branch, tree)
# 2. Get previously discussed files for this chat
prev_files = _chat_file_history.get(chat.id, set()) prev_files = _chat_file_history.get(chat.id, set())
# 3. Smart-load files
result = await gitlab_service.load_smart_files( result = await gitlab_service.load_smart_files(
gl_url, gl_token, repo.gitlab_project_id, gl_url, gl_token, repo.gitlab_project_id,
ref=branch, tree=tree, ref=branch, tree=tree,
...@@ -158,7 +145,6 @@ class GenerationManager: ...@@ -158,7 +145,6 @@ class GenerationManager:
previous_files=prev_files, previous_files=prev_files,
) )
# 4. Track loaded files for future messages
loaded_paths = set() loaded_paths = set()
for f in result["priority_files"]: for f in result["priority_files"]:
loaded_paths.add(f["path"]) loaded_paths.add(f["path"])
...@@ -168,11 +154,9 @@ class GenerationManager: ...@@ -168,11 +154,9 @@ class GenerationManager:
_chat_file_history[chat.id] = set() _chat_file_history[chat.id] = set()
_chat_file_history[chat.id].update(loaded_paths) _chat_file_history[chat.id].update(loaded_paths)
# 5. Format the context return self._format_smart_context(result, tree, repo, db)
return self._format_smart_context(result, tree, repo)
except Exception as e: except Exception as e:
# Fallback: just the tree
try: try:
tree = await gitlab_service.get_tree( tree = await gitlab_service.get_tree(
gl_url, gl_token, repo.gitlab_project_id, ref=branch, gl_url, gl_token, repo.gitlab_project_id, ref=branch,
...@@ -182,16 +166,10 @@ class GenerationManager: ...@@ -182,16 +166,10 @@ class GenerationManager:
return f"[Repository: {repo.name} — error: {str(e)[:200]}]" return f"[Repository: {repo.name} — error: {str(e)[:200]}]"
def _format_smart_context( def _format_smart_context(
self, result: dict, tree: list[dict], repo self, result: dict, tree: list[dict], repo, db
) -> str: ) -> str:
"""Format loaded files into prompt context.""" files_in_tree = sorted([i["path"] for i in tree if i["type"] == "blob"])
# File tree dirs_in_tree = sorted([i["path"] for i in tree if i["type"] == "tree"])
files_in_tree = sorted(
[i["path"] for i in tree if i["type"] == "blob"]
)
dirs_in_tree = sorted(
[i["path"] for i in tree if i["type"] == "tree"]
)
lines = [ lines = [
f"Repository: {repo.name}", f"Repository: {repo.name}",
...@@ -200,44 +178,45 @@ class GenerationManager: ...@@ -200,44 +178,45 @@ class GenerationManager:
f"Total files: {len(files_in_tree)} | Directories: {len(dirs_in_tree)}", f"Total files: {len(files_in_tree)} | Directories: {len(dirs_in_tree)}",
f"Files loaded into context: {result['files_loaded']}", f"Files loaded into context: {result['files_loaded']}",
f"Characters loaded: {result['total_characters']:,}", f"Characters loaded: {result['total_characters']:,}",
"",
"═" * 60,
"COMPLETE FILE TREE (all paths):",
"═" * 60,
] ]
# Architecture map
if repo.architecture_map and repo.map_status == "ready":
lines.append("")
lines.append(repo.architecture_map)
lines.append("")
# File tree
lines.append("═" * 60)
lines.append("COMPLETE FILE TREE:")
lines.append("═" * 60)
for fp in files_in_tree: for fp in files_in_tree:
lines.append(f" {fp}") lines.append(f" {fp}")
# File contents
lines.append("") lines.append("")
lines.append("═" * 60) lines.append("═" * 60)
lines.append("LOADED FILE CONTENTS:") lines.append("LOADED FILE CONTENTS:")
lines.append("═" * 60) lines.append("═" * 60)
# Priority files
if result["priority_files"]: if result["priority_files"]:
lines.append("") lines.append("\n── Config & Entry Point Files ──")
lines.append("── Config & Entry Point Files ──")
for f in result["priority_files"]: for f in result["priority_files"]:
lines.append(f"\n━━━ {f['path']} ━━━") lines.append(f"\n━━━ {f['path']} ━━━")
lines.append(f["content"]) lines.append(f["content"])
lines.append(f"━━━ end {f['path']} ━━━") lines.append(f"━━━ end {f['path']} ━━━")
# Query-relevant files
if result["query_files"]: if result["query_files"]:
lines.append("") lines.append("\n── Files Relevant to Current Question ──")
lines.append("── Files Relevant to Current Question ──")
for f in result["query_files"]: for f in result["query_files"]:
lines.append(f"\n━━━ {f['path']} ━━━") lines.append(f"\n━━━ {f['path']} ━━━")
lines.append(f["content"]) lines.append(f["content"])
lines.append(f"━━━ end {f['path']} ━━━") lines.append(f"━━━ end {f['path']} ━━━")
# Note about unloaded files
unloaded = len(files_in_tree) - result["files_loaded"] unloaded = len(files_in_tree) - result["files_loaded"]
if unloaded > 0: if unloaded > 0:
lines.append("") lines.append(f"\nNOTE: {unloaded} additional files exist but are not loaded.")
lines.append(f"NOTE: {unloaded} additional files exist in the repository.") lines.append("Mention specific file names to have them loaded in the next message.")
lines.append("If you need to see a specific file, ask the user to mention it by name.")
lines.append("You can see ALL file paths in the tree above.")
return "\n".join(lines) return "\n".join(lines)
...@@ -266,7 +245,6 @@ class GenerationManager: ...@@ -266,7 +245,6 @@ class GenerationManager:
db_user = db.query(User).filter(User.id == user_id).first() db_user = db.query(User).filter(User.id == user_id).first()
# Quota reset
now = datetime.utcnow() now = datetime.utcnow()
if db_user.quota_reset_date and now >= db_user.quota_reset_date: if db_user.quota_reset_date and now >= db_user.quota_reset_date:
db_user.tokens_used_this_month = 0 db_user.tokens_used_this_month = 0
...@@ -280,7 +258,6 @@ class GenerationManager: ...@@ -280,7 +258,6 @@ class GenerationManager:
state.events.append({"type": "error", "message": "Monthly token quota exceeded."}) state.events.append({"type": "error", "message": "Monthly token quota exceeded."})
return return
# Process attachments
attachments = [] attachments = []
if attachment_ids: if attachment_ids:
attachments = ( attachments = (
...@@ -305,7 +282,6 @@ class GenerationManager: ...@@ -305,7 +282,6 @@ class GenerationManager:
if attachments: if attachments:
db.commit() db.commit()
# RAG
kb_id = knowledge_base_id or chat.knowledge_base_id kb_id = knowledge_base_id or chat.knowledge_base_id
rag_context = None rag_context = None
if kb_id: if kb_id:
...@@ -314,29 +290,22 @@ class GenerationManager: ...@@ -314,29 +290,22 @@ class GenerationManager:
except Exception: except Exception:
pass pass
# ── SMART REPO CONTEXT (query-aware file loading) ──
repo_context = await self._build_repo_context(db, chat, content) repo_context = await self._build_repo_context(db, chat, content)
# ── PERSISTENT ATTACHMENT CONTEXT ──
attachment_context = memory_service.gather_attachment_context(chat_id, db) attachment_context = memory_service.gather_attachment_context(chat_id, db)
# Build system prompt
system_prompt = build_full_prompt( system_prompt = build_full_prompt(
rag_context=rag_context, rag_context=rag_context,
repo_context=repo_context, repo_context=repo_context,
attachment_context=attachment_context, attachment_context=attachment_context,
) )
# Build conversation messages
messages = memory_service.build_messages(chat, db) messages = memory_service.build_messages(chat, db)
# Inject multimodal content blocks for current attachments
if attachments and messages and messages[-1]["role"] == "user": if attachments and messages and messages[-1]["role"] == "user":
content_blocks = attachment_service.build_claude_content_blocks(attachments) content_blocks = attachment_service.build_claude_content_blocks(attachments)
content_blocks.append({"type": "text", "text": content}) content_blocks.append({"type": "text", "text": content})
messages[-1]["content"] = content_blocks messages[-1]["content"] = content_blocks
# Thinking config
effective_max = max_tokens effective_max = max_tokens
thinking_config = None thinking_config = None
if reasoning_budget > 0: if reasoning_budget > 0:
...@@ -387,7 +356,6 @@ class GenerationManager: ...@@ -387,7 +356,6 @@ class GenerationManager:
usage = event.get("usage", {}) usage = event.get("usage", {})
output_tokens = usage.get("output_tokens", 0) output_tokens = usage.get("output_tokens", 0)
# Save assistant message
assistant_msg = Message( assistant_msg = Message(
chat_id=chat_id, role="assistant", content=full_text, chat_id=chat_id, role="assistant", content=full_text,
thinking_content=full_thinking or None, thinking_content=full_thinking or None,
...@@ -404,7 +372,6 @@ class GenerationManager: ...@@ -404,7 +372,6 @@ class GenerationManager:
state.message_id = assistant_msg.id state.message_id = assistant_msg.id
# Auto-title
msg_count = db.query(Message).filter(Message.chat_id == chat_id).count() msg_count = db.query(Message).filter(Message.chat_id == chat_id).count()
if msg_count <= 2 and chat.title == "New Chat": if msg_count <= 2 and chat.title == "New Chat":
try: try:
......
...@@ -176,6 +176,8 @@ export const gitlabCommitSingle = (token, repoId, data) => request("POST", `/git ...@@ -176,6 +176,8 @@ export const gitlabCommitSingle = (token, repoId, data) => request("POST", `/git
export const gitlabCreateMR = (token, repoId, data) => request("POST", `/gitlab/repos/${repoId}/merge-request`, token, data); export const gitlabCreateMR = (token, repoId, data) => request("POST", `/gitlab/repos/${repoId}/merge-request`, token, data);
export const gitlabAnalyzeProject = (token, repoId, ref) => export const gitlabAnalyzeProject = (token, repoId, ref) =>
request("GET", `/gitlab/repos/${repoId}/analyze?ref=${encodeURIComponent(ref || "")}`, token); request("GET", `/gitlab/repos/${repoId}/analyze?ref=${encodeURIComponent(ref || "")}`, token);
export const gitlabReanalyzeRepo = (token, repoId) => request("POST", `/gitlab/repos/${repoId}/analyze`, token);
export const gitlabGetRepoMap = (token, repoId) => request("GET", `/gitlab/repos/${repoId}/map`, token);
export const gitlabListActions = (token, status) => request("GET", `/gitlab/actions?status=${status || "pending"}`, token); export const gitlabListActions = (token, status) => request("GET", `/gitlab/actions?status=${status || "pending"}`, token);
export const gitlabCreateAction = (token, data) => request("POST", "/gitlab/actions", token, data); export const gitlabCreateAction = (token, data) => request("POST", "/gitlab/actions", token, data);
export const gitlabApproveAction = (token, actionId) => request("POST", `/gitlab/actions/${actionId}/approve`, token); export const gitlabApproveAction = (token, actionId) => request("POST", `/gitlab/actions/${actionId}/approve`, token);
......
import React, { useState, useEffect, useRef, useCallback, useMemo } from "react"; import React, { useState, useEffect, useRef, useCallback } from "react";
import { useApp } from "../store"; import { useApp } from "../store";
import { import {
getMessages, downloadZip, listKnowledgeBases, updateChat, getMessages, downloadZip, listKnowledgeBases, updateChat,
...@@ -108,9 +108,6 @@ export default function ChatView({ chatId }) { ...@@ -108,9 +108,6 @@ export default function ChatView({ chatId }) {
linked_repo_id: selectedRepoId || "", linked_repo_id: selectedRepoId || "",
}); });
// ── THIS IS THE FIX ──
// Build the full linked_repo object from the local repos list
// so the UI immediately sees the repo banner, commit buttons, etc.
const repoObj = selectedRepoId const repoObj = selectedRepoId
? repos.find(r => r.id === selectedRepoId) || null ? repos.find(r => r.id === selectedRepoId) || null
: null; : null;
...@@ -124,7 +121,7 @@ export default function ChatView({ chatId }) { ...@@ -124,7 +121,7 @@ export default function ChatView({ chatId }) {
reasoning_budget: reasoningBudget, reasoning_budget: reasoningBudget,
knowledge_base_id: selectedKbId, knowledge_base_id: selectedKbId,
linked_repo_id: selectedRepoId, linked_repo_id: selectedRepoId,
linked_repo: repoObj, // ← was missing linked_repo: repoObj,
}, },
}); });
} catch { } } catch { }
...@@ -164,7 +161,6 @@ export default function ChatView({ chatId }) { ...@@ -164,7 +161,6 @@ export default function ChatView({ chatId }) {
if (!msg) return; if (!msg) return;
try { try {
await gitlabCommitSingle(state.token, linkedRepo.id, { branch, file_path: filePath, content: code, commit_message: msg, action }); await gitlabCommitSingle(state.token, linkedRepo.id, { branch, file_path: filePath, content: code, commit_message: msg, action });
// Refresh repo cache so AI sees updated code
try { await refreshRepoContext(state.token, chatId); } catch { } try { await refreshRepoContext(state.token, chatId); } catch { }
} catch (e) { alert(`❌ ${e.message}`); throw e; } } catch (e) { alert(`❌ ${e.message}`); throw e; }
}, [linkedRepo, state.token, chatId]); }, [linkedRepo, state.token, chatId]);
...@@ -185,14 +181,31 @@ export default function ChatView({ chatId }) { ...@@ -185,14 +181,31 @@ export default function ChatView({ chatId }) {
{/* Repo banner */} {/* Repo banner */}
{linkedRepo && ( {linkedRepo && (
<div className="px-3 py-1.5 bg-orange-500/10 border-b border-orange-500/20 flex items-center gap-2 text-xs"> <div className="px-3 py-1.5 bg-orange-500/10 border-b border-orange-500/20 flex items-center gap-2 text-xs flex-wrap">
<GitBranch size={12} className="text-orange-400" /> <GitBranch size={12} className="text-orange-400" />
<span className="text-orange-300 font-medium">{linkedRepo.name}</span> <span className="text-orange-300 font-medium">{linkedRepo.name}</span>
<span className="text-orange-300/60">({linkedRepo.default_branch})</span> <span className="text-orange-300/60">({linkedRepo.default_branch})</span>
<span className="text-orange-300/40">Full codebase loaded</span> {linkedRepo.map_status === "ready" && (
<button onClick={handleRefreshRepo} disabled={refreshingRepo} className="ml-auto text-orange-300/60 hover:text-orange-300 transition" title="Refresh repo context"> <span className="text-green-400/80 flex items-center gap-1">
<RefreshCw size={12} className={refreshingRepo ? "animate-spin" : ""} /> <span className="w-1.5 h-1.5 bg-green-400 rounded-full" /> Mindmap ready
</button> </span>
)}
{linkedRepo.map_status === "analyzing" && (
<span className="text-amber-400/80 flex items-center gap-1">
<Loader2 size={10} className="animate-spin" /> Analyzing…
</span>
)}
{linkedRepo.map_status === "failed" && (
<span className="text-red-400/80">Map failed</span>
)}
{(!linkedRepo.map_status || linkedRepo.map_status === "none") && (
<span className="text-orange-300/40">No mindmap</span>
)}
<div className="ml-auto flex items-center gap-2">
<button onClick={handleRefreshRepo} disabled={refreshingRepo} className="text-orange-300/60 hover:text-orange-300 transition" title="Refresh repo cache">
<RefreshCw size={12} className={refreshingRepo ? "animate-spin" : ""} />
</button>
</div>
</div> </div>
)} )}
...@@ -246,9 +259,9 @@ export default function ChatView({ chatId }) { ...@@ -246,9 +259,9 @@ export default function ChatView({ chatId }) {
<label className="text-xs text-anton-muted mb-1 flex items-center gap-1"><GitBranch size={12} className="text-orange-400" /> Repository (AI sees all files)</label> <label className="text-xs text-anton-muted mb-1 flex items-center gap-1"><GitBranch size={12} className="text-orange-400" /> Repository (AI sees all files)</label>
<select value={selectedRepoId || ""} onChange={e => setSelectedRepoId(e.target.value || null)} className="w-full bg-anton-bg border border-anton-border rounded-lg px-3 py-2.5 text-white focus:outline-none focus:border-orange-400"> <select value={selectedRepoId || ""} onChange={e => setSelectedRepoId(e.target.value || null)} className="w-full bg-anton-bg border border-anton-border rounded-lg px-3 py-2.5 text-white focus:outline-none focus:border-orange-400">
<option value="">None</option> <option value="">None</option>
{repos.map(r => <option key={r.id} value={r.id}>🔀 {r.name} ({r.default_branch})</option>)} {repos.map(r => <option key={r.id} value={r.id}>🔀 {r.name} ({r.default_branch}){r.map_status === "ready" ? " ✅" : r.map_status === "analyzing" ? " ⏳" : ""}</option>)}
</select> </select>
<p className="text-[9px] text-orange-400/60 mt-1">When linked, AI loads the full codebase into context.</p> <p className="text-[9px] text-orange-400/60 mt-1">When linked, AI loads the full codebase + architecture mindmap into context.</p>
</div> </div>
)} )}
</div> </div>
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment