Commit 841414df authored by Mahmoud Aglan's avatar Mahmoud Aglan

test

parent 37b9873f
......@@ -53,6 +53,20 @@ def _run_migrations():
if table_name not in existing_tables:
print(f" Creating {table_name} table")
if "linked_repos" in existing_tables:
lr_columns = {c["name"] for c in inspector.get_columns("linked_repos")}
with engine.connect() as conn:
if "architecture_map" not in lr_columns:
conn.execute(text("ALTER TABLE linked_repos ADD COLUMN architecture_map TEXT"))
print(" Added linked_repos.architecture_map column")
if "map_status" not in lr_columns:
conn.execute(text("ALTER TABLE linked_repos ADD COLUMN map_status VARCHAR(20) DEFAULT 'none'"))
print(" Added linked_repos.map_status column")
if "map_generated_at" not in lr_columns:
conn.execute(text("ALTER TABLE linked_repos ADD COLUMN map_generated_at DATETIME"))
print(" Added linked_repos.map_generated_at column")
conn.commit()
except Exception as e:
print(f" Migration note: {e}")
......
......@@ -151,6 +151,9 @@ class LinkedRepo(Base):
default_branch = Column(String(100), default="main")
web_url = Column(String(500), default="")
description = Column(Text, default="")
architecture_map = Column(Text, nullable=True)
map_status = Column(String(20), default="none")
map_generated_at = Column(DateTime, nullable=True)
created_at = Column(DateTime, default=datetime.utcnow)
actions = relationship("PendingAction", back_populates="repo", cascade="all,delete-orphan")
......
......@@ -3,6 +3,7 @@ GitLab CE integration routes — superadmin only.
Son of Anton v4.0.0
"""
import asyncio
import json
from datetime import datetime
from typing import Optional
......@@ -14,7 +15,7 @@ from sqlalchemy.orm import Session
from backend.database import get_db
from backend.models import User, GitLabSettings, LinkedRepo, PendingAction
from backend.auth import require_superadmin
from backend.services import gitlab_service
from backend.services import gitlab_service, code_analyzer
router = APIRouter()
......@@ -181,10 +182,18 @@ async def link_repo(body: LinkRepoBody, admin: User = Depends(require_superadmin
default_branch=project.get("default_branch", "main"),
web_url=project.get("web_url", ""),
description=project.get("description", ""),
map_status="analyzing",
)
db.add(repo)
db.commit()
db.refresh(repo)
# Start background analysis
asyncio.create_task(_analyze_repo_background(
repo.id, s.gitlab_url, s.gitlab_token,
project["id"], project.get("default_branch", "main"),
))
return _repo_dict(repo)
......@@ -196,6 +205,84 @@ def unlink_repo(repo_id: str, admin: User = Depends(require_superadmin), db: Ses
return {"ok": True}
# ═══════════════════════════════════════════════════
# Architecture Map
# ═══════════════════════════════════════════════════
async def _analyze_repo_background(repo_id: str, gitlab_url: str, gitlab_token: str, project_id: int, branch: str):
    """Background task: load all files and generate architecture map.

    Opens its own database session, then records progress on the
    ``LinkedRepo`` row through ``map_status``: ``"analyzing"`` while running,
    then ``"ready"`` on success or ``"failed"`` (with a short reason stored in
    ``architecture_map``) otherwise. Never raises; failures are printed.

    Args:
        repo_id: Primary key of the ``LinkedRepo`` row to update.
        gitlab_url: Base URL passed through to ``gitlab_service``.
        gitlab_token: Access token passed through to ``gitlab_service``.
        project_id: GitLab project id whose files are loaded.
        branch: Git ref to analyze.
    """
    from backend.database import SessionLocal as BgSession

    db = BgSession()
    try:
        repo = db.query(LinkedRepo).filter(LinkedRepo.id == repo_id).first()
        if not repo:
            return
        repo.map_status = "analyzing"
        db.commit()

        result = await gitlab_service.load_project_files(
            gitlab_url, gitlab_token, project_id, ref=branch,
        )
        files = result.get("files", [])
        if not files:
            repo.map_status = "failed"
            repo.architecture_map = "[No files could be loaded for analysis]"
            db.commit()
            return

        # The analyzer is synchronous, regex-heavy work over every file; run it
        # in a worker thread so the event loop keeps serving other requests.
        architecture_map = await asyncio.to_thread(code_analyzer.analyze_codebase, files)

        repo.architecture_map = architecture_map
        repo.map_status = "ready"
        repo.map_generated_at = datetime.utcnow()
        db.commit()
        print(f" ✅ Architecture map generated for {repo.name} ({len(architecture_map)} chars)")
    except Exception as e:
        try:
            # If the failure came from a flush/commit the session must be rolled
            # back before it can be used again; otherwise the status update
            # below would fail too and the repo would stay "analyzing" forever.
            db.rollback()
            repo = db.query(LinkedRepo).filter(LinkedRepo.id == repo_id).first()
            if repo:
                repo.map_status = "failed"
                repo.architecture_map = f"[Analysis failed: {str(e)[:200]}]"
                db.commit()
        except Exception as status_error:
            # Best effort only, but do not hide why the status was not stored.
            print(f" ❌ Could not record failed status for repo {repo_id}: {status_error}")
        print(f" ❌ Architecture analysis failed for repo {repo_id}: {e}")
    finally:
        db.close()
# Strong references to in-flight analysis tasks. The event loop only keeps weak
# references to tasks, so a task nobody else references can be garbage
# collected before it finishes.
_analysis_tasks: set = set()


@router.post("/repos/{repo_id}/analyze")
async def reanalyze_repo(repo_id: str, admin: User = Depends(require_superadmin), db: Session = Depends(get_db)):
    """Re-generate the architecture map for a linked repo.

    Marks the repo as ``"analyzing"``, schedules the analysis as a background
    task and returns immediately; the result is stored on the repo row by
    ``_analyze_repo_background``.

    Returns:
        ``{"ok": True, "status": "analyzing"}``.
    """
    s = _get_settings(db)
    repo = _get_repo(repo_id, db)
    repo.map_status = "analyzing"
    db.commit()

    task = asyncio.create_task(_analyze_repo_background(
        repo.id, s.gitlab_url, s.gitlab_token,
        repo.gitlab_project_id, repo.default_branch,
    ))
    # Hold the task until it completes, then drop the reference.
    _analysis_tasks.add(task)
    task.add_done_callback(_analysis_tasks.discard)

    return {"ok": True, "status": "analyzing"}
@router.get("/repos/{repo_id}/map")
def get_repo_map(repo_id: str, admin: User = Depends(require_superadmin), db: Session = Depends(get_db)):
    """Return the stored architecture map of a linked repo with its status.

    Missing values are normalised: status falls back to ``"none"``, the map to
    an empty string and the timestamp to ``None``.
    """
    linked = _get_repo(repo_id, db)
    map_text = linked.architecture_map or ""
    generated_at = linked.map_generated_at
    payload = {
        "map_status": linked.map_status or "none",
        "map_generated_at": str(generated_at) if generated_at else None,
        "architecture_map": map_text,
        "map_size": len(map_text),
    }
    return payload
# ═══════════════════════════════════════════════════
# Repository Operations
# ═══════════════════════════════════════════════════
......@@ -300,23 +387,6 @@ async def create_mr(repo_id: str, body: MergeRequestBody, admin: User = Depends(
raise HTTPException(e.status_code, e.detail)
@router.get("/repos/{repo_id}/analyze")
async def analyze_project(
    repo_id: str,
    ref: Optional[str] = Query(None),
    admin: User = Depends(require_superadmin),
    db: Session = Depends(get_db),
):
    """Load the linked project's files from GitLab and return the raw result.

    Uses ``ref`` when given, otherwise the repo's default branch. A
    ``GitLabError`` is re-raised as an ``HTTPException`` carrying the same
    status code and detail.
    """
    settings = _get_settings(db)
    linked = _get_repo(repo_id, db)
    target_ref = ref if ref else linked.default_branch
    try:
        return await gitlab_service.load_project_files(
            settings.gitlab_url, settings.gitlab_token, linked.gitlab_project_id, ref=target_ref,
        )
    except gitlab_service.GitLabError as err:
        raise HTTPException(err.status_code, err.detail)
# ═══════════════════════════════════════════════════
# Pending Actions
# ═══════════════════════════════════════════════════
......@@ -419,6 +489,8 @@ def _repo_dict(r: LinkedRepo) -> dict:
"default_branch": r.default_branch,
"web_url": r.web_url,
"description": r.description,
"map_status": r.map_status or "none",
"map_generated_at": str(r.map_generated_at) if r.map_generated_at else None,
"created_at": str(r.created_at),
}
......
"""
Codebase Architecture Analyzer — generates a structural mindmap
of any repository by analyzing imports, exports, routes, and
cross-boundary connections.
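Parses: Python, JS/TS/JSX/TSX (also applied to Vue/Svelte files), C#, and basic Go/Rust patterns. Java, Ruby, and PHP files are only counted in the language statistics.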
"""
import re
from collections import defaultdict
from typing import Optional
# ═══════════════════════════════════════════════════
# Language-specific parsers
# ═══════════════════════════════════════════════════
def _parse_python(path: str, content: str) -> dict:
"""Extract imports, definitions, and routes from Python files."""
imports = []
definitions = []
routes = []
decorators = []
for line in content.split("\n"):
stripped = line.strip()
# Imports
m = re.match(r'^from\s+([\w.]+)\s+import\s+(.+)', stripped)
if m:
module = m.group(1)
names = [n.strip().split(" as ")[0].strip() for n in m.group(2).split(",")]
imports.append({"module": module, "names": names})
continue
m = re.match(r'^import\s+([\w.]+)', stripped)
if m:
imports.append({"module": m.group(1), "names": []})
continue
# Decorators (accumulate for next def/class)
m = re.match(r'^@(\w+)\.(get|post|put|delete|patch|websocket)\s*\(\s*["\']([^"\']*)', stripped)
if m:
method = m.group(2).upper()
route_path = m.group(3)
decorators.append({"method": method, "path": route_path})
continue
m = re.match(r'^@(\w+)\.route\s*\(\s*["\']([^"\']*)', stripped)
if m:
decorators.append({"method": "ROUTE", "path": m.group(2)})
continue
# Function defs
m = re.match(r'^(?:async\s+)?def\s+(\w+)\s*\(', stripped)
if m:
name = m.group(1)
if not name.startswith("_") or name.startswith("__"):
defn = {"type": "function", "name": name}
definitions.append(defn)
if decorators:
for d in decorators:
routes.append({
"method": d["method"],
"path": d["path"],
"handler": name,
"file": path,
})
decorators = []
continue
# Class defs
m = re.match(r'^class\s+(\w+)\s*[\(:]', stripped)
if m:
definitions.append({"type": "class", "name": m.group(1)})
decorators = []
continue
# Clear decorators on non-matching lines
if stripped and not stripped.startswith("@") and not stripped.startswith("#"):
decorators = []
return {"imports": imports, "definitions": definitions, "routes": routes}
def _parse_javascript(path: str, content: str) -> dict:
"""Extract imports, exports, routes, and API calls from JS/TS files."""
imports = []
exports = []
api_calls = []
routes = []
for line in content.split("\n"):
stripped = line.strip()
# ES6 imports
m = re.match(r'^import\s+(?:{([^}]+)}|(\w+))\s+from\s+["\']([^"\']+)', stripped)
if m:
names = []
if m.group(1):
names = [n.strip().split(" as ")[0].strip() for n in m.group(1).split(",")]
elif m.group(2):
names = [m.group(2)]
imports.append({"module": m.group(3), "names": names})
continue
# import default + named
m = re.match(r'^import\s+(\w+)\s*,\s*{([^}]+)}\s+from\s+["\']([^"\']+)', stripped)
if m:
names = [m.group(1)] + [n.strip().split(" as ")[0].strip() for n in m.group(2).split(",")]
imports.append({"module": m.group(3), "names": names})
continue
# import * as
m = re.match(r'^import\s+\*\s+as\s+(\w+)\s+from\s+["\']([^"\']+)', stripped)
if m:
imports.append({"module": m.group(2), "names": [f"* as {m.group(1)}"]})
continue
# require
m = re.search(r'require\s*\(\s*["\']([^"\']+)', stripped)
if m and not stripped.startswith("//"):
imports.append({"module": m.group(1), "names": []})
# Exports
m = re.match(r'^export\s+(?:default\s+)?(?:async\s+)?function\s+(\w+)', stripped)
if m:
exports.append({"type": "function", "name": m.group(1)})
continue
m = re.match(r'^export\s+(?:default\s+)?(?:const|let|var)\s+(\w+)', stripped)
if m:
exports.append({"type": "const", "name": m.group(1)})
continue
m = re.match(r'^export\s+default\s+(?:class\s+)?(\w+)', stripped)
if m:
exports.append({"type": "default", "name": m.group(1)})
continue
# API calls (fetch)
for fm in re.finditer(r'fetch\s*\(\s*[`"\']([^`"\']*(?:/api/[^`"\']*)?)[`"\']', stripped):
url = fm.group(1)
if "/api/" in url or url.startswith("/"):
api_calls.append(url)
for fm in re.finditer(r'fetch\s*\(\s*`\$\{[^}]*\}(/[^`]*)`', stripped):
api_calls.append(fm.group(1))
# Express routes
m = re.match(r'(?:app|router)\.(get|post|put|delete|patch)\s*\(\s*["\']([^"\']+)', stripped)
if m:
routes.append({
"method": m.group(1).upper(),
"path": m.group(2),
"file": path,
})
return {"imports": imports, "exports": exports, "api_calls": api_calls, "routes": routes}
def _parse_csharp(path: str, content: str) -> dict:
"""Extract basic structure from C# files."""
imports = []
definitions = []
routes = []
for line in content.split("\n"):
stripped = line.strip()
m = re.match(r'^using\s+([\w.]+)\s*;', stripped)
if m:
imports.append({"module": m.group(1), "names": []})
continue
m = re.match(r'^(?:public|private|protected|internal|static|\s)*class\s+(\w+)', stripped)
if m:
definitions.append({"type": "class", "name": m.group(1)})
continue
m = re.match(r'^(?:public|private|protected|internal|static|\s)*interface\s+(\w+)', stripped)
if m:
definitions.append({"type": "interface", "name": m.group(1)})
continue
# ASP.NET routes
m = re.search(r'\[Http(Get|Post|Put|Delete|Patch)\s*\(\s*"([^"]*)"', stripped)
if m:
routes.append({"method": m.group(1).upper(), "path": m.group(2), "file": path})
continue
m = re.search(r'\[Route\s*\(\s*"([^"]*)"', stripped)
if m:
routes.append({"method": "ROUTE", "path": m.group(1), "file": path})
return {"imports": imports, "definitions": definitions, "routes": routes}
def _parse_generic(path: str, content: str) -> dict:
"""Fallback: extract function/class patterns from any language."""
definitions = []
imports = []
for line in content.split("\n"):
stripped = line.strip()
# Go imports
m = re.match(r'^import\s+"([^"]+)"', stripped)
if m:
imports.append({"module": m.group(1), "names": []})
# Go/Rust function defs
m = re.match(r'^(?:pub\s+)?fn\s+(\w+)', stripped)
if m:
definitions.append({"type": "function", "name": m.group(1)})
continue
m = re.match(r'^func\s+(?:\([^)]+\)\s+)?(\w+)', stripped)
if m:
definitions.append({"type": "function", "name": m.group(1)})
continue
# Go/Rust struct/type
m = re.match(r'^(?:pub\s+)?(?:type|struct)\s+(\w+)', stripped)
if m:
definitions.append({"type": "struct", "name": m.group(1)})
return {"imports": imports, "definitions": definitions}
# ═══════════════════════════════════════════════════
# File parser dispatcher
# ═══════════════════════════════════════════════════
def _parse_file(path: str, content: str) -> dict:
    """Pick the parser for ``path`` by its extension (case-insensitive) and run it.

    ``.py`` goes to the Python parser, the JS/TS family (including ``.vue`` and
    ``.svelte``) to the JavaScript parser, ``.cs`` to the C# parser and
    everything else to the generic fallback.
    """
    name = path.lower()
    js_like = (".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs", ".vue", ".svelte")

    if name.endswith(".py"):
        parser = _parse_python
    elif name.endswith(js_like):
        parser = _parse_javascript
    elif name.endswith(".cs"):
        parser = _parse_csharp
    else:
        parser = _parse_generic
    return parser(path, content)
# ═══════════════════════════════════════════════════
# Resolve import paths to actual files
# ═══════════════════════════════════════════════════
def _resolve_import(importing_file: str, module: str, all_paths: set[str]) -> Optional[str]:
"""Try to resolve an import module string to an actual file path."""
if not module:
return None
# Direct path match
for ext in ["", ".py", ".js", ".ts", ".jsx", ".tsx", ".cs", ".go", ".rs"]:
candidate = module.replace(".", "/") + ext
if candidate in all_paths:
return candidate
# Relative imports for Python (from .module import ...)
if module.startswith("."):
dir_parts = importing_file.rsplit("/", 1)
base_dir = dir_parts[0] if len(dir_parts) > 1 else ""
rel = module.lstrip(".")
for ext in [".py", ""]:
candidate = f"{base_dir}/{rel.replace('.', '/')}{ext}"
if candidate in all_paths:
return candidate
# __init__.py
candidate = f"{base_dir}/{rel.replace('.', '/')}/__init__.py"
if candidate in all_paths:
return candidate
# JS relative imports
if module.startswith("./") or module.startswith("../"):
dir_parts = importing_file.rsplit("/", 1)
base_dir = dir_parts[0] if len(dir_parts) > 1 else ""
if module.startswith("./"):
rel = module[2:]
else:
# Go up directories
parts = base_dir.split("/")
ups = module.count("../")
base = "/".join(parts[:-ups]) if ups < len(parts) else ""
rel = module.replace("../", "")
base_dir = base
for ext in ["", ".js", ".jsx", ".ts", ".tsx", "/index.js", "/index.tsx", "/index.ts"]:
candidate = f"{base_dir}/{rel}{ext}" if base_dir else f"{rel}{ext}"
if candidate in all_paths:
return candidate
# Backend-style imports (backend.models → backend/models.py)
for ext in [".py"]:
candidate = module.replace(".", "/") + ext
if candidate in all_paths:
return candidate
return None
# ═══════════════════════════════════════════════════
# Main analyzer
# ═══════════════════════════════════════════════════
# Upper bound, in characters, on the rendered architecture map; longer maps
# are cut at this length by _format_architecture_map.
MAX_MAP_CHARS = 40_000 # ~10K tokens max for the map
def analyze_codebase(files: list[dict]) -> str:
    """
    Analyze a list of {path, content} dicts and produce
    an architecture mindmap string.

    Args:
        files: One dict per file with a ``"path"`` key and an optional
            ``"content"`` key. Files whose content is empty, ``None`` or
            starts with ``"["`` are counted but not parsed (presumably the
            loader's placeholder text for unreadable files; confirm against
            ``gitlab_service``).

    Returns:
        The formatted map as produced by ``_format_architecture_map``.
    """
    all_paths = {f["path"] for f in files}
    file_data = {}  # path → parsed data
    all_routes = []
    all_api_calls = []
    import_graph = defaultdict(set)  # file → set of files it imports
    imported_by = defaultdict(set)  # file → set of files that import it
    definitions_map = defaultdict(list)  # file → list of definitions
    exports_map = defaultdict(list)  # file → list of exports

    # ── Pass 1: Parse all files ──
    for f in files:
        path = f["path"]
        content = f.get("content") or ""
        if not content or content.startswith("["):
            continue

        parsed = _parse_file(path, content)
        file_data[path] = parsed

        # Collect definitions
        for d in parsed.get("definitions", []):
            definitions_map[path].append(d)
        for e in parsed.get("exports", []):
            exports_map[path].append(e)

        # Collect routes
        for r in parsed.get("routes", []):
            r["file"] = path
            all_routes.append(r)

        # Collect API calls
        for call in parsed.get("api_calls", []):
            all_api_calls.append({"file": path, "url": call})

    # ── Pass 2: Resolve imports → build dependency graph ──
    for path, parsed in file_data.items():
        for imp in parsed.get("imports", []):
            target = _resolve_import(path, imp["module"], all_paths)
            if target and target != path:
                import_graph[path].add(target)
                imported_by[target].add(path)

    # ── Pass 3: Detect project structure ──
    lang_map = {
        ".py": "Python", ".js": "JavaScript", ".ts": "TypeScript",
        ".jsx": "React JSX", ".tsx": "React TSX", ".cs": "C#",
        ".java": "Java", ".go": "Go", ".rs": "Rust", ".rb": "Ruby",
        ".php": "PHP", ".vue": "Vue", ".svelte": "Svelte",
        ".html": "HTML", ".css": "CSS", ".scss": "SCSS",
        ".sql": "SQL", ".sh": "Shell",
    }
    lang_counts = defaultdict(int)
    dir_categories = defaultdict(set)
    # Sorted so that insertion order (which later breaks ties between equally
    # common languages) does not depend on set iteration order.
    for path in sorted(all_paths):
        ext = ""
        if "." in path.rsplit("/", 1)[-1]:
            ext = "." + path.rsplit(".", 1)[-1].lower()
        if ext in lang_map:
            lang_counts[lang_map[ext]] += 1

        top_dir = path.split("/")[0] if "/" in path else "(root)"
        dir_categories[top_dir].add(path)

    # Detect frameworks from the first 500 characters of the first 50 files
    frameworks = []
    all_content_lower = " ".join(
        (f.get("content") or "")[:500].lower() for f in files[:50]
    )
    framework_signals = {
        "FastAPI": ["fastapi", "from fastapi"],
        "Flask": ["from flask"],
        "Django": ["from django", "django.conf"],
        "Express": ["express()", "require('express')"],
        "React": ["from 'react'", 'from "react"', "react-dom"],
        "Vue": ["createapp", "vue"],
        "Next.js": ["next/", "getserversideprops"],
        "Unity": ["monobehaviour", "unityengine"],
        "ASP.NET": ["microsoft.aspnetcore", "iactionresult"],
        "SQLAlchemy": ["sqlalchemy", "declarative_base"],
        "Prisma": ["@prisma/client"],
    }
    for fw, signals in framework_signals.items():
        if any(sig in all_content_lower for sig in signals):
            frameworks.append(fw)

    # ── Pass 4: Identify hot files (most referenced) ──
    hot_files = sorted(imported_by.items(), key=lambda x: -len(x[1]))[:20]

    # ── Pass 5: Cross-boundary flow detection ──
    flows = _detect_cross_boundary_flows(all_routes, all_api_calls, import_graph, definitions_map)

    # ── Pass 6: Build the map ──
    return _format_architecture_map(
        all_paths=all_paths,
        lang_counts=lang_counts,
        frameworks=frameworks,
        dir_categories=dir_categories,
        all_routes=all_routes,
        all_api_calls=all_api_calls,
        import_graph=import_graph,
        imported_by=imported_by,
        definitions_map=definitions_map,
        exports_map=exports_map,
        hot_files=hot_files,
        flows=flows,
    )
def _detect_cross_boundary_flows(
routes: list, api_calls: list,
import_graph: dict, definitions_map: dict,
) -> list[str]:
"""Detect end-to-end flows across frontend/backend boundaries."""
flows = []
# Match API calls to routes
route_map = {}
for r in routes:
key = r.get("path", "").rstrip("/")
if key:
route_map[key] = r
for call in api_calls:
url = call["url"]
# Clean URL patterns
clean = re.sub(r'\$\{[^}]+\}', '{param}', url)
clean = clean.rstrip("/")
matched_route = None
for route_path, route_info in route_map.items():
# Simple match — exact or parameterized
route_clean = re.sub(r'\{[^}]+\}', '{param}', route_path)
if clean == route_clean or clean.endswith(route_clean):
matched_route = route_info
break
if matched_route:
flows.append(
f" {call['file']} → {matched_route.get('method', '?')} {matched_route.get('path', '?')} → {matched_route.get('file', '?')}:{matched_route.get('handler', '?')}()"
)
return flows[:50] # Cap flows
def _format_architecture_map(
    all_paths, lang_counts, frameworks, dir_categories,
    all_routes, all_api_calls, import_graph, imported_by,
    definitions_map, exports_map, hot_files, flows,
) -> str:
    """Format the analysis into a compact, AI-readable string.

    Sections are written in a fixed order (overview, directories, endpoints,
    cross-boundary flows, hot files, dependency graph, key definitions,
    circular dependencies, widely-used definitions) and a section is left out
    when it has nothing to show. ``all_api_calls`` and ``exports_map`` are
    accepted but not rendered.

    Returns:
        The map text. When it is longer than ``MAX_MAP_CHARS`` it is cut at
        that length and a truncation notice is appended.
    """
    lines = []
    lines.append("╔══════════════════════════════════════════╗")
    lines.append("║ CODEBASE ARCHITECTURE MINDMAP ║")
    lines.append("╚══════════════════════════════════════════╝")
    lines.append("")

    # ── Project Overview ──
    lines.append("PROJECT OVERVIEW:")
    lines.append(f" Total source files: {len(all_paths)}")
    if lang_counts:
        top_langs = sorted(lang_counts.items(), key=lambda x: -x[1])[:8]
        lines.append(f" Languages: {', '.join(f'{l} ({c})' for l, c in top_langs)}")
    if frameworks:
        lines.append(f" Frameworks: {', '.join(sorted(set(frameworks)))}")
    lines.append("")

    # ── Directory Structure ──
    lines.append("DIRECTORY STRUCTURE:")
    for dir_name in sorted(dir_categories.keys()):
        count = len(dir_categories[dir_name])
        lines.append(f" {dir_name}/ ({count} files)")
    lines.append("")

    # ── API Routes ──
    if all_routes:
        lines.append("API ENDPOINTS:")
        # Group by file
        routes_by_file = defaultdict(list)
        for r in all_routes:
            routes_by_file[r.get("file", "?")].append(r)
        for rfile in sorted(routes_by_file.keys()):
            lines.append(f" [{rfile}]")
            for r in routes_by_file[rfile]:
                handler = r.get("handler", "")
                handler_str = f" → {handler}()" if handler else ""
                lines.append(f" {r.get('method', '?'):6s} {r.get('path', '?')}{handler_str}")
        lines.append("")

    # ── Frontend → Backend Connections ──
    if flows:
        lines.append("CROSS-BOUNDARY FLOWS (Frontend → Backend):")
        for flow in flows:
            lines.append(flow)
        lines.append("")

    # ── Hot Files (most imported) ──
    # Filter first so the heading is not printed above an empty list when no
    # file has at least two importers.
    popular = [(path, importers) for path, importers in hot_files if len(importers) >= 2]
    if popular:
        lines.append("HOT FILES (most referenced by other files):")
        for path, importers in popular:
            lines.append(f" {path} ← imported by {len(importers)} files")
        lines.append("")

    # ── Dependency Graph (compact) ──
    if import_graph:
        lines.append("DEPENDENCY GRAPH (file → its dependencies):")
        # Up to 60 files, those with the most dependencies first; each file
        # lists at most 15 dependencies by base name.
        sorted_deps = sorted(import_graph.items(), key=lambda x: -len(x[1]))
        shown = 0
        for path, deps in sorted_deps:
            if shown >= 60:
                break
            dep_names = sorted(deps)[:15]
            extra = f" (+{len(deps) - 15} more)" if len(deps) > 15 else ""
            short_deps = [d.rsplit("/", 1)[-1] for d in dep_names]
            lines.append(f" {path}")
            lines.append(f" → {', '.join(short_deps)}{extra}")
            shown += 1
        lines.append("")

    # ── Key Definitions ──
    if definitions_map:
        lines.append("KEY DEFINITIONS:")
        for path in sorted(definitions_map.keys()):
            defs = definitions_map[path]
            if not defs:
                continue
            classes = [d["name"] for d in defs if d["type"] == "class"]
            functions = [d["name"] for d in defs if d["type"] == "function"]
            structs = [d["name"] for d in defs if d["type"] in ("struct", "interface")]

            parts = []
            if classes:
                parts.append(f"classes: {', '.join(classes[:10])}")
            if structs:
                parts.append(f"types: {', '.join(structs[:10])}")
            if functions:
                # Only show non-private functions, cap at 8
                pub = [f for f in functions if not f.startswith("_")][:8]
                if pub:
                    parts.append(f"functions: {', '.join(pub)}")
            if parts:
                lines.append(f" {path}")
                for p in parts:
                    lines.append(f" {p}")
        lines.append("")

    # ── Cycle/Pattern Detection ──
    cycles = _detect_cycles(import_graph)
    if cycles:
        lines.append("CIRCULAR DEPENDENCIES DETECTED:")
        for cycle in cycles[:10]:
            lines.append(f" ⟲ {' → '.join(cycle)}")
        lines.append("")

    # ── Shared Definitions ──
    # Classes/models defined in files that many other files import
    shared = _find_shared_definitions(definitions_map, imported_by)
    if shared:
        lines.append("WIDELY-USED DEFINITIONS:")
        for name, info in shared[:15]:
            lines.append(f" {name} (defined in {info['defined_in']}, used by {info['used_by_count']} files)")
        lines.append("")

    result = "\n".join(lines)

    # Truncate if too long
    if len(result) > MAX_MAP_CHARS:
        result = result[:MAX_MAP_CHARS] + "\n\n... [architecture map truncated]"

    return result
def _detect_cycles(import_graph: dict) -> list[list[str]]:
"""Detect import cycles using DFS."""
cycles = []
visited = set()
path_set = set()
path_list = []
def dfs(node):
if len(cycles) >= 10:
return
if node in path_set:
# Found cycle
idx = path_list.index(node)
cycle = path_list[idx:] + [node]
# Use short names
short = [p.rsplit("/", 1)[-1] for p in cycle]
cycles.append(short)
return
if node in visited:
return
visited.add(node)
path_set.add(node)
path_list.append(node)
for dep in import_graph.get(node, []):
dfs(dep)
path_set.discard(node)
path_list.pop()
for node in import_graph:
if node not in visited:
dfs(node)
return cycles
def _find_shared_definitions(
definitions_map: dict,
imported_by: dict,
) -> list[tuple[str, dict]]:
"""Find definitions that are used across many files."""
results = []
for path, defs in definitions_map.items():
importers_count = len(imported_by.get(path, set()))
if importers_count < 2:
continue
for d in defs:
if d["type"] in ("class", "struct", "interface"):
results.append((
d["name"],
{
"defined_in": path,
"type": d["type"],
"used_by_count": importers_count,
}
))
results.sort(key=lambda x: -x[1]["used_by_count"])
return results
\ No newline at end of file
"""
Background generation manager — v4.1.0
Smart codebase loading for massive repos + persistent file context.
Smart codebase loading for massive repos + persistent file context + architecture mindmap.
"""
import asyncio
......@@ -19,11 +19,9 @@ from backend.services import bedrock_service, memory_service, rag_service, attac
# Caches
# ═══════════════════════════════════════════════════
# Tree cache: repo_id:branch → (timestamp, tree_list)
_tree_cache: dict[str, tuple[float, list[dict]]] = {}
TREE_CACHE_TTL = 600 # 10 minutes
TREE_CACHE_TTL = 600
# Tracks which files have been discussed per chat
_chat_file_history: dict[str, set[str]] = {}
......@@ -103,7 +101,6 @@ class GenerationManager:
await asyncio.sleep(0.02)
def invalidate_repo_cache(self, repo_id: str):
"""Call after a commit to force-refresh on next message."""
keys_to_remove = [k for k in _tree_cache if k.startswith(f"{repo_id}:")]
for k in keys_to_remove:
_tree_cache.pop(k, None)
......@@ -115,13 +112,6 @@ class GenerationManager:
async def _build_repo_context(
self, db, chat, user_query: str
) -> Optional[str]:
"""
Build repo context using smart file selection.
For ANY size codebase:
1. Full file tree (paths only) — always included
2. Priority files (configs, entry points) — always loaded
3. Query-relevant files — loaded based on what user asked
"""
if not chat.linked_repo_id:
return None
......@@ -138,7 +128,6 @@ class GenerationManager:
branch = repo.default_branch
try:
# 1. Get tree (cached)
tree = _get_tree_cache(repo.id, branch)
if tree is None:
tree = await gitlab_service.get_tree(
......@@ -147,10 +136,8 @@ class GenerationManager:
)
_set_tree_cache(repo.id, branch, tree)
# 2. Get previously discussed files for this chat
prev_files = _chat_file_history.get(chat.id, set())
# 3. Smart-load files
result = await gitlab_service.load_smart_files(
gl_url, gl_token, repo.gitlab_project_id,
ref=branch, tree=tree,
......@@ -158,7 +145,6 @@ class GenerationManager:
previous_files=prev_files,
)
# 4. Track loaded files for future messages
loaded_paths = set()
for f in result["priority_files"]:
loaded_paths.add(f["path"])
......@@ -168,11 +154,9 @@ class GenerationManager:
_chat_file_history[chat.id] = set()
_chat_file_history[chat.id].update(loaded_paths)
# 5. Format the context
return self._format_smart_context(result, tree, repo)
return self._format_smart_context(result, tree, repo, db)
except Exception as e:
# Fallback: just the tree
try:
tree = await gitlab_service.get_tree(
gl_url, gl_token, repo.gitlab_project_id, ref=branch,
......@@ -182,16 +166,10 @@ class GenerationManager:
return f"[Repository: {repo.name} — error: {str(e)[:200]}]"
def _format_smart_context(
self, result: dict, tree: list[dict], repo
self, result: dict, tree: list[dict], repo, db
) -> str:
"""Format loaded files into prompt context."""
# File tree
files_in_tree = sorted(
[i["path"] for i in tree if i["type"] == "blob"]
)
dirs_in_tree = sorted(
[i["path"] for i in tree if i["type"] == "tree"]
)
files_in_tree = sorted([i["path"] for i in tree if i["type"] == "blob"])
dirs_in_tree = sorted([i["path"] for i in tree if i["type"] == "tree"])
lines = [
f"Repository: {repo.name}",
......@@ -200,44 +178,45 @@ class GenerationManager:
f"Total files: {len(files_in_tree)} | Directories: {len(dirs_in_tree)}",
f"Files loaded into context: {result['files_loaded']}",
f"Characters loaded: {result['total_characters']:,}",
"",
"═" * 60,
"COMPLETE FILE TREE (all paths):",
"═" * 60,
]
# Architecture map
if repo.architecture_map and repo.map_status == "ready":
lines.append("")
lines.append(repo.architecture_map)
lines.append("")
# File tree
lines.append("═" * 60)
lines.append("COMPLETE FILE TREE:")
lines.append("═" * 60)
for fp in files_in_tree:
lines.append(f" {fp}")
# File contents
lines.append("")
lines.append("═" * 60)
lines.append("LOADED FILE CONTENTS:")
lines.append("═" * 60)
# Priority files
if result["priority_files"]:
lines.append("")
lines.append("── Config & Entry Point Files ──")
lines.append("\n── Config & Entry Point Files ──")
for f in result["priority_files"]:
lines.append(f"\n━━━ {f['path']} ━━━")
lines.append(f["content"])
lines.append(f"━━━ end {f['path']} ━━━")
# Query-relevant files
if result["query_files"]:
lines.append("")
lines.append("── Files Relevant to Current Question ──")
lines.append("\n── Files Relevant to Current Question ──")
for f in result["query_files"]:
lines.append(f"\n━━━ {f['path']} ━━━")
lines.append(f["content"])
lines.append(f"━━━ end {f['path']} ━━━")
# Note about unloaded files
unloaded = len(files_in_tree) - result["files_loaded"]
if unloaded > 0:
lines.append("")
lines.append(f"NOTE: {unloaded} additional files exist in the repository.")
lines.append("If you need to see a specific file, ask the user to mention it by name.")
lines.append("You can see ALL file paths in the tree above.")
lines.append(f"\nNOTE: {unloaded} additional files exist but are not loaded.")
lines.append("Mention specific file names to have them loaded in the next message.")
return "\n".join(lines)
......@@ -266,7 +245,6 @@ class GenerationManager:
db_user = db.query(User).filter(User.id == user_id).first()
# Quota reset
now = datetime.utcnow()
if db_user.quota_reset_date and now >= db_user.quota_reset_date:
db_user.tokens_used_this_month = 0
......@@ -280,7 +258,6 @@ class GenerationManager:
state.events.append({"type": "error", "message": "Monthly token quota exceeded."})
return
# Process attachments
attachments = []
if attachment_ids:
attachments = (
......@@ -305,7 +282,6 @@ class GenerationManager:
if attachments:
db.commit()
# RAG
kb_id = knowledge_base_id or chat.knowledge_base_id
rag_context = None
if kb_id:
......@@ -314,29 +290,22 @@ class GenerationManager:
except Exception:
pass
# ── SMART REPO CONTEXT (query-aware file loading) ──
repo_context = await self._build_repo_context(db, chat, content)
# ── PERSISTENT ATTACHMENT CONTEXT ──
attachment_context = memory_service.gather_attachment_context(chat_id, db)
# Build system prompt
system_prompt = build_full_prompt(
rag_context=rag_context,
repo_context=repo_context,
attachment_context=attachment_context,
)
# Build conversation messages
messages = memory_service.build_messages(chat, db)
# Inject multimodal content blocks for current attachments
if attachments and messages and messages[-1]["role"] == "user":
content_blocks = attachment_service.build_claude_content_blocks(attachments)
content_blocks.append({"type": "text", "text": content})
messages[-1]["content"] = content_blocks
# Thinking config
effective_max = max_tokens
thinking_config = None
if reasoning_budget > 0:
......@@ -387,7 +356,6 @@ class GenerationManager:
usage = event.get("usage", {})
output_tokens = usage.get("output_tokens", 0)
# Save assistant message
assistant_msg = Message(
chat_id=chat_id, role="assistant", content=full_text,
thinking_content=full_thinking or None,
......@@ -404,7 +372,6 @@ class GenerationManager:
state.message_id = assistant_msg.id
# Auto-title
msg_count = db.query(Message).filter(Message.chat_id == chat_id).count()
if msg_count <= 2 and chat.title == "New Chat":
try:
......
......@@ -176,6 +176,8 @@ export const gitlabCommitSingle = (token, repoId, data) => request("POST", `/git
export const gitlabCreateMR = (token, repoId, data) => request("POST", `/gitlab/repos/${repoId}/merge-request`, token, data);
export const gitlabAnalyzeProject = (token, repoId, ref) =>
request("GET", `/gitlab/repos/${repoId}/analyze?ref=${encodeURIComponent(ref || "")}`, token);
export const gitlabReanalyzeRepo = (token, repoId) => request("POST", `/gitlab/repos/${repoId}/analyze`, token);
export const gitlabGetRepoMap = (token, repoId) => request("GET", `/gitlab/repos/${repoId}/map`, token);
export const gitlabListActions = (token, status) => request("GET", `/gitlab/actions?status=${status || "pending"}`, token);
/**
 * Issue a POST to the actions endpoint with the given payload.
 *
 * @param token Auth token forwarded to `request`.
 * @param data  Request payload forwarded to `request` unchanged.
 * @returns Whatever `request` returns (presumably a promise — confirm against `request`).
 */
export const gitlabCreateAction = (token, data) => {
  const endpoint = "/gitlab/actions";
  return request("POST", endpoint, token, data);
};
/**
 * Issue a POST (no body) to the approve endpoint of a single action.
 *
 * @param token    Auth token forwarded to `request`.
 * @param actionId Identifier of the action, interpolated into the URL path.
 * @returns Whatever `request` returns (presumably a promise — confirm against `request`).
 */
export const gitlabApproveAction = (token, actionId) => {
  const endpoint = `/gitlab/actions/${actionId}/approve`;
  return request("POST", endpoint, token);
};
......
import React, { useState, useEffect, useRef, useCallback, useMemo } from "react";
import React, { useState, useEffect, useRef, useCallback } from "react";
import { useApp } from "../store";
import {
getMessages, downloadZip, listKnowledgeBases, updateChat,
......@@ -108,9 +108,6 @@ export default function ChatView({ chatId }) {
linked_repo_id: selectedRepoId || "",
});
// ── THIS IS THE FIX ──
// Build the full linked_repo object from the local repos list
// so the UI immediately sees the repo banner, commit buttons, etc.
const repoObj = selectedRepoId
? repos.find(r => r.id === selectedRepoId) || null
: null;
......@@ -124,7 +121,7 @@ export default function ChatView({ chatId }) {
reasoning_budget: reasoningBudget,
knowledge_base_id: selectedKbId,
linked_repo_id: selectedRepoId,
linked_repo: repoObj, // ← was missing
linked_repo: repoObj,
},
});
} catch { }
......@@ -164,7 +161,6 @@ export default function ChatView({ chatId }) {
if (!msg) return;
try {
await gitlabCommitSingle(state.token, linkedRepo.id, { branch, file_path: filePath, content: code, commit_message: msg, action });
// Refresh repo cache so AI sees updated code
try { await refreshRepoContext(state.token, chatId); } catch { }
} catch (e) { alert(`❌ ${e.message}`); throw e; }
}, [linkedRepo, state.token, chatId]);
......@@ -185,14 +181,31 @@ export default function ChatView({ chatId }) {
{/* Repo banner */}
{linkedRepo && (
<div className="px-3 py-1.5 bg-orange-500/10 border-b border-orange-500/20 flex items-center gap-2 text-xs">
<div className="px-3 py-1.5 bg-orange-500/10 border-b border-orange-500/20 flex items-center gap-2 text-xs flex-wrap">
<GitBranch size={12} className="text-orange-400" />
<span className="text-orange-300 font-medium">{linkedRepo.name}</span>
<span className="text-orange-300/60">({linkedRepo.default_branch})</span>
<span className="text-orange-300/40">Full codebase loaded</span>
<button onClick={handleRefreshRepo} disabled={refreshingRepo} className="ml-auto text-orange-300/60 hover:text-orange-300 transition" title="Refresh repo context">
<RefreshCw size={12} className={refreshingRepo ? "animate-spin" : ""} />
</button>
{linkedRepo.map_status === "ready" && (
<span className="text-green-400/80 flex items-center gap-1">
<span className="w-1.5 h-1.5 bg-green-400 rounded-full" /> Mindmap ready
</span>
)}
{linkedRepo.map_status === "analyzing" && (
<span className="text-amber-400/80 flex items-center gap-1">
<Loader2 size={10} className="animate-spin" /> Analyzing…
</span>
)}
{linkedRepo.map_status === "failed" && (
<span className="text-red-400/80">Map failed</span>
)}
{(!linkedRepo.map_status || linkedRepo.map_status === "none") && (
<span className="text-orange-300/40">No mindmap</span>
)}
<div className="ml-auto flex items-center gap-2">
<button onClick={handleRefreshRepo} disabled={refreshingRepo} className="text-orange-300/60 hover:text-orange-300 transition" title="Refresh repo cache">
<RefreshCw size={12} className={refreshingRepo ? "animate-spin" : ""} />
</button>
</div>
</div>
)}
......@@ -246,9 +259,9 @@ export default function ChatView({ chatId }) {
<label className="text-xs text-anton-muted mb-1 flex items-center gap-1"><GitBranch size={12} className="text-orange-400" /> Repository (AI sees all files)</label>
<select value={selectedRepoId || ""} onChange={e => setSelectedRepoId(e.target.value || null)} className="w-full bg-anton-bg border border-anton-border rounded-lg px-3 py-2.5 text-white focus:outline-none focus:border-orange-400">
<option value="">None</option>
{repos.map(r => <option key={r.id} value={r.id}>🔀 {r.name} ({r.default_branch})</option>)}
{repos.map(r => <option key={r.id} value={r.id}>🔀 {r.name} ({r.default_branch}){r.map_status === "ready" ? " ✅" : r.map_status === "analyzing" ? " ⏳" : ""}</option>)}
</select>
<p className="text-[9px] text-orange-400/60 mt-1">When linked, AI loads the full codebase into context.</p>
<p className="text-[9px] text-orange-400/60 mt-1">When linked, AI loads the full codebase + architecture mindmap into context.</p>
</div>
)}
</div>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment