fghfghjfghfb nfgmn

d55ca0c4 · Mahmoud Aglan · 41f45d5e · d55ca0c4 · d55ca0c4 · d55ca0c4
Commit d55ca0c4 authored Mar 19, 2026 by Mahmoud Aglan
6 changed files
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,22 +4,10 @@
 FROM node:20-alpine AS frontend-build
 WORKDIR /build/frontend
+COPY frontend/package.json frontend/package-lock.json* ./
-# Copy everything so lockfile, configs (vite, tailwind, postcss) are all present
+RUN npm install --legacy-peer-deps
 COPY frontend/ ./
+RUN npm run build
-# Install deps: use ci if lockfile exists, otherwise install and generate one
-RUN if [ -f package-lock.json ]; then \
-      echo "📦 Found package-lock.json — running npm ci" && \
-      npm ci --legacy-peer-deps; \
-    else \
-      echo "⚠️  No package-lock.json — running npm install" && \
-      npm install --legacy-peer-deps; \
-    fi && \
-    npm cache clean --force
-# Build production bundle
-RUN NODE_ENV=production npm run build
 # ============================================
 # Stage 2: Python Backend + Serve Frontend
@@ -28,6 +16,7 @@ FROM python:3.11-slim
 RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
+    ffmpeg \
    && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
@@ -40,12 +29,11 @@ COPY backend/ ./backend/
 COPY --from=frontend-build /build/frontend/dist ./frontend/dist
 # Warm up the ChromaDB embedding model so first request is fast
-# Using a separate script file to avoid all quoting issues
 COPY warmup.py /tmp/warmup.py
 RUN python /tmp/warmup.py && rm /tmp/warmup.py
 # Create persistent data directories
-RUN mkdir -p /data/chromadb /data/uploads
+RUN mkdir -p /data/chromadb /data/uploads /data/uploads/chat_attachments
 ENV PYTHONUNBUFFERED=1

--- a/backend/routes/attachment_routes.py
+++ b/backend/routes/attachment_routes.py
+"""
+Chat attachment upload, serve, and delete routes.
+"""
+import os
+from fastapi import APIRouter, Depends, HTTPException, UploadFile, File
+from fastapi.responses import FileResponse
+from sqlalchemy.orm import Session
+from backend.database import get_db
+from backend.models import User, Chat, ChatAttachment
+from backend.auth import get_current_user
+from backend.services import attachment_service
+from backend.config import MAX_ATTACHMENT_BYTES
+router = APIRouter()
+@router.post("/chats/{chat_id}/attachments")
+async def upload_attachments(
+    chat_id: str,
+    files: list[UploadFile] = File(...),
+    user: User = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """Upload one or more files as chat attachments. Returns attachment metadata."""
+    chat = db.query(Chat).filter(Chat.id == chat_id, Chat.user_id == user.id).first()
+    if not chat:
+        raise HTTPException(404, "Chat not found")
+    results = []
+    for file in files:
+        filename = file.filename or "file"
+        try:
+            content = await file.read()
+            if len(content) > MAX_ATTACHMENT_BYTES:
+                results.append({
+                    "error": f"File too large: {filename} ({len(content) // 1024 // 1024}MB). Max {MAX_ATTACHMENT_BYTES // 1024 // 1024}MB.",
+                })
+                continue
+            meta = attachment_service.save_attachment(
+                chat_id=chat_id,
+                filename=filename,
+                content=content,
+                content_type=file.content_type,
+            )
+            att = ChatAttachment(
+                id=meta["id"],
+                chat_id=chat_id,
+                filename=meta["filename"],
+                original_filename=meta["original_filename"],
+                mime_type=meta["mime_type"],
+                file_type=meta["file_type"],
+                file_size=meta["file_size"],
+                storage_path=meta["storage_path"],
+                text_extract=meta.get("text_extract"),
+            )
+            db.add(att)
+            db.commit()
+            db.refresh(att)
+            results.append(_att_dict(att))
+        except Exception as e:
+            results.append({"error": f"Failed to upload {filename}: {str(e)}"})
+    return {"attachments": results}
+@router.get("/attachments/{attachment_id}/file")
+def serve_attachment(
+    attachment_id: str,
+    user: User = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """Serve an attachment file. Validates user owns the chat."""
+    att = db.query(ChatAttachment).filter(ChatAttachment.id == attachment_id).first()
+    if not att:
+        raise HTTPException(404, "Attachment not found")
+    chat = db.query(Chat).filter(Chat.id == att.chat_id).first()
+    if not chat or (chat.user_id != user.id and user.role != "superadmin"):
+        raise HTTPException(403, "Access denied")
+    if not os.path.exists(att.storage_path):
+        raise HTTPException(404, "File not found on disk")
+    return FileResponse(
+        att.storage_path,
+        media_type=att.mime_type,
+        filename=att.original_filename,
+    )
+@router.delete("/attachments/{attachment_id}")
+def delete_attachment(
+    attachment_id: str,
+    user: User = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """Delete a single attachment."""
+    att = db.query(ChatAttachment).filter(ChatAttachment.id == attachment_id).first()
+    if not att:
+        raise HTTPException(404)
+    chat = db.query(Chat).filter(Chat.id == att.chat_id).first()
+    if not chat or (chat.user_id != user.id and user.role != "superadmin"):
+        raise HTTPException(403)
+    attachment_service.delete_attachment_file(att.storage_path)
+    db.delete(att)
+    db.commit()
+    return {"ok": True}
+def _att_dict(att: ChatAttachment) -> dict:
+    return {
+        "id": att.id,
+        "chat_id": att.chat_id,
+        "message_id": att.message_id,
+        "filename": att.filename,
+        "original_filename": att.original_filename,
+        "mime_type": att.mime_type,
+        "file_type": att.file_type,
+        "file_size": att.file_size,
+        "created_at": str(att.created_at),
+    }
\ No newline at end of file
--- a/backend/routes/attachment_routes_15.py
+++ b/backend/routes/attachment_routes_15.py
+"""
+Chat attachment upload, serve, and delete routes.
+"""
+import os
+from typing import Optional
+from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Query
+from fastapi.responses import FileResponse
+from sqlalchemy.orm import Session
+from backend.database import get_db
+from backend.models import User, Chat, ChatAttachment
+from backend.auth import get_current_user, decode_token
+from backend.services import attachment_service
+from backend.config import MAX_ATTACHMENT_BYTES
+router = APIRouter()
+@router.post("/chats/{chat_id}/attachments")
+async def upload_attachments(
+    chat_id: str,
+    files: list[UploadFile] = File(...),
+    user: User = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """Upload one or more files as chat attachments. Returns attachment metadata."""
+    chat = db.query(Chat).filter(Chat.id == chat_id, Chat.user_id == user.id).first()
+    if not chat:
+        raise HTTPException(404, "Chat not found")
+    results = []
+    for file in files:
+        filename = file.filename or "file"
+        try:
+            content = await file.read()
+            if len(content) > MAX_ATTACHMENT_BYTES:
+                results.append({
+                    "error": f"File too large: {filename} ({len(content) // 1024 // 1024}MB). Max {MAX_ATTACHMENT_BYTES // 1024 // 1024}MB.",
+                })
+                continue
+            meta = attachment_service.save_attachment(
+                chat_id=chat_id,
+                filename=filename,
+                content=content,
+                content_type=file.content_type,
+            )
+            att = ChatAttachment(
+                id=meta["id"],
+                chat_id=chat_id,
+                filename=meta["filename"],
+                original_filename=meta["original_filename"],
+                mime_type=meta["mime_type"],
+                file_type=meta["file_type"],
+                file_size=meta["file_size"],
+                storage_path=meta["storage_path"],
+                text_extract=meta.get("text_extract"),
+            )
+            db.add(att)
+            db.commit()
+            db.refresh(att)
+            results.append(_att_dict(att))
+        except Exception as e:
+            results.append({"error": f"Failed to upload {filename}: {str(e)}"})
+    return {"attachments": results}
+@router.get("/attachments/{attachment_id}/file")
+def serve_attachment(
+    attachment_id: str,
+    token: Optional[str] = Query(None),
+    user: Optional[User] = Depends(_optional_current_user),
+    db: Session = Depends(get_db),
+):
+    """
+    Serve an attachment file.
+    Supports both Bearer header auth and ?token= query param
+    (needed for <img> tags that can't send headers).
+    """
+    # Try query param auth if header auth didn't work
+    if user is None and token:
+        try:
+            payload = decode_token(token)
+            user = db.query(User).filter(User.id == payload["sub"]).first()
+        except Exception:
+            pass
+    if user is None:
+        raise HTTPException(401, "Authentication required")
+    att = db.query(ChatAttachment).filter(ChatAttachment.id == attachment_id).first()
+    if not att:
+        raise HTTPException(404, "Attachment not found")
+    chat = db.query(Chat).filter(Chat.id == att.chat_id).first()
+    if not chat or (chat.user_id != user.id and user.role != "superadmin"):
+        raise HTTPException(403, "Access denied")
+    if not os.path.exists(att.storage_path):
+        raise HTTPException(404, "File not found on disk")
+    return FileResponse(
+        att.storage_path,
+        media_type=att.mime_type,
+        filename=att.original_filename,
+    )
+@router.delete("/attachments/{attachment_id}")
+def delete_attachment(
+    attachment_id: str,
+    user: User = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """Delete a single attachment."""
+    att = db.query(ChatAttachment).filter(ChatAttachment.id == attachment_id).first()
+    if not att:
+        raise HTTPException(404)
+    chat = db.query(Chat).filter(Chat.id == att.chat_id).first()
+    if not chat or (chat.user_id != user.id and user.role != "superadmin"):
+        raise HTTPException(403)
+    attachment_service.delete_attachment_file(att.storage_path)
+    db.delete(att)
+    db.commit()
+    return {"ok": True}
+def _optional_current_user(
+    db: Session = Depends(get_db),
+):
+    """
+    A dependency that tries to get current user but returns None on failure.
+    This allows the endpoint to also accept ?token= query param.
+    """
+    # This is a placeholder — the actual auth is handled in the route
+    # by checking both header and query param
+    return None
+def _att_dict(att: ChatAttachment) -> dict:
+    return {
+        "id": att.id,
+        "chat_id": att.chat_id,
+        "message_id": att.message_id,
+        "filename": att.filename,
+        "original_filename": att.original_filename,
+        "mime_type": att.mime_type,
+        "file_type": att.file_type,
+        "file_size": att.file_size,
+        "created_at": str(att.created_at),
+    }
\ No newline at end of file
--- a/backend/routes/attachment_routes_16.py
+++ b/backend/routes/attachment_routes_16.py
+"""
+Chat attachment upload, serve, and delete routes.
+"""
+import os
+from typing import Optional
+from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Query, Request
+from fastapi.responses import FileResponse
+from sqlalchemy.orm import Session
+from backend.database import get_db
+from backend.models import User, Chat, ChatAttachment
+from backend.auth import get_current_user, decode_token
+from backend.services import attachment_service
+from backend.config import MAX_ATTACHMENT_BYTES
+router = APIRouter()
+def _get_user_from_request(request: Request, db: Session, token_param: Optional[str] = None) -> User:
+    """
+    Resolve user from either:
+    1. Authorization: Bearer <token> header
+    2. ?token=<token> query parameter (for img/video tags)
+    """
+    raw_token = None
+    # Try header first
+    auth_header = request.headers.get("authorization", "")
+    if auth_header.startswith("Bearer "):
+        raw_token = auth_header[7:]
+    # Fall back to query param
+    if not raw_token and token_param:
+        raw_token = token_param
+    if not raw_token:
+        raise HTTPException(401, "Authentication required")
+    payload = decode_token(raw_token)
+    user = db.query(User).filter(User.id == payload["sub"]).first()
+    if not user or not user.is_active:
+        raise HTTPException(401, "User not found or inactive")
+    return user
+@router.post("/chats/{chat_id}/attachments")
+async def upload_attachments(
+    chat_id: str,
+    files: list[UploadFile] = File(...),
+    user: User = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """Upload one or more files as chat attachments. Returns attachment metadata."""
+    chat = db.query(Chat).filter(Chat.id == chat_id, Chat.user_id == user.id).first()
+    if not chat:
+        raise HTTPException(404, "Chat not found")
+    results = []
+    for file in files:
+        filename = file.filename or "file"
+        try:
+            content = await file.read()
+            if len(content) > MAX_ATTACHMENT_BYTES:
+                results.append({
+                    "error": f"File too large: {filename} ({len(content) // 1024 // 1024}MB). Max {MAX_ATTACHMENT_BYTES // 1024 // 1024}MB.",
+                })
+                continue
+            meta = attachment_service.save_attachment(
+                chat_id=chat_id,
+                filename=filename,
+                content=content,
+                content_type=file.content_type,
+            )
+            att = ChatAttachment(
+                id=meta["id"],
+                chat_id=chat_id,
+                filename=meta["filename"],
+                original_filename=meta["original_filename"],
+                mime_type=meta["mime_type"],
+                file_type=meta["file_type"],
+                file_size=meta["file_size"],
+                storage_path=meta["storage_path"],
+                text_extract=meta.get("text_extract"),
+            )
+            db.add(att)
+            db.commit()
+            db.refresh(att)
+            results.append(_att_dict(att))
+        except Exception as e:
+            results.append({"error": f"Failed to upload {filename}: {str(e)}"})
+    return {"attachments": results}
+@router.get("/attachments/{attachment_id}/file")
+def serve_attachment(
+    attachment_id: str,
+    request: Request,
+    token: Optional[str] = Query(None),
+    db: Session = Depends(get_db),
+):
+    """
+    Serve an attachment file.
+    Supports both Bearer header auth and ?token= query param
+    (needed for <img> tags that can't send headers).
+    """
+    user = _get_user_from_request(request, db, token)
+    att = db.query(ChatAttachment).filter(ChatAttachment.id == attachment_id).first()
+    if not att:
+        raise HTTPException(404, "Attachment not found")
+    chat = db.query(Chat).filter(Chat.id == att.chat_id).first()
+    if not chat or (chat.user_id != user.id and user.role != "superadmin"):
+        raise HTTPException(403, "Access denied")
+    if not os.path.exists(att.storage_path):
+        raise HTTPException(404, "File not found on disk")
+    return FileResponse(
+        att.storage_path,
+        media_type=att.mime_type,
+        filename=att.original_filename,
+    )
+@router.delete("/attachments/{attachment_id}")
+def delete_attachment(
+    attachment_id: str,
+    user: User = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """Delete a single attachment."""
+    att = db.query(ChatAttachment).filter(ChatAttachment.id == attachment_id).first()
+    if not att:
+        raise HTTPException(404)
+    chat = db.query(Chat).filter(Chat.id == att.chat_id).first()
+    if not chat or (chat.user_id != user.id and user.role != "superadmin"):
+        raise HTTPException(403)
+    attachment_service.delete_attachment_file(att.storage_path)
+    db.delete(att)
+    db.commit()
+    return {"ok": True}
+def _att_dict(att: ChatAttachment) -> dict:
+    return {
+        "id": att.id,
+        "chat_id": att.chat_id,
+        "message_id": att.message_id,
+        "filename": att.filename,
+        "original_filename": att.original_filename,
+        "mime_type": att.mime_type,
+        "file_type": att.file_type,
+        "file_size": att.file_size,
+        "created_at": str(att.created_at),
+    }
\ No newline at end of file
--- a/backend/services/attachment_service.py
+++ b/backend/services/attachment_service.py
+"""
+Attachment processing service.
+Handles images (resize + base64 for Claude vision),
+videos (frame extraction via ffmpeg),
+PDFs (native document support),
+and text files (read content).
+"""
+import os
+import io
+import base64
+import shutil
+import subprocess
+import tempfile
+import mimetypes
+from pathlib import Path
+from uuid import uuid4
+from typing import Optional
+from backend import config
+# Import-time side effect: make sure the attachment root directory exists.
+os.makedirs(config.ATTACHMENT_PATH, exist_ok=True)
+# ── File type detection ──────────────────────────────
+# Lower-case suffixes (with leading dot) that classify_file treats as images.
+IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"}
+# Lower-case suffixes that classify_file treats as videos.
+VIDEO_EXTENSIONS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".flv", ".wmv", ".m4v"}
+# Lower-case suffixes that classify_file treats as documents.
+PDF_EXTENSIONS = {".pdf"}
+# NOTE(review): not referenced anywhere in the visible part of this module;
+# classify_file returns "text" for every file that is not image/video/PDF,
+# whether or not its suffix is listed here.
+TEXT_EXTENSIONS = {
+    ".txt", ".md", ".py", ".js", ".ts", ".jsx", ".tsx", ".cs", ".java",
+    ".cpp", ".c", ".h", ".hpp", ".go", ".rs", ".rb", ".php", ".swift",
+    ".kt", ".lua", ".gd", ".html", ".css", ".scss", ".json", ".yaml",
+    ".yml", ".xml", ".toml", ".ini", ".cfg", ".conf", ".sh", ".bash",
+    ".sql", ".r", ".dart", ".vue", ".svelte", ".csv", ".log", ".env",
+    ".gitignore", ".dockerfile", ".makefile",
+}
+# MIME types classified as images; _build_image_block also uses this set as
+# the list of media types it will declare on an image content block.
+IMAGE_MIMES = {"image/jpeg", "image/png", "image/gif", "image/webp"}
+# MIME types classified as videos.
+VIDEO_MIMES = {"video/mp4", "video/quicktime", "video/x-msvideo", "video/webm", "video/x-matroska"}
+def classify_file(filename: str, mime: str) -> str:
+    """Classify file into: image, video, document, text"""
+    ext = Path(filename).suffix.lower()
+    if ext in IMAGE_EXTENSIONS or mime in IMAGE_MIMES:
+        return "image"
+    if ext in VIDEO_EXTENSIONS or mime in VIDEO_MIMES:
+        return "video"
+    if ext in PDF_EXTENSIONS or mime == "application/pdf":
+        return "document"
+    return "text"
+def get_mime_type(filename: str, content_type: Optional[str] = None) -> str:
+    """Determine MIME type."""
+    if content_type and content_type != "application/octet-stream":
+        return content_type
+    mime, _ = mimetypes.guess_type(filename)
+    return mime or "application/octet-stream"
+# ── File storage ─────────────────────────────────────
+def save_attachment(chat_id: str, filename: str, content: bytes, content_type: Optional[str] = None) -> dict:
+    """
+    Save an uploaded file to disk. Returns metadata dict.
+    """
+    mime = get_mime_type(filename, content_type)
+    file_type = classify_file(filename, mime)
+    attachment_id = str(uuid4())
+    # Create chat-specific directory
+    chat_dir = os.path.join(config.ATTACHMENT_PATH, chat_id)
+    os.makedirs(chat_dir, exist_ok=True)
+    # Sanitize filename
+    safe_name = Path(filename).name.replace(" ", "_")
+    stored_name = f"{attachment_id}_{safe_name}"
+    storage_path = os.path.join(chat_dir, stored_name)
+    with open(storage_path, "wb") as f:
+        f.write(content)
+    # Extract text for text-based files
+    text_extract = None
+    if file_type == "text":
+        text_extract = _extract_text_content(storage_path, filename)
+    elif file_type == "document":
+        text_extract = _extract_pdf_text(storage_path)
+    return {
+        "id": attachment_id,
+        "filename": stored_name,
+        "original_filename": filename,
+        "mime_type": mime,
+        "file_type": file_type,
+        "file_size": len(content),
+        "storage_path": storage_path,
+        "text_extract": text_extract,
+    }
+def delete_attachment_file(storage_path: str):
+    """Delete an attachment file from disk."""
+    try:
+        if os.path.exists(storage_path):
+            os.remove(storage_path)
+    except Exception:
+        pass
+def delete_chat_attachments(chat_id: str):
+    """Delete all attachment files for a chat."""
+    chat_dir = os.path.join(config.ATTACHMENT_PATH, chat_id)
+    if os.path.isdir(chat_dir):
+        shutil.rmtree(chat_dir, ignore_errors=True)
+# ── Claude content block builders ────────────────────
+def build_claude_content_blocks(attachments: list) -> list[dict]:
+    """
+    Build Anthropic-compatible content blocks for a list of ChatAttachment objects.
+    Returns a list of content block dicts ready for the messages API.
+    """
+    blocks = []
+    for att in attachments:
+        try:
+            file_blocks = _process_single_attachment(att)
+            if isinstance(file_blocks, list):
+                blocks.extend(file_blocks)
+            elif file_blocks:
+                blocks.append(file_blocks)
+        except Exception as e:
+            blocks.append({
+                "type": "text",
+                "text": f"[Failed to process {att.original_filename}: {str(e)}]",
+            })
+    return blocks
+def _process_single_attachment(att) -> list[dict] | dict | None:
+    """Process a single attachment into Claude content blocks."""
+    if att.file_type == "image":
+        return _build_image_block(att)
+    elif att.file_type == "video":
+        return _build_video_blocks(att)
+    elif att.file_type == "document":
+        return _build_document_block(att)
+    elif att.file_type == "text":
+        return _build_text_block(att)
+    return None
+def _build_image_block(att) -> dict:
+    """Build an image content block. Resizes if needed."""
+    data = _read_and_resize_image(att.storage_path, att.mime_type)
+    mime = att.mime_type
+    # Claude only accepts specific image types
+    if mime not in IMAGE_MIMES:
+        mime = "image/jpeg"
+    return {
+        "type": "image",
+        "source": {
+            "type": "base64",
+            "media_type": mime,
+            "data": data,
+        },
+    }
+def _build_video_blocks(att) -> list[dict]:
+    """Extract frames from video and build image content blocks."""
+    frames = _extract_video_frames(att.storage_path)
+    if not frames:
+        return [{
+            "type": "text",
+            "text": f"[Video: {att.original_filename} — could not extract frames. ffmpeg may not be available.]",
+        }]
+    blocks = [{
+        "type": "text",
+        "text": f"[Video: {att.original_filename} — {len(frames)} key frames extracted]",
+    }]
+    for frame_b64 in frames:
+        blocks.append({
+            "type": "image",
+            "source": {
+                "type": "base64",
+                "media_type": "image/jpeg",
+                "data": frame_b64,
+            },
+        })
+    return blocks
+def _build_document_block(att) -> dict:
+    """Build a document content block for PDFs."""
+    if att.mime_type == "application/pdf":
+        with open(att.storage_path, "rb") as f:
+            data = base64.b64encode(f.read()).decode("utf-8")
+        return {
+            "type": "document",
+            "source": {
+                "type": "base64",
+                "media_type": "application/pdf",
+                "data": data,
+            },
+        }
+    # Non-PDF documents: fall back to text
+    return _build_text_block(att)
+def _build_text_block(att) -> dict:
+    """Build a text content block from a text-based file."""
+    text = att.text_extract
+    if not text:
+        text = _extract_text_content(att.storage_path, att.original_filename)
+    if not text:
+        text = f"[Could not extract text from {att.original_filename}]"
+    return {
+        "type": "text",
+        "text": f"--- File: {att.original_filename} ---\n{text}\n--- End of {att.original_filename} ---",
+    }
+# ── Image processing ─────────────────────────────────
+def _read_and_resize_image(path: str, mime_type: str) -> str:
+    """Read an image, resize if too large, return base64 string."""
+    try:
+        from PIL import Image
+        img = Image.open(path)
+        # Convert to RGB if necessary (handles RGBA, palette, etc.)
+        if img.mode in ("RGBA", "LA", "P"):
+            background = Image.new("RGB", img.size, (255, 255, 255))
+            if img.mode == "P":
+                img = img.convert("RGBA")
+            background.paste(img, mask=img.split()[-1] if "A" in img.mode else None)
+            img = background
+        elif img.mode != "RGB":
+            img = img.convert("RGB")
+        # Resize if either dimension exceeds max
+        max_dim = config.MAX_IMAGE_DIMENSION
+        if img.width > max_dim or img.height > max_dim:
+            ratio = min(max_dim / img.width, max_dim / img.height)
+            new_size = (int(img.width * ratio), int(img.height * ratio))
+            img = img.resize(new_size, Image.LANCZOS)
+        # Encode to JPEG for efficiency
+        buf = io.BytesIO()
+        fmt = "PNG" if mime_type == "image/png" else "JPEG"
+        save_kwargs = {"quality": 85} if fmt == "JPEG" else {}
+        img.save(buf, format=fmt, **save_kwargs)
+        return base64.b64encode(buf.getvalue()).decode("utf-8")
+    except ImportError:
+        # Pillow not installed — send raw
+        with open(path, "rb") as f:
+            return base64.b64encode(f.read()).decode("utf-8")
+    except Exception:
+        with open(path, "rb") as f:
+            return base64.b64encode(f.read()).decode("utf-8")
+# ── Video frame extraction ───────────────────────────
+def _extract_video_frames(video_path: str) -> list[str]:
+    """
+    Extract key frames from a video using ffmpeg. Returns list of base64 JPEG strings.
+    Returns an empty list when ffmpeg/ffprobe are not on PATH or the reported
+    duration is not positive. Any exception stops extraction and whatever
+    frames were collected so far are returned.
+    """
+    # Both binaries are required: ffprobe for the duration, ffmpeg for frames.
+    if not shutil.which("ffmpeg") or not shutil.which("ffprobe"):
+        return []
+    max_frames = config.MAX_VIDEO_FRAMES
+    frames = []
+    try:
+        # Get duration
+        result = subprocess.run(
+            [
+                "ffprobe", "-v", "error",
+                "-show_entries", "format=duration",
+                "-of", "default=noprint_wrappers=1:nokey=1",
+                video_path,
+            ],
+            capture_output=True, text=True, timeout=30,
+        )
+        # Empty output is treated as 0; non-numeric output raises ValueError,
+        # which the outer except turns into "no frames".
+        duration = float(result.stdout.strip() or "0")
+        if duration <= 0:
+            return []
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Evenly spaced timestamps strictly inside the clip: dividing by
+            # max_frames + 1 avoids sampling at 0 and at the very end.
+            interval = duration / (max_frames + 1)
+            for i in range(max_frames):
+                timestamp = interval * (i + 1)
+                output = os.path.join(tmpdir, f"frame_{i}.jpg")
+                # ffmpeg's exit status is not checked; success is judged by
+                # whether a non-empty output file exists afterwards.
+                subprocess.run(
+                    [
+                        "ffmpeg", "-ss", str(timestamp),
+                        "-i", video_path,
+                        "-vframes", "1",
+                        "-vf", f"scale='min({config.MAX_IMAGE_DIMENSION},iw)':'min({config.MAX_IMAGE_DIMENSION},ih)':force_original_aspect_ratio=decrease",
+                        "-q:v", "3",
+                        output,
+                    ],
+                    capture_output=True, timeout=30,
+                )
+                if os.path.exists(output) and os.path.getsize(output) > 0:
+                    with open(output, "rb") as f:
+                        frames.append(base64.b64encode(f.read()).decode("utf-8"))
+    except Exception:
+        # Best effort: timeouts and parse errors end extraction silently.
+        pass
+    return frames
+# ── Text extraction ──────────────────────────────────
+def _extract_text_content(path: str, filename: str) -> Optional[str]:
+    """Extract text from a text-based file."""
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            return f.read(500_000)  # Cap at 500K chars
+    except UnicodeDecodeError:
+        try:
+            with open(path, "r", encoding="latin-1") as f:
+                return f.read(500_000)
+        except Exception:
+            return None
+    except Exception:
+        return None
+def _extract_pdf_text(path: str) -> Optional[str]:
+    """Extract text from a PDF for storage/indexing."""
+    try:
+        from PyPDF2 import PdfReader
+        reader = PdfReader(path)
+        pages = []
+        for page in reader.pages[:100]:  # Cap at 100 pages
+            text = page.extract_text()
+            if text:
+                pages.append(text)
+        return "\n\n".join(pages) if pages else None
+    except Exception:
+        return None
\ No newline at end of file
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,5 @@ python-multipart==0.0.20
 httpx==0.28.1
 chromadb==0.6.3
 PyPDF2==3.0.1
 pydantic==2.10.4
\ No newline at end of file
+Pillow==11.1.0
\ No newline at end of file