Commit d55ca0c4 authored by Mahmoud Aglan's avatar Mahmoud Aglan

fghfghjfghfb nfgmn

parent 41f45d5e
...@@ -4,22 +4,10 @@ ...@@ -4,22 +4,10 @@
FROM node:20-alpine AS frontend-build FROM node:20-alpine AS frontend-build
WORKDIR /build/frontend WORKDIR /build/frontend
COPY frontend/package.json frontend/package-lock.json* ./
# Copy everything so lockfile, configs (vite, tailwind, postcss) are all present RUN npm install --legacy-peer-deps
COPY frontend/ ./ COPY frontend/ ./
RUN npm run build
# Install deps: use ci if lockfile exists, otherwise install and generate one
RUN if [ -f package-lock.json ]; then \
echo "📦 Found package-lock.json — running npm ci" && \
npm ci --legacy-peer-deps; \
else \
echo "⚠️ No package-lock.json — running npm install" && \
npm install --legacy-peer-deps; \
fi && \
npm cache clean --force
# Build production bundle
RUN NODE_ENV=production npm run build
# ============================================ # ============================================
# Stage 2: Python Backend + Serve Frontend # Stage 2: Python Backend + Serve Frontend
...@@ -28,6 +16,7 @@ FROM python:3.11-slim ...@@ -28,6 +16,7 @@ FROM python:3.11-slim
RUN apt-get update && apt-get install -y --no-install-recommends \ RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \ build-essential \
ffmpeg \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
WORKDIR /app WORKDIR /app
...@@ -40,12 +29,11 @@ COPY backend/ ./backend/ ...@@ -40,12 +29,11 @@ COPY backend/ ./backend/
COPY --from=frontend-build /build/frontend/dist ./frontend/dist COPY --from=frontend-build /build/frontend/dist ./frontend/dist
# Warm up the ChromaDB embedding model so first request is fast # Warm up the ChromaDB embedding model so first request is fast
# Using a separate script file to avoid all quoting issues
COPY warmup.py /tmp/warmup.py COPY warmup.py /tmp/warmup.py
RUN python /tmp/warmup.py && rm /tmp/warmup.py RUN python /tmp/warmup.py && rm /tmp/warmup.py
# Create persistent data directories # Create persistent data directories
RUN mkdir -p /data/chromadb /data/uploads RUN mkdir -p /data/chromadb /data/uploads /data/uploads/chat_attachments
ENV PYTHONUNBUFFERED=1 ENV PYTHONUNBUFFERED=1
......
"""
Chat attachment upload, serve, and delete routes.
"""
import os
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from backend.database import get_db
from backend.models import User, Chat, ChatAttachment
from backend.auth import get_current_user
from backend.services import attachment_service
from backend.config import MAX_ATTACHMENT_BYTES
router = APIRouter()
@router.post("/chats/{chat_id}/attachments")
async def upload_attachments(
    chat_id: str,
    files: list[UploadFile] = File(...),
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Upload one or more files as attachments of a chat owned by the caller.

    Files are processed independently: a failure for one file is reported as
    an ``{"error": ...}`` entry and does not abort the remaining files.

    Returns:
        ``{"attachments": [...]}`` with one entry per submitted file, either
        the attachment metadata or an error message.

    Raises:
        HTTPException: 404 when no chat with ``chat_id`` belongs to ``user``.
    """
    chat = db.query(Chat).filter(Chat.id == chat_id, Chat.user_id == user.id).first()
    if not chat:
        raise HTTPException(404, "Chat not found")

    results = []
    for file in files:
        filename = file.filename or "file"
        # Path of a file written to disk whose DB row is not yet committed.
        pending_path = None
        try:
            # Prefer the size reported by the upload object (when the
            # framework provides it) so oversized files are rejected without
            # pulling the whole payload into memory.
            size = getattr(file, "size", None)
            if size is None or size <= MAX_ATTACHMENT_BYTES:
                content = await file.read()
                size = len(content)
            if size > MAX_ATTACHMENT_BYTES:
                results.append({
                    "error": f"File too large: {filename} ({size // 1024 // 1024}MB). Max {MAX_ATTACHMENT_BYTES // 1024 // 1024}MB.",
                })
                continue

            meta = attachment_service.save_attachment(
                chat_id=chat_id,
                filename=filename,
                content=content,
                content_type=file.content_type,
            )
            pending_path = meta["storage_path"]

            att = ChatAttachment(
                id=meta["id"],
                chat_id=chat_id,
                filename=meta["filename"],
                original_filename=meta["original_filename"],
                mime_type=meta["mime_type"],
                file_type=meta["file_type"],
                file_size=meta["file_size"],
                storage_path=meta["storage_path"],
                text_extract=meta.get("text_extract"),
            )
            db.add(att)
            db.commit()
            # The row is durable now; the file must be kept from here on.
            pending_path = None
            db.refresh(att)
            results.append(_att_dict(att))
        except Exception as e:
            # Reset the session so a failed flush/commit does not break the
            # remaining files, and drop the file that has no DB row.
            db.rollback()
            if pending_path is not None:
                attachment_service.delete_attachment_file(pending_path)
            results.append({"error": f"Failed to upload {filename}: {str(e)}"})
    return {"attachments": results}
@router.get("/attachments/{attachment_id}/file")
def serve_attachment(
    attachment_id: str,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Return the stored file of an attachment.

    Access is granted to the owner of the attachment's chat and to users
    whose role is ``"superadmin"``.

    Raises:
        HTTPException: 404 when the attachment row or its file is missing,
            403 when the caller may not read it.
    """
    attachment = (
        db.query(ChatAttachment)
        .filter(ChatAttachment.id == attachment_id)
        .first()
    )
    if not attachment:
        raise HTTPException(404, "Attachment not found")

    owning_chat = db.query(Chat).filter(Chat.id == attachment.chat_id).first()
    may_read = owning_chat and (
        owning_chat.user_id == user.id or user.role == "superadmin"
    )
    if not may_read:
        raise HTTPException(403, "Access denied")

    file_path = attachment.storage_path
    if not os.path.exists(file_path):
        raise HTTPException(404, "File not found on disk")

    return FileResponse(
        file_path,
        media_type=attachment.mime_type,
        filename=attachment.original_filename,
    )
@router.delete("/attachments/{attachment_id}")
def delete_attachment(
    attachment_id: str,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Delete a single attachment (database row and stored file).

    Allowed for the owner of the attachment's chat and for users whose role
    is ``"superadmin"``.

    Returns:
        ``{"ok": True}`` on success.

    Raises:
        HTTPException: 404 when the attachment does not exist, 403 when the
            caller may not delete it.
    """
    att = db.query(ChatAttachment).filter(ChatAttachment.id == attachment_id).first()
    if not att:
        raise HTTPException(404)
    chat = db.query(Chat).filter(Chat.id == att.chat_id).first()
    if not chat or (chat.user_id != user.id and user.role != "superadmin"):
        raise HTTPException(403)

    # Commit the row removal first: if the commit fails the file is still
    # there and the attachment stays usable, instead of leaving a row that
    # points at a file that no longer exists.
    storage_path = att.storage_path
    db.delete(att)
    db.commit()
    attachment_service.delete_attachment_file(storage_path)
    return {"ok": True}
def _att_dict(att: ChatAttachment) -> dict:
    """Serialize an attachment row into the dict returned by the API.

    ``created_at`` is converted with ``str()``; every other listed field is
    copied as-is.
    """
    copied_fields = (
        "id",
        "chat_id",
        "message_id",
        "filename",
        "original_filename",
        "mime_type",
        "file_type",
        "file_size",
    )
    payload = {field: getattr(att, field) for field in copied_fields}
    payload["created_at"] = str(att.created_at)
    return payload
\ No newline at end of file
"""
Chat attachment upload, serve, and delete routes.
"""
import os
from typing import Optional

from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Query, Request
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session

from backend.database import get_db
from backend.models import User, Chat, ChatAttachment
from backend.auth import get_current_user, decode_token
from backend.services import attachment_service
from backend.config import MAX_ATTACHMENT_BYTES
router = APIRouter()
@router.post("/chats/{chat_id}/attachments")
async def upload_attachments(
    chat_id: str,
    files: list[UploadFile] = File(...),
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Upload one or more files as attachments of a chat owned by the caller.

    Files are processed independently: a failure for one file is reported as
    an ``{"error": ...}`` entry and does not abort the remaining files.

    Returns:
        ``{"attachments": [...]}`` with one entry per submitted file, either
        the attachment metadata or an error message.

    Raises:
        HTTPException: 404 when no chat with ``chat_id`` belongs to ``user``.
    """
    chat = db.query(Chat).filter(Chat.id == chat_id, Chat.user_id == user.id).first()
    if not chat:
        raise HTTPException(404, "Chat not found")

    results = []
    for file in files:
        filename = file.filename or "file"
        # Path of a file written to disk whose DB row is not yet committed.
        pending_path = None
        try:
            # Prefer the size reported by the upload object (when the
            # framework provides it) so oversized files are rejected without
            # pulling the whole payload into memory.
            size = getattr(file, "size", None)
            if size is None or size <= MAX_ATTACHMENT_BYTES:
                content = await file.read()
                size = len(content)
            if size > MAX_ATTACHMENT_BYTES:
                results.append({
                    "error": f"File too large: {filename} ({size // 1024 // 1024}MB). Max {MAX_ATTACHMENT_BYTES // 1024 // 1024}MB.",
                })
                continue

            meta = attachment_service.save_attachment(
                chat_id=chat_id,
                filename=filename,
                content=content,
                content_type=file.content_type,
            )
            pending_path = meta["storage_path"]

            att = ChatAttachment(
                id=meta["id"],
                chat_id=chat_id,
                filename=meta["filename"],
                original_filename=meta["original_filename"],
                mime_type=meta["mime_type"],
                file_type=meta["file_type"],
                file_size=meta["file_size"],
                storage_path=meta["storage_path"],
                text_extract=meta.get("text_extract"),
            )
            db.add(att)
            db.commit()
            # The row is durable now; the file must be kept from here on.
            pending_path = None
            db.refresh(att)
            results.append(_att_dict(att))
        except Exception as e:
            # Reset the session so a failed flush/commit does not break the
            # remaining files, and drop the file that has no DB row.
            db.rollback()
            if pending_path is not None:
                attachment_service.delete_attachment_file(pending_path)
            results.append({"error": f"Failed to upload {filename}: {str(e)}"})
    return {"attachments": results}
def _user_from_bearer_header(
    request: Request,
    db: Session = Depends(get_db),
) -> Optional[User]:
    """Resolve the caller from an ``Authorization: Bearer`` header, if any.

    Returns ``None`` instead of raising so that the route can still fall back
    to the ``?token=`` query parameter.
    """
    header_value = request.headers.get("authorization", "")
    scheme, _, credentials = header_value.partition(" ")
    credentials = credentials.strip()
    if scheme.lower() != "bearer" or not credentials:
        return None
    try:
        payload = decode_token(credentials)
        return db.query(User).filter(User.id == payload["sub"]).first()
    except Exception:
        # An unusable header token is treated as "no header auth"; the route
        # decides whether that ends in a 401.
        return None


@router.get("/attachments/{attachment_id}/file")
def serve_attachment(
    attachment_id: str,
    token: Optional[str] = Query(None),
    user: Optional[User] = Depends(_user_from_bearer_header),
    db: Session = Depends(get_db),
):
    """
    Serve an attachment file.

    Supports both Bearer header auth and ?token= query param
    (needed for <img> tags that can't send headers). The header is tried
    first; the query parameter is only consulted when the header did not
    yield a user.

    Raises:
        HTTPException: 401 when neither credential resolves to a user,
            404 when the attachment row or its file is missing,
            403 when the caller may not read it.
    """
    # The dependency is defined above the route: a default argument is
    # evaluated when the function is defined, so referring to a dependency
    # declared further down the module would fail at import time.
    if user is None and token:
        try:
            payload = decode_token(token)
            user = db.query(User).filter(User.id == payload["sub"]).first()
        except Exception:
            user = None
    if user is None:
        raise HTTPException(401, "Authentication required")

    att = db.query(ChatAttachment).filter(ChatAttachment.id == attachment_id).first()
    if not att:
        raise HTTPException(404, "Attachment not found")
    chat = db.query(Chat).filter(Chat.id == att.chat_id).first()
    if not chat or (chat.user_id != user.id and user.role != "superadmin"):
        raise HTTPException(403, "Access denied")
    if not os.path.exists(att.storage_path):
        raise HTTPException(404, "File not found on disk")
    return FileResponse(
        att.storage_path,
        media_type=att.mime_type,
        filename=att.original_filename,
    )
@router.delete("/attachments/{attachment_id}")
def delete_attachment(
    attachment_id: str,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Delete a single attachment (database row and stored file).

    Allowed for the owner of the attachment's chat and for users whose role
    is ``"superadmin"``.

    Returns:
        ``{"ok": True}`` on success.

    Raises:
        HTTPException: 404 when the attachment does not exist, 403 when the
            caller may not delete it.
    """
    att = db.query(ChatAttachment).filter(ChatAttachment.id == attachment_id).first()
    if not att:
        raise HTTPException(404)
    chat = db.query(Chat).filter(Chat.id == att.chat_id).first()
    if not chat or (chat.user_id != user.id and user.role != "superadmin"):
        raise HTTPException(403)

    # Commit the row removal first: if the commit fails the file is still
    # there and the attachment stays usable, instead of leaving a row that
    # points at a file that no longer exists.
    storage_path = att.storage_path
    db.delete(att)
    db.commit()
    attachment_service.delete_attachment_file(storage_path)
    return {"ok": True}
def _optional_current_user(
    db: Session = Depends(get_db),
):
    """
    A dependency that tries to get current user but returns None on failure.
    This allows the endpoint to also accept ?token= query param.

    NOTE(review): as written this never inspects the request and always
    returns None, so no header-based authentication happens here; ``db`` is
    accepted but unused.
    """
    # This is a placeholder — the actual auth is handled in the route
    # by checking both header and query param
    return None
def _att_dict(att: ChatAttachment) -> dict:
    """Serialize an attachment row into the dict returned by the API.

    ``created_at`` is converted with ``str()``; every other listed field is
    copied as-is.
    """
    copied_fields = (
        "id",
        "chat_id",
        "message_id",
        "filename",
        "original_filename",
        "mime_type",
        "file_type",
        "file_size",
    )
    payload = {field: getattr(att, field) for field in copied_fields}
    payload["created_at"] = str(att.created_at)
    return payload
\ No newline at end of file
"""
Chat attachment upload, serve, and delete routes.
"""
import os
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Query, Request
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from backend.database import get_db
from backend.models import User, Chat, ChatAttachment
from backend.auth import get_current_user, decode_token
from backend.services import attachment_service
from backend.config import MAX_ATTACHMENT_BYTES
router = APIRouter()
def _get_user_from_request(request: Request, db: Session, token_param: Optional[str] = None) -> User:
    """
    Resolve user from either:
    1. Authorization: Bearer <token> header
    2. ?token=<token> query parameter (for img/video tags)

    The header takes precedence; the query parameter is only used when the
    header carries no bearer token.

    Raises:
        HTTPException: 401 when no token is supplied, the token cannot be
            decoded or has no ``sub`` claim, or the user is missing/inactive.
    """
    raw_token = None
    # Try header first. The auth scheme name is case-insensitive, so accept
    # "bearer" / "BEARER" as well as "Bearer".
    auth_header = request.headers.get("authorization", "")
    scheme, _, credentials = auth_header.partition(" ")
    if scheme.lower() == "bearer":
        raw_token = credentials.strip()
    # Fall back to query param
    if not raw_token and token_param:
        raw_token = token_param
    if not raw_token:
        raise HTTPException(401, "Authentication required")

    try:
        payload = decode_token(raw_token)
        user_id = payload["sub"]
    except HTTPException:
        # decode_token already produced an HTTP error; pass it through.
        raise
    except Exception as exc:
        # A malformed token or a missing "sub" claim is an authentication
        # failure, not a server error.
        raise HTTPException(401, "Invalid authentication token") from exc

    user = db.query(User).filter(User.id == user_id).first()
    if not user or not user.is_active:
        raise HTTPException(401, "User not found or inactive")
    return user
@router.post("/chats/{chat_id}/attachments")
async def upload_attachments(
    chat_id: str,
    files: list[UploadFile] = File(...),
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Upload one or more files as attachments of a chat owned by the caller.

    Files are processed independently: a failure for one file is reported as
    an ``{"error": ...}`` entry and does not abort the remaining files.

    Returns:
        ``{"attachments": [...]}`` with one entry per submitted file, either
        the attachment metadata or an error message.

    Raises:
        HTTPException: 404 when no chat with ``chat_id`` belongs to ``user``.
    """
    chat = db.query(Chat).filter(Chat.id == chat_id, Chat.user_id == user.id).first()
    if not chat:
        raise HTTPException(404, "Chat not found")

    results = []
    for file in files:
        filename = file.filename or "file"
        # Path of a file written to disk whose DB row is not yet committed.
        pending_path = None
        try:
            # Prefer the size reported by the upload object (when the
            # framework provides it) so oversized files are rejected without
            # pulling the whole payload into memory.
            size = getattr(file, "size", None)
            if size is None or size <= MAX_ATTACHMENT_BYTES:
                content = await file.read()
                size = len(content)
            if size > MAX_ATTACHMENT_BYTES:
                results.append({
                    "error": f"File too large: {filename} ({size // 1024 // 1024}MB). Max {MAX_ATTACHMENT_BYTES // 1024 // 1024}MB.",
                })
                continue

            meta = attachment_service.save_attachment(
                chat_id=chat_id,
                filename=filename,
                content=content,
                content_type=file.content_type,
            )
            pending_path = meta["storage_path"]

            att = ChatAttachment(
                id=meta["id"],
                chat_id=chat_id,
                filename=meta["filename"],
                original_filename=meta["original_filename"],
                mime_type=meta["mime_type"],
                file_type=meta["file_type"],
                file_size=meta["file_size"],
                storage_path=meta["storage_path"],
                text_extract=meta.get("text_extract"),
            )
            db.add(att)
            db.commit()
            # The row is durable now; the file must be kept from here on.
            pending_path = None
            db.refresh(att)
            results.append(_att_dict(att))
        except Exception as e:
            # Reset the session so a failed flush/commit does not break the
            # remaining files, and drop the file that has no DB row.
            db.rollback()
            if pending_path is not None:
                attachment_service.delete_attachment_file(pending_path)
            results.append({"error": f"Failed to upload {filename}: {str(e)}"})
    return {"attachments": results}
@router.get("/attachments/{attachment_id}/file")
def serve_attachment(
    attachment_id: str,
    request: Request,
    token: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """
    Return the stored file of an attachment.

    The caller is resolved by ``_get_user_from_request`` from the Bearer
    header or the ?token= query param (needed for <img> tags that can't
    send headers). Access is granted to the owner of the attachment's chat
    and to users whose role is ``"superadmin"``.

    Raises:
        HTTPException: 404 when the attachment row or its file is missing,
            403 when the caller may not read it.
    """
    user = _get_user_from_request(request, db, token)

    attachment = (
        db.query(ChatAttachment)
        .filter(ChatAttachment.id == attachment_id)
        .first()
    )
    if not attachment:
        raise HTTPException(404, "Attachment not found")

    owning_chat = db.query(Chat).filter(Chat.id == attachment.chat_id).first()
    may_read = owning_chat and (
        owning_chat.user_id == user.id or user.role == "superadmin"
    )
    if not may_read:
        raise HTTPException(403, "Access denied")

    file_path = attachment.storage_path
    if not os.path.exists(file_path):
        raise HTTPException(404, "File not found on disk")

    return FileResponse(
        file_path,
        media_type=attachment.mime_type,
        filename=attachment.original_filename,
    )
@router.delete("/attachments/{attachment_id}")
def delete_attachment(
    attachment_id: str,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Delete a single attachment (database row and stored file).

    Allowed for the owner of the attachment's chat and for users whose role
    is ``"superadmin"``.

    Returns:
        ``{"ok": True}`` on success.

    Raises:
        HTTPException: 404 when the attachment does not exist, 403 when the
            caller may not delete it.
    """
    att = db.query(ChatAttachment).filter(ChatAttachment.id == attachment_id).first()
    if not att:
        raise HTTPException(404)
    chat = db.query(Chat).filter(Chat.id == att.chat_id).first()
    if not chat or (chat.user_id != user.id and user.role != "superadmin"):
        raise HTTPException(403)

    # Commit the row removal first: if the commit fails the file is still
    # there and the attachment stays usable, instead of leaving a row that
    # points at a file that no longer exists.
    storage_path = att.storage_path
    db.delete(att)
    db.commit()
    attachment_service.delete_attachment_file(storage_path)
    return {"ok": True}
def _att_dict(att: ChatAttachment) -> dict:
    """Serialize an attachment row into the dict returned by the API.

    ``created_at`` is converted with ``str()``; every other listed field is
    copied as-is.
    """
    copied_fields = (
        "id",
        "chat_id",
        "message_id",
        "filename",
        "original_filename",
        "mime_type",
        "file_type",
        "file_size",
    )
    payload = {field: getattr(att, field) for field in copied_fields}
    payload["created_at"] = str(att.created_at)
    return payload
\ No newline at end of file
"""
Attachment processing service.
Handles images (resize + base64 for Claude vision),
videos (frame extraction via ffmpeg),
PDFs (native document support),
and text files (read content).
"""
import os
import io
import base64
import shutil
import subprocess
import tempfile
import mimetypes
from pathlib import Path
from uuid import uuid4
from typing import Optional
from backend import config
# Runs at import time: make sure the attachment root directory exists.
os.makedirs(config.ATTACHMENT_PATH, exist_ok=True)
# ── File type detection ──────────────────────────────
# Lower-case suffixes (with leading dot) that classify_file maps to "image".
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"}
# Lower-case suffixes that classify_file maps to "video".
VIDEO_EXTENSIONS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".flv", ".wmv", ".m4v"}
# Lower-case suffixes that classify_file maps to "document".
PDF_EXTENSIONS = {".pdf"}
# NOTE(review): not referenced by any function visible in this module;
# classify_file returns "text" for everything it does not recognise as
# image, video or PDF, whether or not the suffix is listed here.
TEXT_EXTENSIONS = {
".txt", ".md", ".py", ".js", ".ts", ".jsx", ".tsx", ".cs", ".java",
".cpp", ".c", ".h", ".hpp", ".go", ".rs", ".rb", ".php", ".swift",
".kt", ".lua", ".gd", ".html", ".css", ".scss", ".json", ".yaml",
".yml", ".xml", ".toml", ".ini", ".cfg", ".conf", ".sh", ".bash",
".sql", ".r", ".dart", ".vue", ".svelte", ".csv", ".log", ".env",
".gitignore", ".dockerfile", ".makefile",
}
# MIME types classified as "image"; _build_image_block also uses this set to
# decide whether an attachment's MIME type is passed through as media_type.
IMAGE_MIMES = {"image/jpeg", "image/png", "image/gif", "image/webp"}
# MIME types classified as "video".
VIDEO_MIMES = {"video/mp4", "video/quicktime", "video/x-msvideo", "video/webm", "video/x-matroska"}
def classify_file(filename: str, mime: str) -> str:
    """Return the attachment category: "image", "video", "document" or "text".

    A category matches when either the lower-cased file suffix or the MIME
    type belongs to it; categories are tried in the order image, video,
    document. Anything unmatched is classified as "text".
    """
    suffix = Path(filename).suffix.lower()
    rules = (
        ("image", IMAGE_EXTENSIONS, IMAGE_MIMES),
        ("video", VIDEO_EXTENSIONS, VIDEO_MIMES),
        ("document", PDF_EXTENSIONS, ("application/pdf",)),
    )
    for category, suffixes, mimes in rules:
        if suffix in suffixes or mime in mimes:
            return category
    return "text"
def get_mime_type(filename: str, content_type: Optional[str] = None) -> str:
    """Pick the MIME type for an upload.

    A non-empty ``content_type`` other than the generic
    ``application/octet-stream`` wins; otherwise the type is guessed from
    ``filename``, falling back to ``application/octet-stream``.
    """
    generic = "application/octet-stream"
    if content_type and content_type != generic:
        return content_type
    guessed = mimetypes.guess_type(filename)[0]
    return guessed if guessed else generic
# ── File storage ─────────────────────────────────────
def save_attachment(chat_id: str, filename: str, content: bytes, content_type: Optional[str] = None) -> dict:
    """
    Write an uploaded file under the chat's attachment directory.

    The stored name is ``<new uuid>_<base name with spaces replaced by "_">``.
    For "text" and "document" files a text extract is produced as well.

    Returns a metadata dict with the keys ``id``, ``filename``,
    ``original_filename``, ``mime_type``, ``file_type``, ``file_size``,
    ``storage_path`` and ``text_extract``.
    """
    mime = get_mime_type(filename, content_type)
    file_type = classify_file(filename, mime)
    attachment_id = str(uuid4())

    # One sub-directory per chat.
    target_dir = os.path.join(config.ATTACHMENT_PATH, chat_id)
    os.makedirs(target_dir, exist_ok=True)

    # Keep only the base name so the client cannot supply directory parts.
    base_name = Path(filename).name.replace(" ", "_")
    stored_name = f"{attachment_id}_{base_name}"
    storage_path = os.path.join(target_dir, stored_name)
    with open(storage_path, "wb") as out:
        out.write(content)

    if file_type == "text":
        text_extract = _extract_text_content(storage_path, filename)
    elif file_type == "document":
        text_extract = _extract_pdf_text(storage_path)
    else:
        text_extract = None

    return {
        "id": attachment_id,
        "filename": stored_name,
        "original_filename": filename,
        "mime_type": mime,
        "file_type": file_type,
        "file_size": len(content),
        "storage_path": storage_path,
        "text_extract": text_extract,
    }
def delete_attachment_file(storage_path: str):
    """Remove an attachment file from disk if it exists.

    Best-effort: any error raised while checking or removing is swallowed.
    """
    try:
        if not os.path.exists(storage_path):
            return
        os.remove(storage_path)
    except Exception:
        # Deliberately silent: a file that cannot be removed must not make
        # the caller fail.
        return
def delete_chat_attachments(chat_id: str):
    """Remove the whole attachment directory of a chat, ignoring errors."""
    chat_dir = os.path.join(config.ATTACHMENT_PATH, chat_id)
    if not os.path.isdir(chat_dir):
        return
    shutil.rmtree(chat_dir, ignore_errors=True)
# ── Claude content block builders ────────────────────
def build_claude_content_blocks(attachments: list) -> list[dict]:
    """
    Turn a list of ChatAttachment objects into message content blocks.

    Each attachment contributes zero, one or several blocks. An attachment
    that fails to process contributes a single text block describing the
    failure instead of aborting the whole list.
    """
    blocks = []
    for attachment in attachments:
        try:
            produced = _process_single_attachment(attachment)
        except Exception as exc:
            blocks.append({
                "type": "text",
                "text": f"[Failed to process {attachment.original_filename}: {str(exc)}]",
            })
            continue
        if isinstance(produced, list):
            blocks.extend(produced)
        elif produced:
            blocks.append(produced)
    return blocks
def _process_single_attachment(att) -> list[dict] | dict | None:
"""Process a single attachment into Claude content blocks."""
if att.file_type == "image":
return _build_image_block(att)
elif att.file_type == "video":
return _build_video_blocks(att)
elif att.file_type == "document":
return _build_document_block(att)
elif att.file_type == "text":
return _build_text_block(att)
return None
def _build_image_block(att) -> dict:
    """Build a base64 image content block for an attachment.

    The image bytes come from ``_read_and_resize_image``, which may re-encode
    the picture (anything that is not PNG is written as JPEG) or may return
    the file untouched when processing fails. The declared ``media_type``
    therefore has to follow the bytes actually sent, not the upload's MIME
    type: it is taken from the payload's magic bytes, and only when those are
    not recognised does it fall back to the attachment's MIME type (or
    ``image/jpeg`` if that type is not in ``IMAGE_MIMES``).
    """
    data = _read_and_resize_image(att.storage_path, att.mime_type)
    mime = _sniff_image_media_type(data)
    if mime is None:
        # Claude only accepts specific image types
        mime = att.mime_type if att.mime_type in IMAGE_MIMES else "image/jpeg"
    return {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": mime,
            "data": data,
        },
    }


def _sniff_image_media_type(data_b64: str) -> Optional[str]:
    """Return the media type implied by the leading bytes of base64 image data, or None."""
    try:
        # 24 base64 characters decode to 18 bytes, enough for every signature below.
        header = base64.b64decode(data_b64[:24])
    except (ValueError, TypeError):
        return None
    if header.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    if header.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if header.startswith((b"GIF87a", b"GIF89a")):
        return "image/gif"
    if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
        return "image/webp"
    return None
def _build_video_blocks(att) -> list[dict]:
    """Represent a video as a caption text block followed by JPEG frame blocks.

    When no frame could be extracted, a single explanatory text block is
    returned instead.
    """
    frames = _extract_video_frames(att.storage_path)
    if not frames:
        return [{
            "type": "text",
            "text": f"[Video: {att.original_filename} — could not extract frames. ffmpeg may not be available.]",
        }]

    caption = {
        "type": "text",
        "text": f"[Video: {att.original_filename} — {len(frames)} key frames extracted]",
    }
    frame_blocks = [
        {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/jpeg",
                "data": frame_b64,
            },
        }
        for frame_b64 in frames
    ]
    return [caption, *frame_blocks]
def _build_document_block(att) -> dict:
"""Build a document content block for PDFs."""
if att.mime_type == "application/pdf":
with open(att.storage_path, "rb") as f:
data = base64.b64encode(f.read()).decode("utf-8")
return {
"type": "document",
"source": {
"type": "base64",
"media_type": "application/pdf",
"data": data,
},
}
# Non-PDF documents: fall back to text
return _build_text_block(att)
def _build_text_block(att) -> dict:
"""Build a text content block from a text-based file."""
text = att.text_extract
if not text:
text = _extract_text_content(att.storage_path, att.original_filename)
if not text:
text = f"[Could not extract text from {att.original_filename}]"
return {
"type": "text",
"text": f"--- File: {att.original_filename} ---\n{text}\n--- End of {att.original_filename} ---",
}
# ── Image processing ─────────────────────────────────
def _read_and_resize_image(path: str, mime_type: str) -> str:
"""Read an image, resize if too large, return base64 string."""
try:
from PIL import Image
img = Image.open(path)
# Convert to RGB if necessary (handles RGBA, palette, etc.)
if img.mode in ("RGBA", "LA", "P"):
background = Image.new("RGB", img.size, (255, 255, 255))
if img.mode == "P":
img = img.convert("RGBA")
background.paste(img, mask=img.split()[-1] if "A" in img.mode else None)
img = background
elif img.mode != "RGB":
img = img.convert("RGB")
# Resize if either dimension exceeds max
max_dim = config.MAX_IMAGE_DIMENSION
if img.width > max_dim or img.height > max_dim:
ratio = min(max_dim / img.width, max_dim / img.height)
new_size = (int(img.width * ratio), int(img.height * ratio))
img = img.resize(new_size, Image.LANCZOS)
# Encode to JPEG for efficiency
buf = io.BytesIO()
fmt = "PNG" if mime_type == "image/png" else "JPEG"
save_kwargs = {"quality": 85} if fmt == "JPEG" else {}
img.save(buf, format=fmt, **save_kwargs)
return base64.b64encode(buf.getvalue()).decode("utf-8")
except ImportError:
# Pillow not installed — send raw
with open(path, "rb") as f:
return base64.b64encode(f.read()).decode("utf-8")
except Exception:
with open(path, "rb") as f:
return base64.b64encode(f.read()).decode("utf-8")
# ── Video frame extraction ───────────────────────────
def _extract_video_frames(video_path: str) -> list[str]:
    """Grab evenly spaced frames from a video as base64 JPEG strings.

    Requires both ``ffmpeg`` and ``ffprobe`` on PATH; returns an empty list
    when either is missing or the duration cannot be determined. Up to
    ``config.MAX_VIDEO_FRAMES`` frames are taken at equal intervals, each
    scaled down to fit ``config.MAX_IMAGE_DIMENSION``. Errors stop the
    extraction and whatever was collected so far is returned.
    """
    if not (shutil.which("ffmpeg") and shutil.which("ffprobe")):
        return []

    max_frames = config.MAX_VIDEO_FRAMES
    frames = []
    try:
        # Ask ffprobe for the container duration in seconds.
        probe_cmd = [
            "ffprobe", "-v", "error",
            "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1",
            video_path,
        ]
        probe = subprocess.run(probe_cmd, capture_output=True, text=True, timeout=30)
        duration = float(probe.stdout.strip() or "0")
        if duration <= 0:
            return []

        scale_filter = f"scale='min({config.MAX_IMAGE_DIMENSION},iw)':'min({config.MAX_IMAGE_DIMENSION},ih)':force_original_aspect_ratio=decrease"
        with tempfile.TemporaryDirectory() as workdir:
            # Sample strictly inside the clip: never at 0 or at the very end.
            step = duration / (max_frames + 1)
            for index in range(max_frames):
                frame_path = os.path.join(workdir, f"frame_{index}.jpg")
                grab_cmd = [
                    "ffmpeg", "-ss", str(step * (index + 1)),
                    "-i", video_path,
                    "-vframes", "1",
                    "-vf", scale_filter,
                    "-q:v", "3",
                    frame_path,
                ]
                subprocess.run(grab_cmd, capture_output=True, timeout=30)
                if os.path.exists(frame_path) and os.path.getsize(frame_path) > 0:
                    with open(frame_path, "rb") as frame_file:
                        frames.append(base64.b64encode(frame_file.read()).decode("utf-8"))
    except Exception:
        # Best-effort: keep the frames gathered before the failure.
        pass
    return frames
# ── Text extraction ──────────────────────────────────
def _extract_text_content(path: str, filename: str) -> Optional[str]:
"""Extract text from a text-based file."""
try:
with open(path, "r", encoding="utf-8") as f:
return f.read(500_000) # Cap at 500K chars
except UnicodeDecodeError:
try:
with open(path, "r", encoding="latin-1") as f:
return f.read(500_000)
except Exception:
return None
except Exception:
return None
def _extract_pdf_text(path: str) -> Optional[str]:
"""Extract text from a PDF for storage/indexing."""
try:
from PyPDF2 import PdfReader
reader = PdfReader(path)
pages = []
for page in reader.pages[:100]: # Cap at 100 pages
text = page.extract_text()
if text:
pages.append(text)
return "\n\n".join(pages) if pages else None
except Exception:
return None
\ No newline at end of file
...@@ -8,4 +8,5 @@ python-multipart==0.0.20 ...@@ -8,4 +8,5 @@ python-multipart==0.0.20
httpx==0.28.1 httpx==0.28.1
chromadb==0.6.3 chromadb==0.6.3
PyPDF2==3.0.1 PyPDF2==3.0.1
pydantic==2.10.4 pydantic==2.10.4
\ No newline at end of file Pillow==11.1.0
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment