Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
S
Son Of Anton
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Administrator
Son Of Anton
Commits
d55ca0c4
Commit
d55ca0c4
authored
Mar 19, 2026
by
Mahmoud Aglan
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fghfghjfghfb nfgmn
parent
41f45d5e
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
808 additions
and
18 deletions
+808
-18
Dockerfile
Dockerfile
+5
-17
attachment_routes.py
backend/routes/attachment_routes.py
+129
-0
attachment_routes_15.py
backend/routes/attachment_routes_15.py
+159
-0
attachment_routes_16.py
backend/routes/attachment_routes_16.py
+165
-0
attachment_service.py
backend/services/attachment_service.py
+348
-0
requirements.txt
requirements.txt
+2
-1
No files found.
Dockerfile
View file @
d55ca0c4
...
...
@@ -4,22 +4,10 @@
FROM
node:20-alpine AS frontend-build
WORKDIR
/build/frontend
# Copy everything so lockfile, configs (vite, tailwind, postcss) are all present
COPY
frontend/package.json frontend/package-lock.json* ./
RUN
npm
install
--legacy-peer-deps
COPY
frontend/ ./
# Install deps: use ci if lockfile exists, otherwise install and generate one
RUN if
[
-f
package-lock.json
]
;
then
\
echo
"📦 Found package-lock.json — running npm ci"
&&
\
npm ci
--legacy-peer-deps
;
\
else
\
echo
"⚠️ No package-lock.json — running npm install"
&&
\
npm
install
--legacy-peer-deps
;
\
fi
&&
\
npm cache clean
--force
# Build production bundle
RUN
NODE_ENV
=
production npm run build
RUN
npm run build
# ============================================
# Stage 2: Python Backend + Serve Frontend
...
...
@@ -28,6 +16,7 @@ FROM python:3.11-slim
RUN
apt-get update
&&
apt-get
install
-y
--no-install-recommends
\
build-essential
\
ffmpeg
\
&&
rm
-rf
/var/lib/apt/lists/
*
WORKDIR
/app
...
...
@@ -40,12 +29,11 @@ COPY backend/ ./backend/
COPY
--from=frontend-build /build/frontend/dist ./frontend/dist
# Warm up the ChromaDB embedding model so first request is fast
# Using a separate script file to avoid all quoting issues
COPY
warmup.py /tmp/warmup.py
RUN
python /tmp/warmup.py
&&
rm
/tmp/warmup.py
# Create persistent data directories
RUN
mkdir
-p
/data/chromadb /data/uploads
RUN
mkdir
-p
/data/chromadb /data/uploads
/data/uploads/chat_attachments
ENV
PYTHONUNBUFFERED=1
...
...
backend/routes/attachment_routes.py
0 → 100644
View file @
d55ca0c4
"""
Chat attachment upload, serve, and delete routes.
"""
import
os
from
fastapi
import
APIRouter
,
Depends
,
HTTPException
,
UploadFile
,
File
from
fastapi.responses
import
FileResponse
from
sqlalchemy.orm
import
Session
from
backend.database
import
get_db
from
backend.models
import
User
,
Chat
,
ChatAttachment
from
backend.auth
import
get_current_user
from
backend.services
import
attachment_service
from
backend.config
import
MAX_ATTACHMENT_BYTES
router
=
APIRouter
()
@router.post("/chats/{chat_id}/attachments")
async def upload_attachments(
    chat_id: str,
    files: list[UploadFile] = File(...),
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Upload one or more files as chat attachments.

    Every file is processed independently: a failure on one file is reported
    in its own result entry and does not abort the remaining files.

    Returns:
        ``{"attachments": [...]}`` where each entry is either the attachment
        metadata produced by ``_att_dict`` or ``{"error": "<message>"}``.

    Raises:
        HTTPException: 404 when no chat with ``chat_id`` belongs to ``user``.
    """
    chat = db.query(Chat).filter(Chat.id == chat_id, Chat.user_id == user.id).first()
    if not chat:
        raise HTTPException(404, "Chat not found")

    results = []
    for file in files:
        filename = file.filename or "file"
        # Path of a file written to disk whose DB row is not committed yet.
        pending_path = None
        try:
            content = await file.read()
            if len(content) > MAX_ATTACHMENT_BYTES:
                results.append({
                    "error": f"File too large: {filename} ({len(content) // 1024 // 1024}MB). Max {MAX_ATTACHMENT_BYTES // 1024 // 1024}MB.",
                })
                continue

            meta = attachment_service.save_attachment(
                chat_id=chat_id,
                filename=filename,
                content=content,
                content_type=file.content_type,
            )
            pending_path = meta["storage_path"]

            att = ChatAttachment(
                id=meta["id"],
                chat_id=chat_id,
                filename=meta["filename"],
                original_filename=meta["original_filename"],
                mime_type=meta["mime_type"],
                file_type=meta["file_type"],
                file_size=meta["file_size"],
                storage_path=meta["storage_path"],
                text_extract=meta.get("text_extract"),
            )
            db.add(att)
            db.commit()
            # The row now references the file, so it must survive later errors.
            pending_path = None
            db.refresh(att)
            results.append(_att_dict(att))
        except Exception as e:
            # A failed flush/commit leaves the session unusable until it is
            # rolled back; without this every following file would fail too.
            db.rollback()
            if pending_path:
                # Do not leave a file on disk that no DB row points to.
                attachment_service.delete_attachment_file(pending_path)
            results.append({"error": f"Failed to upload {filename}: {str(e)}"})

    return {"attachments": results}
@router.get("/attachments/{attachment_id}/file")
def serve_attachment(
    attachment_id: str,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Stream an attachment's file to the chat owner or a superadmin.

    Raises:
        HTTPException: 404 when the attachment row or its file on disk is
            missing; 403 when the chat is gone or the caller is neither the
            chat owner nor a ``superadmin``.
    """
    record = (
        db.query(ChatAttachment)
        .filter(ChatAttachment.id == attachment_id)
        .first()
    )
    if not record:
        raise HTTPException(404, "Attachment not found")

    owning_chat = db.query(Chat).filter(Chat.id == record.chat_id).first()
    if not owning_chat:
        raise HTTPException(403, "Access denied")
    # Only the chat owner or a superadmin may read the file.
    if owning_chat.user_id != user.id and user.role != "superadmin":
        raise HTTPException(403, "Access denied")

    file_path = record.storage_path
    if not os.path.exists(file_path):
        raise HTTPException(404, "File not found on disk")

    return FileResponse(
        file_path,
        media_type=record.mime_type,
        filename=record.original_filename,
    )
@router.delete("/attachments/{attachment_id}")
def delete_attachment(
    attachment_id: str,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Delete a single attachment (DB row first, then the file on disk).

    Returns:
        ``{"ok": True}`` on success.

    Raises:
        HTTPException: 404 when the attachment does not exist; 403 when the
            chat is gone or the caller is neither its owner nor a
            ``superadmin``.
    """
    att = db.query(ChatAttachment).filter(ChatAttachment.id == attachment_id).first()
    if not att:
        raise HTTPException(404, "Attachment not found")

    chat = db.query(Chat).filter(Chat.id == att.chat_id).first()
    if not chat or (chat.user_id != user.id and user.role != "superadmin"):
        raise HTTPException(403, "Access denied")

    # Read the path before the row is deleted: the instance cannot be
    # relied on once the delete has been committed.
    storage_path = att.storage_path
    db.delete(att)
    db.commit()
    # Remove the file only after the row is really gone, so a failed commit
    # never leaves a row that points at a missing file.
    attachment_service.delete_attachment_file(storage_path)
    return {"ok": True}
def _att_dict(att: ChatAttachment) -> dict:
    """Serialise a ChatAttachment into the JSON-friendly dict returned by the API."""
    copied_fields = (
        "id",
        "chat_id",
        "message_id",
        "filename",
        "original_filename",
        "mime_type",
        "file_type",
        "file_size",
    )
    payload = {field: getattr(att, field) for field in copied_fields}
    # created_at is stringified with str(); every other field is passed through.
    payload["created_at"] = str(att.created_at)
    return payload
\ No newline at end of file
backend/routes/attachment_routes_15.py
0 → 100644
View file @
d55ca0c4
"""
Chat attachment upload, serve, and delete routes.
"""
import
os
from
typing
import
Optional
from
fastapi
import
APIRouter
,
Depends
,
HTTPException
,
UploadFile
,
File
,
Query
from
fastapi.responses
import
FileResponse
from
sqlalchemy.orm
import
Session
from
backend.database
import
get_db
from
backend.models
import
User
,
Chat
,
ChatAttachment
from
backend.auth
import
get_current_user
,
decode_token
from
backend.services
import
attachment_service
from
backend.config
import
MAX_ATTACHMENT_BYTES
router
=
APIRouter
()
@router.post("/chats/{chat_id}/attachments")
async def upload_attachments(
    chat_id: str,
    files: list[UploadFile] = File(...),
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Upload one or more files as chat attachments.

    Every file is processed independently: a failure on one file is reported
    in its own result entry and does not abort the remaining files.

    Returns:
        ``{"attachments": [...]}`` where each entry is either the attachment
        metadata produced by ``_att_dict`` or ``{"error": "<message>"}``.

    Raises:
        HTTPException: 404 when no chat with ``chat_id`` belongs to ``user``.
    """
    chat = db.query(Chat).filter(Chat.id == chat_id, Chat.user_id == user.id).first()
    if not chat:
        raise HTTPException(404, "Chat not found")

    results = []
    for file in files:
        filename = file.filename or "file"
        # Path of a file written to disk whose DB row is not committed yet.
        pending_path = None
        try:
            content = await file.read()
            if len(content) > MAX_ATTACHMENT_BYTES:
                results.append({
                    "error": f"File too large: {filename} ({len(content) // 1024 // 1024}MB). Max {MAX_ATTACHMENT_BYTES // 1024 // 1024}MB.",
                })
                continue

            meta = attachment_service.save_attachment(
                chat_id=chat_id,
                filename=filename,
                content=content,
                content_type=file.content_type,
            )
            pending_path = meta["storage_path"]

            att = ChatAttachment(
                id=meta["id"],
                chat_id=chat_id,
                filename=meta["filename"],
                original_filename=meta["original_filename"],
                mime_type=meta["mime_type"],
                file_type=meta["file_type"],
                file_size=meta["file_size"],
                storage_path=meta["storage_path"],
                text_extract=meta.get("text_extract"),
            )
            db.add(att)
            db.commit()
            # The row now references the file, so it must survive later errors.
            pending_path = None
            db.refresh(att)
            results.append(_att_dict(att))
        except Exception as e:
            # A failed flush/commit leaves the session unusable until it is
            # rolled back; without this every following file would fail too.
            db.rollback()
            if pending_path:
                # Do not leave a file on disk that no DB row points to.
                attachment_service.delete_attachment_file(pending_path)
            results.append({"error": f"Failed to upload {filename}: {str(e)}"})

    return {"attachments": results}
# Request is needed by the optional-auth dependency below; it is imported here
# because this module's top-level fastapi import does not include it.
from fastapi import Request


def _user_from_token(db: Session, raw_token: Optional[str]) -> Optional[User]:
    """Resolve a raw token to an active User, or None when that is not possible.

    Any failure while decoding the token or loading the user is treated as
    "not authenticated" rather than as a server error.
    """
    if not raw_token:
        return None
    try:
        payload = decode_token(raw_token)
        found = db.query(User).filter(User.id == payload["sub"]).first()
    except Exception:
        # Best effort by design: the caller turns None into a 401.
        return None
    if found is None or not found.is_active:
        return None
    return found


def _optional_current_user(
    request: Request,
    db: Session = Depends(get_db),
) -> Optional[User]:
    """Dependency: return the user named by the Bearer header, else None.

    Unlike a mandatory auth dependency this never raises, which lets the
    route fall back to the ``?token=`` query parameter.
    """
    header_value = request.headers.get("authorization", "")
    scheme, _, credentials = header_value.partition(" ")
    # HTTP auth scheme names are case-insensitive.
    if scheme.lower() != "bearer":
        return None
    return _user_from_token(db, credentials.strip())


@router.get("/attachments/{attachment_id}/file")
def serve_attachment(
    attachment_id: str,
    token: Optional[str] = Query(None),
    user: Optional[User] = Depends(_optional_current_user),
    db: Session = Depends(get_db),
):
    """
    Serve an attachment file.
    Supports both Bearer header auth and ?token= query param
    (needed for <img> tags that can't send headers).

    Raises:
        HTTPException: 401 when neither credential yields an active user;
            404 when the attachment row or its file is missing; 403 when the
            caller is neither the chat owner nor a ``superadmin``.
    """
    # Try query param auth if header auth didn't work
    if user is None and token:
        user = _user_from_token(db, token)

    if user is None:
        raise HTTPException(401, "Authentication required")

    att = db.query(ChatAttachment).filter(ChatAttachment.id == attachment_id).first()
    if not att:
        raise HTTPException(404, "Attachment not found")

    chat = db.query(Chat).filter(Chat.id == att.chat_id).first()
    if not chat or (chat.user_id != user.id and user.role != "superadmin"):
        raise HTTPException(403, "Access denied")

    if not os.path.exists(att.storage_path):
        raise HTTPException(404, "File not found on disk")

    return FileResponse(
        att.storage_path,
        media_type=att.mime_type,
        filename=att.original_filename,
    )


@router.delete("/attachments/{attachment_id}")
def delete_attachment(
    attachment_id: str,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Delete a single attachment (DB row first, then the file on disk).

    Returns:
        ``{"ok": True}`` on success.

    Raises:
        HTTPException: 404 when the attachment does not exist; 403 when the
            chat is gone or the caller is neither its owner nor a
            ``superadmin``.
    """
    att = db.query(ChatAttachment).filter(ChatAttachment.id == attachment_id).first()
    if not att:
        raise HTTPException(404, "Attachment not found")

    chat = db.query(Chat).filter(Chat.id == att.chat_id).first()
    if not chat or (chat.user_id != user.id and user.role != "superadmin"):
        raise HTTPException(403, "Access denied")

    # Read the path before the row is deleted: the instance cannot be
    # relied on once the delete has been committed.
    storage_path = att.storage_path
    db.delete(att)
    db.commit()
    # Remove the file only after the row is really gone, so a failed commit
    # never leaves a row that points at a missing file.
    attachment_service.delete_attachment_file(storage_path)
    return {"ok": True}
def _att_dict(att: ChatAttachment) -> dict:
    """Serialise a ChatAttachment into the JSON-friendly dict returned by the API."""
    copied_fields = (
        "id",
        "chat_id",
        "message_id",
        "filename",
        "original_filename",
        "mime_type",
        "file_type",
        "file_size",
    )
    payload = {field: getattr(att, field) for field in copied_fields}
    # created_at is stringified with str(); every other field is passed through.
    payload["created_at"] = str(att.created_at)
    return payload
\ No newline at end of file
backend/routes/attachment_routes_16.py
0 → 100644
View file @
d55ca0c4
"""
Chat attachment upload, serve, and delete routes.
"""
import
os
from
typing
import
Optional
from
fastapi
import
APIRouter
,
Depends
,
HTTPException
,
UploadFile
,
File
,
Query
,
Request
from
fastapi.responses
import
FileResponse
from
sqlalchemy.orm
import
Session
from
backend.database
import
get_db
from
backend.models
import
User
,
Chat
,
ChatAttachment
from
backend.auth
import
get_current_user
,
decode_token
from
backend.services
import
attachment_service
from
backend.config
import
MAX_ATTACHMENT_BYTES
router
=
APIRouter
()
def _get_user_from_request(request: Request, db: Session, token_param: Optional[str] = None) -> User:
    """
    Resolve user from either:
    1. Authorization: Bearer <token> header
    2. ?token=<token> query parameter (for img/video tags)

    The header wins when both are present.

    Raises:
        HTTPException: 401 when no token is supplied, the token cannot be
            decoded, it carries no ``sub`` claim, or the user is missing or
            inactive.
    """
    raw_token = None

    # Try header first. Auth scheme names are case-insensitive, so accept
    # "bearer"/"BEARER" as well as "Bearer".
    auth_header = request.headers.get("authorization", "")
    scheme, _, credentials = auth_header.partition(" ")
    if scheme.lower() == "bearer":
        raw_token = credentials.strip()

    # Fall back to query param
    if not raw_token and token_param:
        raw_token = token_param

    if not raw_token:
        raise HTTPException(401, "Authentication required")

    try:
        payload = decode_token(raw_token)
        user_id = payload["sub"]
    except HTTPException:
        # decode_token may already raise the right HTTP error; keep it.
        raise
    except Exception as exc:
        # A malformed/expired token or a payload without "sub" is a client
        # authentication problem, not a server error.
        raise HTTPException(401, "Invalid or expired token") from exc

    user = db.query(User).filter(User.id == user_id).first()
    if not user or not user.is_active:
        raise HTTPException(401, "User not found or inactive")
    return user
@router.post("/chats/{chat_id}/attachments")
async def upload_attachments(
    chat_id: str,
    files: list[UploadFile] = File(...),
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Upload one or more files as chat attachments.

    Every file is processed independently: a failure on one file is reported
    in its own result entry and does not abort the remaining files.

    Returns:
        ``{"attachments": [...]}`` where each entry is either the attachment
        metadata produced by ``_att_dict`` or ``{"error": "<message>"}``.

    Raises:
        HTTPException: 404 when no chat with ``chat_id`` belongs to ``user``.
    """
    chat = db.query(Chat).filter(Chat.id == chat_id, Chat.user_id == user.id).first()
    if not chat:
        raise HTTPException(404, "Chat not found")

    results = []
    for file in files:
        filename = file.filename or "file"
        # Path of a file written to disk whose DB row is not committed yet.
        pending_path = None
        try:
            content = await file.read()
            if len(content) > MAX_ATTACHMENT_BYTES:
                results.append({
                    "error": f"File too large: {filename} ({len(content) // 1024 // 1024}MB). Max {MAX_ATTACHMENT_BYTES // 1024 // 1024}MB.",
                })
                continue

            meta = attachment_service.save_attachment(
                chat_id=chat_id,
                filename=filename,
                content=content,
                content_type=file.content_type,
            )
            pending_path = meta["storage_path"]

            att = ChatAttachment(
                id=meta["id"],
                chat_id=chat_id,
                filename=meta["filename"],
                original_filename=meta["original_filename"],
                mime_type=meta["mime_type"],
                file_type=meta["file_type"],
                file_size=meta["file_size"],
                storage_path=meta["storage_path"],
                text_extract=meta.get("text_extract"),
            )
            db.add(att)
            db.commit()
            # The row now references the file, so it must survive later errors.
            pending_path = None
            db.refresh(att)
            results.append(_att_dict(att))
        except Exception as e:
            # A failed flush/commit leaves the session unusable until it is
            # rolled back; without this every following file would fail too.
            db.rollback()
            if pending_path:
                # Do not leave a file on disk that no DB row points to.
                attachment_service.delete_attachment_file(pending_path)
            results.append({"error": f"Failed to upload {filename}: {str(e)}"})

    return {"attachments": results}
@router.get("/attachments/{attachment_id}/file")
def serve_attachment(
    attachment_id: str,
    request: Request,
    token: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """
    Stream an attachment's file to the chat owner or a superadmin.

    The caller is identified by ``_get_user_from_request``, which accepts a
    Bearer header or the ``?token=`` query parameter (the latter exists for
    <img> tags that can't send headers).

    Raises:
        HTTPException: 404 when the attachment row or its file on disk is
            missing; 403 when the chat is gone or the caller is neither the
            chat owner nor a ``superadmin``.
    """
    requester = _get_user_from_request(request, db, token)

    record = (
        db.query(ChatAttachment)
        .filter(ChatAttachment.id == attachment_id)
        .first()
    )
    if not record:
        raise HTTPException(404, "Attachment not found")

    owning_chat = db.query(Chat).filter(Chat.id == record.chat_id).first()
    if not owning_chat:
        raise HTTPException(403, "Access denied")
    # Only the chat owner or a superadmin may read the file.
    if owning_chat.user_id != requester.id and requester.role != "superadmin":
        raise HTTPException(403, "Access denied")

    file_path = record.storage_path
    if not os.path.exists(file_path):
        raise HTTPException(404, "File not found on disk")

    return FileResponse(
        file_path,
        media_type=record.mime_type,
        filename=record.original_filename,
    )
@router.delete("/attachments/{attachment_id}")
def delete_attachment(
    attachment_id: str,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Delete a single attachment (DB row first, then the file on disk).

    Returns:
        ``{"ok": True}`` on success.

    Raises:
        HTTPException: 404 when the attachment does not exist; 403 when the
            chat is gone or the caller is neither its owner nor a
            ``superadmin``.
    """
    att = db.query(ChatAttachment).filter(ChatAttachment.id == attachment_id).first()
    if not att:
        raise HTTPException(404, "Attachment not found")

    chat = db.query(Chat).filter(Chat.id == att.chat_id).first()
    if not chat or (chat.user_id != user.id and user.role != "superadmin"):
        raise HTTPException(403, "Access denied")

    # Read the path before the row is deleted: the instance cannot be
    # relied on once the delete has been committed.
    storage_path = att.storage_path
    db.delete(att)
    db.commit()
    # Remove the file only after the row is really gone, so a failed commit
    # never leaves a row that points at a missing file.
    attachment_service.delete_attachment_file(storage_path)
    return {"ok": True}
def _att_dict(att: ChatAttachment) -> dict:
    """Serialise a ChatAttachment into the JSON-friendly dict returned by the API."""
    copied_fields = (
        "id",
        "chat_id",
        "message_id",
        "filename",
        "original_filename",
        "mime_type",
        "file_type",
        "file_size",
    )
    payload = {field: getattr(att, field) for field in copied_fields}
    # created_at is stringified with str(); every other field is passed through.
    payload["created_at"] = str(att.created_at)
    return payload
\ No newline at end of file
backend/services/attachment_service.py
0 → 100644
View file @
d55ca0c4
"""
Attachment processing service.
Handles images (resize + base64 for Claude vision),
videos (frame extraction via ffmpeg),
PDFs (native document support),
and text files (read content).
"""
import
os
import
io
import
base64
import
shutil
import
subprocess
import
tempfile
import
mimetypes
from
pathlib
import
Path
from
uuid
import
uuid4
from
typing
import
Optional
from
backend
import
config
os
.
makedirs
(
config
.
ATTACHMENT_PATH
,
exist_ok
=
True
)
# ── File type detection ──────────────────────────────
# File extensions (lower-case, with leading dot) recognised per category.
IMAGE_EXTENSIONS = {
    ".jpg", ".jpeg", ".png", ".gif",
    ".webp", ".bmp", ".tiff",
}

VIDEO_EXTENSIONS = {
    ".mp4", ".mov", ".avi", ".mkv",
    ".webm", ".flv", ".wmv", ".m4v",
}

PDF_EXTENSIONS = {".pdf"}

TEXT_EXTENSIONS = {
    # Plain text / docs
    ".txt", ".md", ".csv", ".log",
    # Source code
    ".py", ".js", ".ts", ".jsx", ".tsx", ".cs", ".java", ".cpp", ".c",
    ".h", ".hpp", ".go", ".rs", ".rb", ".php", ".swift", ".kt", ".lua",
    ".gd", ".sql", ".r", ".dart", ".vue", ".svelte", ".sh", ".bash",
    # Markup / styles
    ".html", ".css", ".scss", ".xml",
    # Config / data
    ".json", ".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf", ".env",
    ".gitignore", ".dockerfile", ".makefile",
}

# MIME types recognised per category (matched exactly, as given).
IMAGE_MIMES = {
    "image/jpeg",
    "image/png",
    "image/gif",
    "image/webp",
}

VIDEO_MIMES = {
    "video/mp4",
    "video/quicktime",
    "video/x-msvideo",
    "video/webm",
    "video/x-matroska",
}
def classify_file(filename: str, mime: str) -> str:
    """Return the attachment category: "image", "video", "document" or "text".

    A file matches a category when either its lower-cased extension or its
    MIME type is listed for it; categories are tried in the order above and
    anything unmatched is reported as "text".
    """
    suffix = Path(filename).suffix.lower()
    categories = (
        ("image", IMAGE_EXTENSIONS, IMAGE_MIMES),
        ("video", VIDEO_EXTENSIONS, VIDEO_MIMES),
        ("document", PDF_EXTENSIONS, ("application/pdf",)),
    )
    for label, extensions, mime_types in categories:
        if suffix in extensions or mime in mime_types:
            return label
    return "text"
def get_mime_type(filename: str, content_type: Optional[str] = None) -> str:
    """Pick the MIME type for an upload.

    A client-supplied ``content_type`` is trusted unless it is empty or the
    generic ``application/octet-stream``; otherwise the type is guessed from
    the filename, falling back to ``application/octet-stream``.
    """
    generic = "application/octet-stream"
    if content_type and content_type != generic:
        return content_type

    guessed = mimetypes.guess_type(filename)[0]
    return guessed if guessed else generic
# ── File storage ─────────────────────────────────────
def save_attachment(chat_id: str, filename: str, content: bytes, content_type: Optional[str] = None) -> dict:
    """
    Write an uploaded file into the chat's attachment directory.

    The file is stored as ``<uuid>_<basename with spaces replaced by "_">``
    under ``config.ATTACHMENT_PATH/<chat_id>``. Text is extracted right away
    for "text" and "document" files.

    Returns a metadata dict with the keys: id, filename, original_filename,
    mime_type, file_type, file_size, storage_path, text_extract.
    """
    mime = get_mime_type(filename, content_type)
    kind = classify_file(filename, mime)
    new_id = str(uuid4())

    # One sub-directory per chat.
    target_dir = os.path.join(config.ATTACHMENT_PATH, chat_id)
    os.makedirs(target_dir, exist_ok=True)

    # Keep only the basename and avoid spaces in the on-disk name.
    basename = Path(filename).name.replace(" ", "_")
    on_disk_name = f"{new_id}_{basename}"
    destination = os.path.join(target_dir, on_disk_name)

    with open(destination, "wb") as out:
        out.write(content)

    if kind == "text":
        extracted = _extract_text_content(destination, filename)
    elif kind == "document":
        extracted = _extract_pdf_text(destination)
    else:
        extracted = None

    return {
        "id": new_id,
        "filename": on_disk_name,
        "original_filename": filename,
        "mime_type": mime,
        "file_type": kind,
        "file_size": len(content),
        "storage_path": destination,
        "text_extract": extracted,
    }
def delete_attachment_file(storage_path: str):
    """Delete an attachment file from disk (best effort, never raises).

    A missing file or an empty path is treated as already deleted. Any other
    failure (permissions, path is a directory, invalid path value) is logged
    as a warning instead of being silently discarded.
    """
    import logging

    if not storage_path:
        return
    try:
        # Remove directly instead of checking existence first: the check
        # would race with concurrent deletes and adds nothing.
        os.remove(storage_path)
    except FileNotFoundError:
        return
    except (OSError, TypeError, ValueError) as exc:
        logging.getLogger(__name__).warning(
            "Could not delete attachment file %r: %s", storage_path, exc
        )
def delete_chat_attachments(chat_id: str):
    """Delete all attachment files for a chat.

    Removes the chat's directory below ``config.ATTACHMENT_PATH``. Nothing is
    removed when ``chat_id`` is empty or resolves to the attachment root
    itself or to a location outside of it, so a bad id can never wipe other
    chats' files.
    """
    if not chat_id:
        # join(root, "") is the root itself: refuse to delete everything.
        return

    root = os.path.realpath(config.ATTACHMENT_PATH)
    chat_dir = os.path.realpath(os.path.join(root, chat_id))
    try:
        inside_root = os.path.commonpath([root, chat_dir]) == root
    except ValueError:
        # Paths that cannot be compared (e.g. different drives) are not ours.
        return
    if not inside_root or chat_dir == root:
        return

    if os.path.isdir(chat_dir):
        shutil.rmtree(chat_dir, ignore_errors=True)
# ── Claude content block builders ────────────────────
def build_claude_content_blocks(attachments: list) -> list[dict]:
    """
    Turn ChatAttachment objects into a flat list of message content blocks.

    An attachment may contribute several blocks, one block, or none. If
    processing an attachment raises, a text block describing the failure is
    emitted in its place and the remaining attachments are still processed.
    """
    collected = []
    for attachment in attachments:
        try:
            produced = _process_single_attachment(attachment)
        except Exception as exc:
            collected.append({
                "type": "text",
                "text": f"[Failed to process {attachment.original_filename}: {str(exc)}]",
            })
            continue

        if isinstance(produced, list):
            collected.extend(produced)
        elif produced:
            collected.append(produced)
    return collected
def
_process_single_attachment
(
att
)
->
list
[
dict
]
|
dict
|
None
:
"""Process a single attachment into Claude content blocks."""
if
att
.
file_type
==
"image"
:
return
_build_image_block
(
att
)
elif
att
.
file_type
==
"video"
:
return
_build_video_blocks
(
att
)
elif
att
.
file_type
==
"document"
:
return
_build_document_block
(
att
)
elif
att
.
file_type
==
"text"
:
return
_build_text_block
(
att
)
return
None
def _sniff_image_media_type(b64_data: str) -> Optional[str]:
    """Return the media type implied by the image's magic bytes, or None."""
    try:
        # 32 base64 characters decode to the first 24 bytes of the image.
        head = base64.b64decode(b64_data[:32])
    except ValueError:
        return None
    if head.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    if head.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if head[:6] in (b"GIF87a", b"GIF89a"):
        return "image/gif"
    if head[:4] == b"RIFF" and head[8:12] == b"WEBP":
        return "image/webp"
    return None


def _build_image_block(att) -> dict:
    """Build a base64 image content block for an attachment.

    The bytes come from ``_read_and_resize_image``, which may re-encode the
    picture (PNG stays PNG, everything else becomes JPEG) or hand back the
    original file unchanged. The declared ``media_type`` is therefore taken
    from the actual bytes so that it always matches the data; the stored
    MIME type is only a fallback when the format cannot be recognised.
    """
    data = _read_and_resize_image(att.storage_path, att.mime_type)

    mime = _sniff_image_media_type(data)
    if mime is None:
        # Claude only accepts specific image types
        mime = att.mime_type if att.mime_type in IMAGE_MIMES else "image/jpeg"

    return {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": mime,
            "data": data,
        },
    }
def _build_video_blocks(att) -> list[dict]:
    """Represent a video as a text header followed by one image block per frame.

    When no frames could be extracted, a single explanatory text block is
    returned instead.
    """
    frames = _extract_video_frames(att.storage_path)
    if not frames:
        notice = {
            "type": "text",
            "text": f"[Video: {att.original_filename} — could not extract frames. ffmpeg may not be available.]",
        }
        return [notice]

    header = {
        "type": "text",
        "text": f"[Video: {att.original_filename} — {len(frames)} key frames extracted]",
    }
    frame_blocks = [
        {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/jpeg",
                "data": encoded_frame,
            },
        }
        for encoded_frame in frames
    ]
    return [header, *frame_blocks]
def _build_document_block(att) -> dict:
    """Build a base64 ``document`` block for a PDF attachment.

    Attachments whose MIME type is not ``application/pdf`` are rendered as a
    text block instead.
    """
    if att.mime_type != "application/pdf":
        # Non-PDF documents: fall back to text
        return _build_text_block(att)

    with open(att.storage_path, "rb") as handle:
        raw = handle.read()
    encoded = base64.b64encode(raw).decode("utf-8")

    source = {
        "type": "base64",
        "media_type": "application/pdf",
        "data": encoded,
    }
    return {"type": "document", "source": source}
def _build_text_block(att) -> dict:
    """Wrap an attachment's text in a delimited text content block.

    Uses the stored ``text_extract`` when present, otherwise reads the file
    from disk; a placeholder is used when no text can be obtained.
    """
    name = att.original_filename
    body = att.text_extract or _extract_text_content(att.storage_path, name)
    if not body:
        body = f"[Could not extract text from {name}]"

    return {
        "type": "text",
        "text": f"--- File: {name} ---\n{body}\n--- End of {name} ---",
    }
# ── Image processing ─────────────────────────────────
def _read_and_resize_image(path: str, mime_type: str) -> str:
    """Read an image, shrink it if needed, and return it base64-encoded.

    Transparent and palette images are flattened onto a white background and
    every other mode is converted to RGB. Images larger than
    ``config.MAX_IMAGE_DIMENSION`` on either side are scaled down keeping the
    aspect ratio. The result is encoded as PNG when ``mime_type`` is
    ``image/png`` and as JPEG (quality 85) otherwise.

    If Pillow is not installed or the image cannot be processed, the original
    file bytes are returned base64-encoded, unchanged.
    """
    try:
        from PIL import Image

        # The context manager closes the underlying file even for formats
        # that Pillow would otherwise keep open.
        with Image.open(path) as source:
            img = source

            # Convert to RGB if necessary (handles RGBA, palette, etc.)
            if img.mode in ("RGBA", "LA", "P"):
                background = Image.new("RGB", img.size, (255, 255, 255))
                if img.mode == "P":
                    img = img.convert("RGBA")
                background.paste(img, mask=img.split()[-1] if "A" in img.mode else None)
                img = background
            elif img.mode != "RGB":
                img = img.convert("RGB")

            # Resize if either dimension exceeds max
            max_dim = config.MAX_IMAGE_DIMENSION
            if img.width > max_dim or img.height > max_dim:
                ratio = min(max_dim / img.width, max_dim / img.height)
                # Never let a very thin image collapse to a zero-pixel side.
                new_size = (
                    max(1, int(img.width * ratio)),
                    max(1, int(img.height * ratio)),
                )
                img = img.resize(new_size, Image.LANCZOS)

            # Keep PNG as PNG; re-encode everything else as JPEG.
            buf = io.BytesIO()
            fmt = "PNG" if mime_type == "image/png" else "JPEG"
            save_kwargs = {"quality": 85} if fmt == "JPEG" else {}
            img.save(buf, format=fmt, **save_kwargs)

        return base64.b64encode(buf.getvalue()).decode("utf-8")
    except Exception:
        # Pillow missing or image unreadable: send the raw file instead.
        with open(path, "rb") as f:
            return base64.b64encode(f.read()).decode("utf-8")
# ── Video frame extraction ───────────────────────────
def _extract_video_frames(video_path: str) -> list[str]:
    """Sample frames from a video with ffmpeg and return them as base64 JPEGs.

    Up to ``config.MAX_VIDEO_FRAMES`` frames are taken at evenly spaced
    timestamps strictly inside the clip. Returns an empty list when ffmpeg or
    ffprobe is not on PATH or the duration is not positive. If an error occurs
    part-way through, the frames gathered so far are returned.
    """
    if not (shutil.which("ffmpeg") and shutil.which("ffprobe")):
        return []

    max_frames = config.MAX_VIDEO_FRAMES
    frames = []

    try:
        # Ask ffprobe for the clip duration in seconds.
        probe_cmd = [
            "ffprobe", "-v", "error",
            "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1",
            video_path,
        ]
        probe = subprocess.run(probe_cmd, capture_output=True, text=True, timeout=30)
        duration = float(probe.stdout.strip() or "0")
        if duration <= 0:
            return []

        with tempfile.TemporaryDirectory() as workdir:
            # max_frames + 1 equal gaps put every sample inside the clip.
            step = duration / (max_frames + 1)
            for index in range(max_frames):
                seek_to = step * (index + 1)
                frame_file = os.path.join(workdir, f"frame_{index}.jpg")
                grab_cmd = [
                    "ffmpeg", "-ss", str(seek_to),
                    "-i", video_path,
                    "-vframes", "1",
                    "-vf", f"scale='min({config.MAX_IMAGE_DIMENSION},iw)':'min({config.MAX_IMAGE_DIMENSION},ih)':force_original_aspect_ratio=decrease",
                    "-q:v", "3",
                    frame_file,
                ]
                subprocess.run(grab_cmd, capture_output=True, timeout=30)

                # ffmpeg may produce nothing for a given timestamp; skip it.
                if not os.path.exists(frame_file) or os.path.getsize(frame_file) <= 0:
                    continue
                with open(frame_file, "rb") as fh:
                    frames.append(base64.b64encode(fh.read()).decode("utf-8"))
    except Exception:
        pass

    return frames
# ── Text extraction ──────────────────────────────────
def _extract_text_content(path: str, filename: str) -> Optional[str]:
    """Read up to 500,000 characters of text from ``path``.

    The file is decoded as UTF-8 first and as Latin-1 if that fails. Returns
    None when the file cannot be read. ``filename`` is not used.
    """
    for encoding in ("utf-8", "latin-1"):
        try:
            with open(path, "r", encoding=encoding) as handle:
                return handle.read(500_000)  # Cap at 500K chars
        except UnicodeDecodeError:
            # Not valid in this encoding: try the next one.
            continue
        except Exception:
            return None
    return None
def _extract_pdf_text(path: str) -> Optional[str]:
    """Extract text from the first 100 pages of a PDF.

    Page texts are joined with a blank line. Returns None when no page yields
    text or when anything goes wrong (PyPDF2 missing, unreadable file, ...).
    """
    try:
        from PyPDF2 import PdfReader

        reader = PdfReader(path)
        # Cap at 100 pages
        page_texts = (page.extract_text() for page in reader.pages[:100])
        non_empty = [text for text in page_texts if text]
        if not non_empty:
            return None
        return "\n\n".join(non_empty)
    except Exception:
        return None
\ No newline at end of file
requirements.txt
View file @
d55ca0c4
...
...
@@ -8,4 +8,5 @@ python-multipart==0.0.20
httpx
==0.28.1
chromadb
==0.6.3
PyPDF2
==3.0.1
pydantic
==2.10.4
\ No newline at end of file
pydantic
==2.10.4
Pillow
==11.1.0
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment