Salma Mohammed Hamed / AI Tutor · Commits

Commit 6715bfd4
Authored Sep 29, 2025 by SalmaMohammedHamedMustafa
deploys TTS code on AWS
Parent: 58fc13a9
Showing 4 changed files, with 0 additions and 276 deletions:
TTS/machine_code/download_model.py       +0 −8
TTS/machine_code/download_model_en.py    +0 −59
TTS/machine_code/main_old.py             +0 −156
TTS/machine_code/setup_model.py          +0 −53
TTS/machine_code/download_model.py (deleted; file mode 100644 → 0)
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="OmarSamir/EGTTS-V0.1",
    repo_type="model",
    local_dir="./model/EGTTS-V0.1",
    local_dir_use_symlinks=False
)
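As a quick sanity check, not part of the commit, the downloaded snapshot can be verified against the files that setup_model.py later loads; the directory and file names below mirror that script.

import os

# Confirm the files setup_model.py expects are present after the download.
for name in ("config.json", "vocab.json"):
    path = os.path.join("./model/EGTTS-V0.1", name)
    print(f"{path}: {'found' if os.path.exists(path) else 'MISSING'}")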
TTS/machine_code/download_model_en.py (deleted; file mode 100644 → 0)
from TTS.api import TTS
import torch

# --- 1. Server Startup: Load Model ---
use_gpu = torch.cuda.is_available()
print(f"GPU Available: {use_gpu}")

# Initialize the model directly on the GPU if available
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=use_gpu)
print("TTS model loaded.")

# --- 2. Server Startup: Pre-calculate Speaker Latents ---
SPEAKER_AUDIO_PATH = "calm_anan_1.wav"  # Make sure this path is correct
print(f"Pre-calculating speaker latents from: {SPEAKER_AUDIO_PATH}")

try:
    gpt_cond_latent, speaker_embedding = tts.synthesizer.tts_model.get_conditioning_latents(
        audio_path=[SPEAKER_AUDIO_PATH]
    )
    print("Speaker latents calculated and stored successfully.")
except Exception as e:
    print(f"Error calculating speaker latents: {e}")
    gpt_cond_latent, speaker_embedding = None, None

# --- 3. Inside Your API Endpoint (Handling a Request) ---
if gpt_cond_latent is not None:
    text1 = """This is the first sentence. It will use the pre-calculated voice.
This is the second sentence. It will use the same voice as the first one.
This is the third sentence. Again, same voice.
This is the fourth sentence. Still the same voice.
This is the fifth sentence. Still the same voice.
This is the sixth sentence. Still the same voice.
This is the seventh sentence. Still the same voice.
This is the eighth sentence. Still the same voice.
This is the ninth sentence. Still the same voice.
"""
    print(f"Synthesizing: '{text1}'")

    # Call the .inference() method on the actual model object.   <--- CHANGE 1
    # This bypasses the simple checks and uses our advanced parameters directly.
    out = tts.synthesizer.tts_model.inference(
        text=text1,
        language="en",
        speaker_embedding=speaker_embedding,
        gpt_cond_latent=gpt_cond_latent
    )

    # .inference() returns a dictionary; the audio is in the 'wav' key.   <--- CHANGE 2
    wav_output_1 = out["wav"]
    print("Synthesis complete!")

    # For testing, save the output to a file
    import soundfile as sf
    sf.write("output_test.wav", wav_output_1, 24000)
    print("Test audio saved to output_test.wav")
else:
    print("Could not proceed with synthesis because speaker latents failed to compute.")
TTS/machine_code/main_old.py (deleted; file mode 100644 → 0)
import os
import torch
import soundfile as sf
import io
import warnings
import logging
import numpy as np
import nltk
from typing import Literal
from fastapi import FastAPI
from fastapi.responses import StreamingResponse, JSONResponse
from pydantic import BaseModel
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts
from TTS.api import TTS

# --- NEW & IMPROVED: Suppress Harmless Warnings and Logs ---
# 1. Suppress UserWarnings and FutureWarnings from various libraries.
#    This handles the torchaudio warnings and many from transformers.
warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', category=FutureWarning)

# 2. Set the logging level for the 'transformers' library to ERROR.
#    This hides informational messages (like the one about GPT2InferenceModel)
#    without suppressing actual errors. It is the most effective way to clean the log.
logging.getLogger("transformers").setLevel(logging.ERROR)

# --- Application Setup ---
app = FastAPI()

# --- Global Variables for Models and Speaker Latents ---
model_ar = None
gpt_cond_latent_ar = None
speaker_embedding_ar = None

tts_en = None
gpt_cond_latent_en = None
speaker_embedding_en = None

# --- Text Splitting Helper Function (Unchanged) ---
def split_text_into_chunks(text: str, max_chars: int, language: str):
    sentences = nltk.sent_tokenize(text)
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        if len(current_chunk) + len(sentence) + 1 <= max_chars:
            current_chunk += sentence + " "
        else:
            if current_chunk:
                chunks.append(current_chunk.strip())
                current_chunk = ""  # reset so the flushed text is not appended again
            if len(sentence) > max_chars:
                # The sentence alone exceeds the limit: fall back to word-level splitting.
                words = sentence.split()
                temp_chunk = ""
                for word in words:
                    if len(temp_chunk) + len(word) + 1 <= max_chars:
                        temp_chunk += word + " "
                    else:
                        chunks.append(temp_chunk.strip())
                        temp_chunk = word + " "
                if temp_chunk:
                    chunks.append(temp_chunk.strip())
            else:
                current_chunk = sentence + " "
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks

# --- Model Loading (Unchanged) ---
@app.on_event("startup")
def load_models():
    global model_ar, gpt_cond_latent_ar, speaker_embedding_ar
    global tts_en, gpt_cond_latent_en, speaker_embedding_en

    use_gpu = torch.cuda.is_available()
    print(f"GPU Available: {use_gpu}")

    print("Server starting up: Loading the ARABIC TTS model...")
    try:
        CONFIG_FILE_PATH_AR = './model/EGTTS-V0.1/config.json'
        VOCAB_FILE_PATH_AR = './model/EGTTS-V0.1/vocab.json'
        MODEL_PATH_AR = './model/EGTTS-V0.1/'
        SPEAKER_AUDIO_PATH_AR = 'calm_anan_1.wav'

        config_ar = XttsConfig()
        config_ar.load_json(CONFIG_FILE_PATH_AR)
        model_ar = Xtts.init_from_config(config_ar)
        model_ar.load_checkpoint(config_ar, checkpoint_dir=MODEL_PATH_AR,
                                 use_deepspeed=False, vocab_path=VOCAB_FILE_PATH_AR)
        if use_gpu:
            model_ar.cuda()

        print("Computing ARABIC speaker characteristics...")
        gpt_cond_latent_ar, speaker_embedding_ar = model_ar.get_conditioning_latents(
            audio_path=[SPEAKER_AUDIO_PATH_AR]
        )
        print("ARABIC model loaded successfully.")
    except Exception as e:
        print(f"FATAL ERROR: Could not load the ARABIC model. Error: {e}")
        model_ar = None

    print("Server starting up: Loading the ENGLISH TTS model...")
    try:
        tts_en = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=use_gpu)
        SPEAKER_AUDIO_PATH_EN = "calm_anan_1.wav"
        print("Computing ENGLISH speaker characteristics...")
        gpt_cond_latent_en, speaker_embedding_en = tts_en.synthesizer.tts_model.get_conditioning_latents(
            audio_path=[SPEAKER_AUDIO_PATH_EN]
        )
        print("ENGLISH model loaded successfully.")
    except Exception as e:
        print(f"FATAL ERROR: Could not load the ENGLISH model. Error: {e}")
        tts_en = None

# --- Pydantic Model for Request Body (Unchanged) ---
class SynthesisRequest(BaseModel):
    text: str
    language: Literal["ar", "en"]

# --- The Unified API Endpoint (Unchanged) ---
@app.post("/synthesize")
async def synthesize(request: SynthesisRequest):
    if request.language == "ar" and model_ar is None:
        return JSONResponse(content={"error": "The Arabic model is not loaded."}, status_code=503)
    if request.language == "en" and tts_en is None:
        return JSONResponse(content={"error": "The English model is not loaded."}, status_code=503)

    try:
        if request.language == "ar":
            char_limit = 140
        else:
            char_limit = 220

        text_chunks = split_text_into_chunks(request.text, char_limit, request.language)
        print(f"Text split into {len(text_chunks)} chunks.")

        all_audio_chunks = []
        silence_duration_ms = 300
        # 300 ms of silence at the 24 kHz output rate, inserted between chunks
        silence_samples = np.zeros(int(24000 * silence_duration_ms / 1000), dtype=np.float32)

        for i, chunk in enumerate(text_chunks):
            print(f"Synthesizing chunk {i+1}/{len(text_chunks)}: '{chunk}'")
            out = None
            if request.language == "ar":
                out = model_ar.inference(chunk, "ar", gpt_cond_latent_ar,
                                         speaker_embedding_ar, temperature=0.1)
            elif request.language == "en":
                out = tts_en.synthesizer.tts_model.inference(
                    text=chunk,
                    language="en",
                    speaker_embedding=speaker_embedding_en,
                    gpt_cond_latent=gpt_cond_latent_en
                )
            all_audio_chunks.append(out["wav"])
            if i < len(text_chunks) - 1:
                all_audio_chunks.append(silence_samples)

        final_audio = np.concatenate(all_audio_chunks)
        buffer = io.BytesIO()
        sf.write(buffer, final_audio, 24000, format='WAV')
        buffer.seek(0)
        return StreamingResponse(buffer, media_type="audio/wav")
    except Exception as e:
        print(f"An error occurred during audio generation: {e}")
        return JSONResponse(content={"error": "Failed to generate audio"}, status_code=500)

# ssh -i "SalmaAI.pem" -L 5000:localhost:5000 ubuntu@ec2-18-193-226-85.eu-central-1.compute.amazonaws.com
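The trailing ssh command forwards local port 5000 to the EC2 host, which implies the app is served on port 5000 there. A minimal client sketch for the /synthesize endpoint, assuming that tunnel is active and the requests package is installed (neither assumption comes from the commit itself):

import requests

# POST text to /synthesize through the ssh tunnel and save the WAV reply.
resp = requests.post(
    "http://localhost:5000/synthesize",
    json={"text": "Hello from the AI Tutor.", "language": "en"},
    timeout=300,  # long texts are synthesized chunk by chunk and can take a while
)
resp.raise_for_status()
with open("reply.wav", "wb") as f:
    f.write(resp.content)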
TTS/machine_code/setup_model.py (deleted; file mode 100644 → 0)
import os
import torch
import torchaudio
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts

CONFIG_FILE_PATH = './model/EGTTS-V0.1/config.json'
VOCAB_FILE_PATH = './model/EGTTS-V0.1/vocab.json'
MODEL_PATH = './model/EGTTS-V0.1/'

print("Loading model...")
config = XttsConfig()
config.load_json(CONFIG_FILE_PATH)
model = Xtts.init_from_config(config)
model.load_checkpoint(config, checkpoint_dir=MODEL_PATH,
                      use_deepspeed=False, vocab_path=VOCAB_FILE_PATH)

# move model to GPU if available
if torch.cuda.is_available():
    model.cuda()
    print("Model moved to GPU.")

# compute speaker latents
SPEAKER_AUDIO_PATH = 'calm_anan_1.wav'
print("Computing speaker latents...")
gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[SPEAKER_AUDIO_PATH])

# Egyptian Arabic test text. Rough English gloss: "I'm Anan, founder of
# شرع العلوم, and I'm here to help you learn anything you want to learn in
# the sciences. It's an educational site offering free courses in many
# fields, like programming, design, and marketing. Just visit the site,
# pick the course that suits you, and start learning right away at no cost.
# What would you like to learn today? I'm here for any questions; ask
# whatever you like. Which field would you like to learn more about?"
text = """
انا عنان مؤسس شرع العلوم وانا هنا عشان اساعدك تتعلم اي حاجة عايز تتعلمها فالعلوم
انا شرع العلوم موقع تعليمي بيقدم كورسات مجانية في مجالات متعددة زي البرمجة، التصميم، التسويق، وغيرها
كل اللي عليك تعمله تزور الموقع وتختار الكورس اللي يناسبك وتبدأ تتعلم على طول من غير اي تكلفة
تحب تتعلم ايه النهاردة؟
اي اسئلة عندك انا هنا عشان اساعدك
اي استفسار انا تحت امرك
اسال زي ما انت عايز
في اي مجال تحب تتعلم اكتر؟
"""

print("Inference...")
out = model.inference(
    text,
    "ar",
    gpt_cond_latent,
    speaker_embedding,
    temperature=0.1,
)

AUDIO_OUTPUT_PATH = "output_audio.wav"
import soundfile as sf
sf.write(AUDIO_OUTPUT_PATH, out["wav"], 24000)
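A quick way to sanity-check the result, not part of the commit, is to read the written file back and report its duration:

import soundfile as sf

# Hypothetical check: load output_audio.wav and print its length.
data, sr = sf.read("output_audio.wav")
print(f"Sample rate: {sr} Hz, duration: {len(data) / sr:.2f} s")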