from TTS.api import TTS
import torch

# --- 1. Server Startup: Load Model ---
# Decide once, at startup, whether CUDA can be used. `use_gpu` stays a plain
# boolean so the log line below reads the same as before.
use_gpu = torch.cuda.is_available()
print(f"GPU Available: {use_gpu}")
device = "cuda" if use_gpu else "cpu"

# Load XTTS v2 and move it to the selected device. The `gpu=` constructor
# argument is deprecated upstream (it emits a warning); `.to(device)` is the
# documented replacement and returns the same TTS object.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2").to(device)
print("TTS model loaded.")


# --- 2. Server Startup: Pre-calculate Speaker Latents ---
# Reference recording of the target voice. Make sure this path is correct.
SPEAKER_AUDIO_PATH = "calm_anan_1.wav"
print(f"Pre-calculating speaker latents from: {SPEAKER_AUDIO_PATH}")

# Compute the conditioning latents a single time so every later request can
# reuse them. Any failure is reported and leaves both values as None, which
# the synthesis step checks before running.
try:
    reference_clips = [SPEAKER_AUDIO_PATH]
    latents = tts.synthesizer.tts_model.get_conditioning_latents(
        audio_path=reference_clips
    )
    gpt_cond_latent, speaker_embedding = latents
    print("Speaker latents calculated and stored successfully.")
except Exception as err:
    print(f"Error calculating speaker latents: {err}")
    gpt_cond_latent = speaker_embedding = None


# --- 3. Inside Your API Endpoint (Handling a Request) ---
# Synthesize only when the startup step produced speaker latents; otherwise
# report why nothing was generated.
if gpt_cond_latent is not None:
    text1 = """This is the first sentence. It will use the pre-calculated voice.
This is the second sentence. It will use the same voice as the first one.
This is the third sentence. Again, same voice.
This is the fourth sentence. Still the same voice.
This is the fifth sentence. still the same voice.
This is the sixth sentence. still the same voice.
This is the seventh sentence. still the same voice.
This is the eighth sentence. still the same voice.
This is the ninth sentence. still the same voice.
"""
    print(f"Synthesizing: '{text1}'")

    # Call .inference() on the underlying model so the pre-computed latents
    # are reused instead of being re-derived from the reference audio.
    #
    # Calling the model directly skips the sentence splitting that the
    # high-level API performs. This text is far longer than the per-pass
    # character limit XTTS applies for English, so without splitting the
    # generated audio can be cut short. `enable_text_splitting=True` makes
    # the model split the text into sentences, synthesize each one and join
    # the results.
    out = tts.synthesizer.tts_model.inference(
        text=text1,
        language="en",
        speaker_embedding=speaker_embedding,
        gpt_cond_latent=gpt_cond_latent,
        enable_text_splitting=True,
    )

    # .inference() returns a dictionary; the audio samples are under 'wav'.
    wav_output_1 = out['wav']

    print("Synthesis complete!")

    # For testing, save the output to a file.
    # NOTE(review): 24000 is assumed to be the model's output sample rate --
    # confirm if the model is ever swapped.
    import soundfile as sf
    sf.write("output_test.wav", wav_output_1, 24000)
    print("Test audio saved to output_test.wav")

else:
    print("Could not proceed with synthesis because speaker latents failed to compute.")