28 lines
795 B
Python
28 lines
795 B
Python
|
|
"""
|
||
|
|
Voice output — converts text to speech using Fish Audio TTS API.
|
||
|
|
"""
|
||
|
|
import os
|
||
|
|
import asyncio
|
||
|
|
from fish_audio_sdk import Session, TTSRequest
|
||
|
|
|
||
|
|
async def speak(text: str) -> bytes:
    """
    Convert text to speech audio bytes using Fish Audio TTS.

    Configuration is read from the environment on every call:
    ``FISH_AUDIO_API_KEY`` supplies the API key and the optional
    ``FISH_AUDIO_VOICE_ID`` is passed as the request's ``reference_id``
    (``None`` when unset).

    The SDK session is consumed with a plain blocking iteration, so the
    whole request runs in the event loop's default executor to keep the
    loop free for other coroutines while the audio is produced.

    Args:
        text: The text to synthesize.

    Returns:
        Raw audio bytes (MP3): every chunk yielded by the SDK, concatenated
        in order.
    """
    api_key = os.getenv("FISH_AUDIO_API_KEY")
    voice_id = os.getenv("FISH_AUDIO_VOICE_ID")

    def _synthesize() -> bytes:
        # Blocking helper: opens a session, streams the TTS response and
        # joins the chunks into a single bytes object.
        with Session(apikey=api_key) as session:
            request = TTSRequest(text=text, reference_id=voice_id)
            return b"".join(session.tts(request))

    # Inside a coroutine a loop is always running; get_running_loop() is the
    # preferred way to obtain it (get_event_loop() is discouraged here).
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _synthesize)
|