Skip to content

Commit

Permalink
Merge pull request #30 from KeneNwogu/main
Browse files Browse the repository at this point in the history
fix: speech synthesis integration into streamlit runner
  • Loading branch information
Sammybams authored Oct 15, 2024
2 parents 2fcb821 + 01704e6 commit 19c1e63
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 15 deletions.
44 changes: 32 additions & 12 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
import logging
from dotenv import load_dotenv
from src.speech_io import transcribe_audio
from src.speech_io import transcribe_audio, synthesize_speech
from src.rag_functions import allowed_files, file_check_num, extract_contents_from_doc
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
Expand Down Expand Up @@ -95,42 +95,62 @@ def get_llm() -> ChatOpenAI:
else:
st.session_state.uploaded_files = None


def send_response(message, response=None):
    """Append the assistant's reply to the chat history and speak it aloud.

    Parameters:
        message: the user message that triggered this reply (kept for
            interface stability with existing callers).
        response: the assistant's reply text; falls back to a canned
            greeting when None (placeholder until real RAG answers are
            wired in).
    """
    dummy_response = "Hello. How are you?"
    reply = response or dummy_response
    st.session_state.messages.append(('assistant', reply))
    # TODO: make async ?? -- speech synthesis blocks the Streamlit script run
    # Was a stray debugging print(); use the module's logging instead.
    logging.debug("Assistant reply: %s", reply)
    synthesize_speech(text=reply)


# Chat area and audio input handling
def send_message():
    """on_submit callback for the text chat input.

    Records the submitted text as a user message, then produces the
    assistant's reply (text plus synthesized speech).
    """
    user_text = st.session_state.prompt
    st.session_state.messages.append(('user', user_text))
    # Get a response, turn it into speech, and reply to the user.
    send_response(user_text)


# Initialise the chat transcript once per session.
if 'messages' not in st.session_state:
    st.session_state.messages = []

# Container that the message history is rendered into.
message = st.container()

# Text input is now handled by the keyed st.chat_input widget declared at
# the bottom of the script (on_submit=send_message); the old inline handler
# is kept here, commented out, for reference only.
# if prompt := st.chat_input("Enter your query"):
#     message.chat_message("user").write(prompt)

# Handle audio input from user
audio_value = st.experimental_audio_input("Record a voice message")
if audio_value:
def handle_audio_message():
    """on_change callback for the audio recorder widget.

    Persists the recorded clip to ``audio.wav``, transcribes it, and sends
    the transcript through the normal chat/response flow. Transcription
    failure is reported to the user; unexpected errors are surfaced via
    ``st.error`` and logged.
    """
    audio_value = st.session_state.audio_prompt
    try:
        # The speech service reads from a file, so buffer the clip to disk.
        with open("audio.wav", "wb") as f:
            f.write(audio_value.getbuffer())

        speech_text = transcribe_audio("audio.wav")
        if speech_text:
            message.chat_message("user").write(speech_text)
            st.session_state.messages.append(("user", speech_text))
            # Placeholder canned answer until the RAG pipeline supplies one.
            send_response(speech_text, "You have a great voice")
            logging.info("Audio transcribed successfully.")
        else:
            message.chat_message("user").write("Sorry, I couldn't transcribe your audio. Please try again.")
            send_response(speech_text, "Sorry, I couldn't transcribe your audio. Please try again.")
            logging.warning("Audio transcription failed.")
    except Exception as e:
        st.error("An error occurred while processing the audio. Please try again.")
        # NOTE(review): the source had this log line duplicated (diff
        # artifact); logged exactly once here.
        logging.error(f"Error processing audio input: {e}")


# Input area for user queries; submission is handled by send_message via
# the widget callback, so no return value is needed here.
st.chat_input("Enter your query", key='prompt', on_submit=send_message)

# Display the full chat transcript on every script rerun.
for role, text in st.session_state.messages:
    st.chat_message(role).write(text)


# Handle audio input from the user; processing happens in the
# handle_audio_message callback when a new recording arrives.
audio_value = st.experimental_audio_input("Record a voice message", key="audio_prompt", on_change=handle_audio_message)
5 changes: 4 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ altair==5.4.1
annotated-types==0.7.0
anyio==4.6.0
attrs==24.2.0
azure-ai-formrecognizer==3.3.3
azure-identity==1.19.0
azure-cognitiveservices-speech==1.40.0
blinker==1.8.2
cachetools==5.5.0
Expand Down Expand Up @@ -68,4 +70,5 @@ tqdm==4.66.5
typing_extensions==4.12.2
tzdata==2024.2
urllib3==2.2.3
yarl==1.12.1
werkzeug==3.0.4
7 changes: 5 additions & 2 deletions src/speech_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,16 +77,19 @@ def synthesize_speech(text, output_file="output.wav", voice_name='en-NG-EzinneNe
if not SPEECH_KEY or not SPEECH_REGION:
return False, "Azure Speech Service credentials are missing."

output = open(output_file, 'w+')
output.close()

try:
# Configure speech service
speech_config = speechsdk.SpeechConfig(subscription=SPEECH_KEY, region=SPEECH_REGION)
audio_config = speechsdk.audio.AudioOutputConfig(filename=output_file)
# audio_config = speechsdk.audio.AudioOutputConfig(filename=output_file)

# Set the voice for synthesis
speech_config.speech_synthesis_voice_name = voice_name

# Create synthesizer and generate speech
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)
result = speech_synthesizer.speak_text_async(text).get()

# Handle the result
Expand Down

0 comments on commit 19c1e63

Please sign in to comment.