-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvoice_chat_agent.py
74 lines (60 loc) · 2.73 KB
/
voice_chat_agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from voice_recognition import VoiceRecognizer, select_audio_device
from llm_thinking import LLMThinker
from text_to_speech import TTSGenerator
class VoiceAgent:
    """Voice chat agent: speech in -> LLM response -> speech out.

    Wires together a ``VoiceRecognizer`` (recording and transcription), an
    ``LLMThinker`` (response generation) and a ``TTSGenerator`` (speech
    output), and drives them from a blocking conversation loop.
    """

    # Spoken words that end the conversation. Transcripts are normalised
    # (case, surrounding whitespace and punctuation) before being compared.
    EXIT_COMMANDS = frozenset({'quit', 'exit', 'goodbye', 'bye'})

    # Characters stripped from both ends of a transcript before the exit
    # check, so that e.g. " Goodbye." is recognised as "goodbye".
    _TRIM_CHARS = " \t\r\n.,!?;:\"'"

    def __init__(self, device_id=1):
        """Create the recognizer, LLM client and TTS generator.

        Args:
            device_id: Passed straight to ``VoiceRecognizer``; ``main`` supplies
                the value returned by ``select_audio_device()``.

        Raises:
            Exception: Whatever a component constructor raises. Components
                that were already created are cleaned up before re-raising.
        """
        print("\nInitializing Voice Chat Agent...")
        try:
            self.recognizer = VoiceRecognizer(device_id)
            self.thinker = LLMThinker()
            self.tts = TTSGenerator(default_voice='af_heart')
        except Exception:
            # A later constructor failed: release whatever was already
            # created, since the caller never receives an object to clean up.
            self.cleanup()
            raise
        print("Voice Chat Agent initialization complete!")

    def cleanup(self):
        """Clean up all resources.

        Best-effort: a failure while cleaning up one component is reported
        and does not prevent the remaining components from being cleaned up.
        Safe to call on a partially initialised instance.
        """
        for attr_name in ('recognizer', 'tts'):
            component = getattr(self, attr_name, None)
            if component is None:
                continue
            try:
                component.cleanup()
            except Exception as e:
                print(f"\nError cleaning up {attr_name}: {e}")

    @classmethod
    def _is_exit_command(cls, text):
        """Return True if *text*, once normalised, is one of EXIT_COMMANDS."""
        return text.lower().strip(cls._TRIM_CHARS) in cls.EXIT_COMMANDS

    def chat_loop(self):
        """Run the record -> transcribe -> respond -> speak loop.

        The loop ends when the user says an exit command or presses Ctrl+C.
        Errors raised during a single turn are printed and the loop starts a
        new recording. ``cleanup()`` is always called on the way out.
        """
        print("\nVoice Chat Agent ready! Press Ctrl+C to exit")
        print("Make sure LM Studio is running and the API is active!")
        print("Speak clearly into your microphone. You should see █ when voice is detected.")
        try:
            while True:
                try:
                    # Record and transcribe audio
                    audio_data = self.recognizer.record_audio()
                    if audio_data is None or len(audio_data) == 0:
                        print("\nNo audio recorded, trying again...")
                        continue

                    # Convert speech to text; treat a None transcript as empty
                    text = self.recognizer.transcribe_audio(audio_data) or ""
                    print(f"\nYou said: {text}")

                    if not text.strip():
                        print("\nNo speech detected, trying again...")
                        continue

                    if self._is_exit_command(text):
                        print("\nGoodbye!")
                        break

                    # Get LLM response
                    response = self.thinker.get_response(text)

                    # Convert response to speech
                    print("\nSpeaking...")
                    self.tts.generate_speech(response)
                except Exception as e:
                    # Keep the session alive on per-turn failures.
                    # KeyboardInterrupt is not an Exception subclass, so
                    # Ctrl+C still reaches the outer handler.
                    print(f"\nError in conversation loop: {e}")
                    print("Restarting recording...")
        except KeyboardInterrupt:
            print("\nExiting...")
        except Exception as e:
            print(f"\nError: {e}")
            print("Make sure LM Studio is running and the API is active!")
        finally:
            self.cleanup()
def main():
    """Pick an audio device, build a VoiceAgent for it and run the chat loop."""
    chosen_device = select_audio_device()
    # Hand the selected device to the agent and block in its conversation loop.
    VoiceAgent(device_id=chosen_device).chat_loop()
# Start the voice chat only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()