[voice] add support for whisper-1 model

Leslie0310 · Mar 8, 2023 · 74e7f7c
1 parent 7123559
commit 74e7f7c
Show file tree

Hide file tree

Showing 4 changed files with 13 additions and 7 deletions.
diff --git a/README.md b/README.md
@@ -72,6 +72,9 @@ cd chatgpt-on-wechat/
 pip3 install itchat-uos==1.5.0.dev0
 pip3 install --upgrade openai
 
+默认使用openai的whisper-1模型
+如果使用百度的语音识别，需要安装百度的pythonSDK
+pip3 install baidu-aip
 如果使用google的语音识别，需要安装speech_recognition和依赖的ffmpeg和espeak
 pip3 install SpeechRecognition
 --在MacOS中安装ffmpeg，brew install ffmpeg espeak

diff --git a/channel/wechat/wechat_channel.py b/channel/wechat/wechat_channel.py
@@ -5,6 +5,7 @@
 """
 
 import os
+import pathlib
 import itchat
 import json
 from itchat.content import *
@@ -37,11 +38,11 @@ def handler_single_voice(msg):
 
 
 class WechatChannel(Channel):
-    tmpFilePath = './tmp/'
+    tmpFilePath = pathlib.Path('./tmp/')
 
     def __init__(self):
-        isExists = os.path.exists(self.tmpFilePath)
-        if not isExists: 
+        pathExists = os.path.exists(self.tmpFilePath)
+        if not pathExists and conf().get('speech_recognition') == True: 
             os.makedirs(self.tmpFilePath)
 
     def startup(self):

diff --git a/voice/google/google_voice.py b/voice/google/google_voice.py
@@ -3,6 +3,7 @@
 google voice service
 """
 
+import pathlib
 import subprocess
 import time
 import speech_recognition
@@ -12,7 +13,6 @@
 
 
 class GoogleVoice(Voice):
-    tmpFilePath = './tmp/'
     recognizer = speech_recognition.Recognizer()
     engine = pyttsx3.init()
 

diff --git a/voice/openai/openai_voice.py b/voice/openai/openai_voice.py
@@ -4,19 +4,21 @@
 """
 import json
 import openai
+from config import conf
 from common.log import logger
 from voice.voice import Voice
 
 
 class OpenaiVoice(Voice):
     def __init__(self):
-        pass
+        openai.api_key = conf().get('open_ai_api_key')
 
     def voiceToText(self, voice_file):
+        logger.debug(
+            '[Openai] voice file name={}'.format(voice_file))
         file = open(voice_file, "rb")
         reply = openai.Audio.transcribe("whisper-1", file)
-        json_dict = json.loads(reply)
-        text = json_dict['text']
+        text = reply["text"]
         logger.info(
             '[Openai] voiceToText text={} voice file name={}'.format(text, voice_file))
         return text