Skip to content

Commit

Permalink
[voice] add support for whisper-1 model
Browse files Browse the repository at this point in the history
  • Loading branch information
wanggang1987 committed Mar 8, 2023
1 parent 7123559 commit 74e7f7c
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 7 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ cd chatgpt-on-wechat/
pip3 install itchat-uos==1.5.0.dev0
pip3 install --upgrade openai

默认使用OpenAI的whisper-1模型进行语音识别
如果使用百度的语音识别,需要安装百度的Python SDK
pip3 install baidu-aip
如果使用Google的语音识别,需要安装SpeechRecognition及其依赖的ffmpeg和espeak
pip3 install SpeechRecognition
--在macOS中安装ffmpeg和espeak:brew install ffmpeg espeak
Expand Down
7 changes: 4 additions & 3 deletions channel/wechat/wechat_channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""

import os
import pathlib
import itchat
import json
from itchat.content import *
Expand Down Expand Up @@ -37,11 +38,11 @@ def handler_single_voice(msg):


class WechatChannel(Channel):
tmpFilePath = './tmp/'
tmpFilePath = pathlib.Path('./tmp/')

def __init__(self):
isExists = os.path.exists(self.tmpFilePath)
if not isExists:
pathExists = os.path.exists(self.tmpFilePath)
if not pathExists and conf().get('speech_recognition') == True:
os.makedirs(self.tmpFilePath)

def startup(self):
Expand Down
2 changes: 1 addition & 1 deletion voice/google/google_voice.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
google voice service
"""

import pathlib
import subprocess
import time
import speech_recognition
Expand All @@ -12,7 +13,6 @@


class GoogleVoice(Voice):
tmpFilePath = './tmp/'
recognizer = speech_recognition.Recognizer()
engine = pyttsx3.init()

Expand Down
8 changes: 5 additions & 3 deletions voice/openai/openai_voice.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,21 @@
"""
import json
import openai
from config import conf
from common.log import logger
from voice.voice import Voice


class OpenaiVoice(Voice):
def __init__(self):
pass
openai.api_key = conf().get('open_ai_api_key')

def voiceToText(self, voice_file):
logger.debug(
'[Openai] voice file name={}'.format(voice_file))
file = open(voice_file, "rb")
reply = openai.Audio.transcribe("whisper-1", file)
json_dict = json.loads(reply)
text = json_dict['text']
text = reply["text"]
logger.info(
'[Openai] voiceToText text={} voice file name={}'.format(text, voice_file))
return text
Expand Down

0 comments on commit 74e7f7c

Please sign in to comment.