Web-based voice robot
GitHub repository: https://github.com/Yang915/WebToy
Special note: this project was tested on the local machine. The page accesses the system microphone through the browser, which browsers normally only allow on a secure origin (HTTPS or localhost). Actual testing was done in Firefox; correct operation in other browsers is not guaranteed!
Project layout:
Main application: main.py
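If the page has to be reached from another machine rather than localhost, one way to satisfy the secure-origin requirement during testing is to run Flask with an ad-hoc self-signed certificate. A minimal sketch, not part of the repository, assuming pyOpenSSL is installed (pip install pyopenssl); the browser will still show a certificate warning that must be accepted manually:

# run_https.py -- hypothetical alternative entry point for HTTPS testing
from main import app

if __name__ == '__main__':
    # 'adhoc' makes Flask generate a temporary self-signed certificate on startup
    app.run(host='0.0.0.0', port=5000, ssl_context='adhoc')

Note that WebToy.html hardcodes serv as http://127.0.0.1:5000, so the AJAX URL would need to be changed to match the scheme and host actually used.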
import os
from flask import Flask, render_template, request

from BP_get_answer import app_get_answer  # blueprint that serves the answer audio

app = Flask(__name__)
app.debug = True

app.register_blueprint(app_get_answer)  # register the blueprint


@app.route('/ai_uploader', methods=['GET', 'POST'])
def answer():
    # Receive the voice message sent by the front end and save it to a fixed directory
    reco_file = request.files.get('reco')
    # Use the standard-library uuid module to generate a unique file name
    from uuid import uuid4
    filename = f'{uuid4()}.wav'

    filepath = os.path.join(os.path.dirname(__file__), 'audio', 'questions', filename)
    reco_file.save(filepath)
    # print('question audio saved to:', filepath)

    # Speech recognition: convert the audio to PCM, save it, then recognize it to get the question text
    from ASR import asr
    text_question = asr(filepath)
    # print('question text:', text_question)

    # Natural language processing: turn the question text into the answer text
    from NLP import nlp
    text_answer = nlp(text_question)
    # print('answer text:', text_answer)

    # Speech synthesis: synthesize the answer text and save it to the answers directory
    from TTS import tts
    answer_filepath = tts(text_answer)
    print('answer audio file path:', answer_filepath)

    # Return the file name of the answer audio
    answer_filename = os.path.basename(answer_filepath)
    return {'filename': answer_filename}


# Render the demo page
@app.route('/record')
def get_record():
    return render_template('WebToy.html')


if __name__ == '__main__':
    app.run()
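The /ai_uploader endpoint can also be exercised without the browser front end. A minimal sketch using the requests library, assuming the server is running locally and question.wav is an existing recording (both the file name and the script are illustrative, not part of the project):

# test_uploader.py -- hypothetical helper script, not part of the repository
import requests

with open('question.wav', 'rb') as f:
    # The field name 'reco' must match request.files.get('reco') in main.py
    resp = requests.post('http://127.0.0.1:5000/ai_uploader', files={'reco': f})

answer_name = resp.json()['filename']
print('answer audio:', 'http://127.0.0.1:5000/get_answer/' + answer_name)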
Template page: WebToy.html (the static JS files are available in the GitHub repository)
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title id="title"></title>
</head>
<body>
<audio id="player" autoplay controls></audio>
<p>
    <button onclick="start_reco()">Start recording</button>
</p>
<p>
    <button onclick="ai_reco()" style="background-color: cornflowerblue">Send voice command</button>
</p>
</body>
<script type="application/javascript" src="/static/jquery-3.3.1.min.js"></script>
<script type="text/javascript" src="/static/Recorder.js"></script>
<script type="application/javascript">
    var serv = "http://127.0.0.1:5000";
    var audio_context = new AudioContext();  // audio context object
    navigator.getUserMedia = (navigator.getUserMedia ||
        navigator.webkitGetUserMedia ||
        navigator.mozGetUserMedia ||
        navigator.msGetUserMedia);

    navigator.getUserMedia({audio: true}, create_stream, function (err) {
        console.log(err)
    });

    function create_stream(user_media) {
        var stream_input = audio_context.createMediaStreamSource(user_media);
        reco = new Recorder(stream_input);
    }

    function start_reco() {
        reco.record();
    }

    function ai_reco() {
        reco.stop();

        reco.exportWAV(function (wav_file) {
            console.log(wav_file);
            var formdata = new FormData();      // form data: {key: value}
            formdata.append("reco", wav_file);  // equivalent to <input type="file" name="reco">
            $.ajax({
                url: serv + "/ai_uploader",
                type: 'post',
                processData: false,
                contentType: false,
                data: formdata,
                dataType: 'json',
                success: function (data) {
                    // console.log(data.filename);
                    var filename = data.filename;
                    document.getElementById('player').src = serv + '/get_answer/' + filename;
                }
            })
        });

        reco.clear();
    }
</script>
</html>
Component -- speech recognition: ASR.py
import os
from aip import AipSpeech

APP_ID = '16815394'
API_KEY = 'jM4b8GIG9gzrzySTRq3szK2E'
SECRET_KEY = 'iE626cEpjT1iAVwh24XV5h1QFuR8FPD2'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


# Read the audio file
def get_file_content(filepath):
    # Convert the file to PCM (ffmpeg must be installed and on the PATH)
    filename = os.path.basename(filepath)
    pcm_filename = filename.split('.')[0] + '.pcm'
    pcm_filepath = os.path.join(os.path.dirname(__file__), 'audio', 'pcm', pcm_filename)
    cmd_str = f'ffmpeg -y -i {filepath} -acodec pcm_s16le -f s16le -ac 1 -ar 16000 {pcm_filepath}'
    os.system(cmd_str)  # run the command in a shell via os.system()
    filepath = pcm_filepath

    with open(filepath, 'rb') as fp:
        return fp.read()


# Recognize a local file
def asr(filePath):
    pcm_file = get_file_content(filePath)
    result = client.asr(pcm_file, 'pcm', 16000, {
        'dev_pid': 1536,
    })
    print(result)
    try:
        text = result.get('result')[0]
    except (TypeError, IndexError):
        # Recognition failed: fall back to a nonsense string that no template will match
        text = '[email protected]#$%^&^%$#$%^&*'
    # print(text)
    return text
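os.system() builds the ffmpeg command by string interpolation, which breaks if a path contains spaces or shell metacharacters. A minimal alternative sketch using subprocess.run with an argument list (same ffmpeg flags, no shell involved); this is an illustration, not the code used in the repository:

import subprocess

def wav_to_pcm(wav_path, pcm_path):
    # Convert to 16 kHz, mono, 16-bit little-endian raw PCM, as required by the Baidu ASR API
    cmd = ['ffmpeg', '-y', '-i', wav_path,
           '-acodec', 'pcm_s16le', '-f', 's16le', '-ac', '1', '-ar', '16000',
           pcm_path]
    subprocess.run(cmd, check=True)  # raises CalledProcessError if ffmpeg fails
    with open(pcm_path, 'rb') as fp:
        return fp.read()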
Component -- Turing Robot API module: TuringRobotAPI.py
'''
Turing Robot API V2.0 documentation: https://www.kancloud.cn/turing/www-tuling123-com/718227
'''
import json

import requests


def turingRobotAnswer(text_question):
    request_json = {
        "perception": {                # required: input (must contain inputText, inputImage or inputMedia)
            "inputText": {             # optional: text input
                "text": text_question  # required: 1-128 characters of plain text
            },
        },
        "userInfo": {                  # required: user parameters
            "apiKey": "11cb5ce350c54016974151892635388b",  # required: 32-character robot key
            "userId": "123"            # required: unique user id, at most 32 characters
        }
    }
    # POST request; the parameters are described in the documentation linked above
    result = requests.post('http://openapi.tuling123.com/openapi/api/v2', json=request_json)
    # print(result)
    text_answer = json.loads(result.content).get('results')[0].get('values').get('text')
    return text_answer
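The response above is indexed without any error handling, so a missing 'results' field (for example, when the API key's daily quota is exhausted and only an error code comes back) raises an exception. A hedged sketch of a more defensive wrapper around the same endpoint; the function name and the fallback sentence are made up for illustration:

import requests

def turing_answer_safe(text_question, api_key, user_id='123', fallback='我没有听懂,请再说一遍'):
    payload = {
        'perception': {'inputText': {'text': text_question}},
        'userInfo': {'apiKey': api_key, 'userId': user_id},
    }
    try:
        resp = requests.post('http://openapi.tuling123.com/openapi/api/v2',
                             json=payload, timeout=5)
        results = resp.json().get('results') or []
        for item in results:
            if item.get('resultType') == 'text':
                return item['values']['text']
    except requests.RequestException:
        pass  # network error or timeout: fall through to the fallback answer
    return fallback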
Component -- natural language processing module: NLP.py
from aip import AipNlp

from TuringRobotAPI import turingRobotAnswer

APP_ID = '16815394'
API_KEY = 'jM4b8GIG9gzrzySTRq3szK2E'
SECRET_KEY = 'iE626cEpjT1iAVwh24XV5h1QFuR8FPD2'

NLP_client = AipNlp(APP_ID, API_KEY, SECRET_KEY)


def nlp(text_question):
    # Canned answer for the "what is your name" intent
    text_answer = '我是无所不知的智能小机器人飞飞!'
    # Short-text similarity between the question and the template "你叫什么名字"
    score = NLP_client.simnet(text_question, '你叫什么名字').get('score')
    if score < 0.58:
        # Not similar enough to the template: hand the question over to the Turing robot
        text_answer = turingRobotAnswer(text_question)
    return text_answer
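The same simnet idea extends naturally to more than one canned reply: score the question against every template and only fall back to the Turing robot when no template is similar enough. A minimal sketch under the same 0.58 threshold, assuming it lives in NLP.py next to NLP_client and turingRobotAnswer; the second template/answer pair is invented for illustration:

# Hypothetical template table; only the first pair exists in the original project
TEMPLATES = {
    '你叫什么名字': '我是无所不知的智能小机器人飞飞!',
    '你几岁了': '我刚出生不久,还是个小宝宝!',
}

def nlp_multi(text_question, threshold=0.58):
    best_answer, best_score = None, 0
    for question, answer in TEMPLATES.items():
        score = NLP_client.simnet(text_question, question).get('score', 0)
        if score > best_score:
            best_answer, best_score = answer, score
    if best_score >= threshold:
        return best_answer
    return turingRobotAnswer(text_question)  # no template matched well enough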
Component -- speech synthesis module: TTS.py
import os

from aip import AipSpeech

APP_ID = '16815394'
API_KEY = 'jM4b8GIG9gzrzySTRq3szK2E'
SECRET_KEY = 'iE626cEpjT1iAVwh24XV5h1QFuR8FPD2'

SPEECH_client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

# TTS voice options: speed, pitch, volume and speaker
VOICE = {'spd': 5, 'pit': 7, 'vol': 6, 'per': 4}


def tts(text_answer):
    audio = SPEECH_client.synthesis(text_answer, 'zh', 1, VOICE)
    from uuid import uuid4
    answer_filename = f'{uuid4()}.mp3'
    answer_filepath = os.path.join(os.path.dirname(__file__), 'audio', 'answers', answer_filename)

    # On success the SDK returns the MP3 bytes; on failure it returns an error dict
    if not isinstance(audio, dict):
        with open(answer_filepath, 'wb') as f:
            f.write(audio)

    return answer_filepath
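When synthesis fails, the SDK returns an error dict instead of MP3 bytes, and tts() above still returns a path that was never written, which only surfaces later in main.py. A hedged sketch of a stricter variant that raises instead, assuming it sits in TTS.py next to SPEECH_client and VOICE; the function name and exception type are just an illustration:

def tts_strict(text_answer):
    audio = SPEECH_client.synthesis(text_answer, 'zh', 1, VOICE)
    if isinstance(audio, dict):
        # The error dict typically carries fields such as 'err_no' and 'err_msg'
        raise RuntimeError(f'TTS failed: {audio}')

    from uuid import uuid4
    answer_filepath = os.path.join(os.path.dirname(__file__), 'audio', 'answers', f'{uuid4()}.mp3')
    with open(answer_filepath, 'wb') as f:
        f.write(audio)
    return answer_filepath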
Component -- answer-audio blueprint module: BP_get_answer.py
import os

from flask import Blueprint, send_file

dir_static = os.path.join('audio', 'answers')
app_get_answer = Blueprint('get_answer', __name__, static_folder=dir_static, static_url_path='/get_answer')


@app_get_answer.route('/<filename>')
def get_answer(filename):
    return send_file(os.path.join(dir_static, filename))
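Serving a file by joining a user-supplied filename onto a directory can allow path traversal (for example, requesting ../../main.py). Flask's send_from_directory refuses such names; a minimal sketch of the same view rewritten with it, intended as a replacement for get_answer above rather than an additional route:

from flask import send_from_directory

@app_get_answer.route('/<filename>')
def get_answer(filename):
    # send_from_directory rejects filenames that try to escape dir_static
    return send_from_directory(dir_static, filename)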
Original article: https://www.cnblogs.com/open-yang/p/11202683.html