Add voice recognition

This commit is contained in:
Richard Chien 2017-01-01 23:16:34 +08:00
parent 6e86d36056
commit 1880409b7f
11 changed files with 156 additions and 15 deletions

View File

@ -9,4 +9,8 @@ COPY requirements.txt requirements.txt
RUN pip install --upgrade pip RUN pip install --upgrade pip
RUN pip install -r requirements.txt RUN pip install -r requirements.txt
RUN apt-get update \
&& apt-get install -y ffmpeg \
&& rm -rf /var/lib/apt/lists/*
CMD python app.py CMD python app.py

View File

@ -0,0 +1,12 @@
import jieba
from command import CommandRegistry
__registry__ = cr = CommandRegistry()
@cr.register('process')
@cr.restrict(full_command_only=True)
def process(args_text, ctx_msg, internal=False):
    """
    Handle a natural-language message registered as the 'process' command.

    At present this only writes the raw text and its jieba
    ``cut_for_search`` segmentation to stdout; ``ctx_msg`` and ``internal``
    are accepted but not used.

    :param args_text: the message text to analyse
    :param ctx_msg: context message dict (unused here)
    :param internal: unused flag
    """
    print('自然语言消息处理', args_text)
    tokens = list(jieba.cut_for_search(args_text))
    print(tokens)

View File

@ -97,6 +97,6 @@ def translate_to(args_text, ctx_msg):
data = resp.json() data = resp.json()
print(data) print(data)
if 'trans_result' in data: if 'trans_result' in data:
core.echo('翻译结果\n' + '\n'.join([x['dst'] for x in data['trans_result']]), ctx_msg) core.echo('翻译结果(百度翻译)\n' + '\n'.join([x['dst'] for x in data['trans_result']]), ctx_msg)
return return
core.echo('翻译失败,可能因为后台接口的频率限制或服务器连接不上', ctx_msg) core.echo('翻译失败,可能因为后台接口的频率限制或服务器连接不上', ctx_msg)

View File

@ -1,5 +1,5 @@
config = { config = {
'fallback_command': 'core.chat', 'fallback_command': 'natural_language.process',
'command_start_flags': ('/', '', '来,', '来,'), 'command_start_flags': ('/', '', '来,', '来,'),
'command_name_separators': ('\.', '->', '::', '/'), # Regex 'command_name_separators': ('\.', '->', '::', '/'), # Regex
'command_args_start_flags': ('', '', ',', ', ', ':', ': '), # Regex 'command_args_start_flags': ('', '', ',', ', ', ':', ': '), # Regex

View File

@ -30,30 +30,33 @@ def _load_commands():
def _dispatch_command(ctx_msg): def _dispatch_command(ctx_msg):
# noinspection PyBroadException
try: try:
content = ctx_msg.get('content', '').lstrip() text = ctx_msg.get('text', '').lstrip()
if not text:
raise SkipException
source = get_source(ctx_msg) source = get_source(ctx_msg)
start_flag = None start_flag = None
for flag in _command_start_flags: for flag in _command_start_flags:
# Match the command start flag # Match the command start flag
if content.startswith(flag): if text.startswith(flag):
start_flag = flag start_flag = flag
break break
if not start_flag or len(content) <= len(start_flag): if not start_flag or len(text) <= len(start_flag):
# No command, check if a session exists # No command, check if a session exists
if interactive.has_session(source): if interactive.has_session(source):
command = [interactive.get_session(source).cmd, content] command = [interactive.get_session(source).cmd, text]
else: else:
# Use fallback # Use fallback
if _fallback_command: if _fallback_command:
command = [_fallback_command, content] command = [_fallback_command, text]
else: else:
# No fallback # No fallback
raise SkipException raise SkipException
else: else:
# Split command and arguments # Split command and arguments
command = re.split('|'.join(_command_args_start_flags), command = re.split('|'.join(_command_args_start_flags),
content[len(start_flag):], 1) text[len(start_flag):], 1)
if len(command) == 1: if len(command) == 1:
# Add an empty argument # Add an empty argument
command.append('') command.append('')

View File

@ -5,8 +5,9 @@ from commands import core
def _print_help_message(ctx_msg): def _print_help_message(ctx_msg):
a = ['help', '怎么用', '怎么用啊', '你好', '你好啊', '帮助', a = ['help', '怎么用', '怎么用啊', '你好', '你好啊', '帮助',
'用法', '使用帮助', '使用指南', '使用说明', '使用方法', '用法', '使用帮助', '使用指南', '使用说明', '使用方法',
'你能做什么', '你能做些什么', '你会做什么', '你会做些什么'] '你能做什么', '你能做些什么', '你会做什么', '你会做些什么',
if ctx_msg.get('content', '').strip() in a: '你可以做什么', '你可以做些什么']
if ctx_msg.get('text', '').strip() in a:
core.help('', ctx_msg) core.help('', ctx_msg)
return False return False
return True return True

View File

@ -0,0 +1,22 @@
"""
This filter intercepts messages that contain content that is not allowed and copies text content to the 'text' field.
"""
from filter import add_filter
def _filter(ctx_msg):
    """
    Reject messages whose format is not allowed and copy textual content
    into the 'text' field of the context message.

    Rules, by the 'via' field:
      * 'wx': non-text messages are only accepted when 'type' is
        'friend_message', and only the 'text' and 'media' formats are
        accepted at all. For the 'text' format the content is copied to 'text'.
      * 'qq': the content is always copied to 'text'.
      * anything else: passed through untouched.

    A missing (None) content is stored as an empty string, so that later
    consumers calling string methods on ``ctx_msg.get('text', '')`` do not
    fail on None.

    :param ctx_msg: context message dict, mutated in place
    :return: False if the message is rejected, True otherwise
    """
    via = ctx_msg.get('via')
    if via == 'wx':
        msg_format = ctx_msg.get('format')
        if msg_format != 'text' and ctx_msg.get('type') != 'friend_message':
            return False
        if msg_format not in ('text', 'media'):
            return False
        carries_text = msg_format == 'text'
    else:
        carries_text = via == 'qq'

    if carries_text:
        content = ctx_msg.get('content')
        # Never store None: the key would exist and defeat the '' default
        # used by ``ctx_msg.get('text', '')`` further down the chain.
        ctx_msg['text'] = '' if content is None else content
    return True
add_filter(_filter, 100)

View File

@ -1,3 +1,7 @@
"""
This filter just logs messages to stdout.
"""
from filter import add_filter from filter import add_filter

View File

@ -0,0 +1,88 @@
"""
This filter recognizes speech in voice messages and stores the result in the 'text' field of the context message.
"""
import re
import os
import base64
import requests
from pydub import AudioSegment
import speech_recognition as sr
from filter import add_filter
from commands import core
def _recognize_baidu(wav_path, unique_id, api_key, secret_key, language='zh', timeout=10):
    """
    Recognize speech in a WAV file using the Baidu speech REST API.

    An access token is requested with the given credentials, then the audio
    is uploaded base64-encoded in a JSON body.

    :param wav_path: path of the WAV file to upload
    :param unique_id: value sent as 'cuid' to identify the caller
    :param api_key: Baidu API key
    :param secret_key: Baidu secret key
    :param language: value sent as 'lan'
    :param timeout: seconds to wait on each HTTP request
    :return: the recognized text with leading/trailing Chinese punctuation
             stripped, or None if credentials are missing, a request fails,
             or the response carries no result
    """
    if not api_key or not secret_key:
        # Without credentials the token request cannot succeed.
        return None

    api_url = 'http://vop.baidu.com/server_api'
    auth_url = 'https://openapi.baidu.com/oauth/2.0/token'
    try:
        # Let requests build the query string so the credentials are URL-encoded.
        resp = requests.get(
            auth_url,
            params={
                'grant_type': 'client_credentials',
                'client_id': api_key,
                'client_secret': secret_key,
            },
            timeout=timeout,
        )
        if resp.status_code != 200:
            return None
        data = resp.json()
    except (requests.RequestException, ValueError):
        # Network failure or a non-JSON body: treat as "not recognized".
        return None
    if not data or 'access_token' not in data:
        return None

    with open(wav_path, 'rb') as f:
        audio_data = f.read()

    payload = {
        'format': 'wav',
        # NOTE(review): the audio is declared as 8000 Hz mono; confirm the
        # caller actually exports the WAV with that sample rate.
        'rate': 8000,
        'channel': 1,
        'cuid': unique_id,
        'token': data['access_token'],
        'lan': language,
        'speech': base64.b64encode(audio_data).decode('utf-8'),
        'len': len(audio_data),  # length of the raw (un-encoded) audio
    }
    try:
        resp = requests.post(api_url, json=payload, timeout=timeout)
        if resp.status_code != 200:
            return None
        data = resp.json()
    except (requests.RequestException, ValueError):
        return None

    if data and 'result' in data:
        return ''.join(data['result']).strip(',。?!')
    return None
def _recognize_bing(wav_path, api_key, language='zh-CN'):
    """
    Recognize speech in a WAV file through the SpeechRecognition library's
    Bing recognizer.

    :param wav_path: path of the WAV file to read
    :param api_key: Bing speech API key
    :param language: language code passed to the recognizer
    :return: the recognized text, or None when the audio could not be
             understood or the request failed
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_path) as audio_file:
        recorded = recognizer.record(audio_file)

    try:
        return recognizer.recognize_bing(recorded, key=api_key, language=language)
    except (sr.UnknownValueError, sr.RequestError):
        return None
def _filter(ctx_msg):
    """
    Recognize the speech in a voice message and store it in the 'text' field.

    Only messages with via == 'wx', format == 'media' and
    media_type == 'voice' whose content looks like '[语音](<path>.mp3)' are
    handled. The MP3 is converted to a temporary WAV file next to it, sent to
    the Baidu recognizer, and the sender is told the outcome via
    ``core.echo``. The temporary WAV file is removed even if conversion or
    recognition raises.

    :param ctx_msg: context message dict; 'text' is set on success
    :return: None in every case (the function has no explicit return value)
    """
    if not (ctx_msg.get('via') == 'wx'
            and ctx_msg.get('format') == 'media'
            and ctx_msg.get('media_type') == 'voice'):
        return

    # A missing content must not reach re.match(), which rejects None.
    content = ctx_msg.get('content') or ''
    m = re.match(r'\[语音\]\(([/_A-Za-z0-9]+\.mp3)\)', content)
    if not m:
        return

    core.echo('正在识别语音内容,请稍等……', ctx_msg)
    mp3_path = m.group(1)
    wav_path = os.path.splitext(mp3_path)[0] + '.wav'
    try:
        voice = AudioSegment.from_mp3(mp3_path)
        voice.export(wav_path, format='wav')
        text = _recognize_baidu(
            wav_path,
            # Only the last 60 characters of the sender id are sent;
            # tolerate a missing id.
            str(ctx_msg.get('sender_id') or '')[-60:],
            os.environ.get('BAIDU_SPEECH_API_KEY'),
            os.environ.get('BAIDU_SPEECH_SECRET_KEY'),
            language='zh'
        )
        # text = _recognize_bing(
        #     wav_path,
        #     os.environ.get('BING_SPEECH_API_KEY'),
        #     language='zh-CN'
        # )
    finally:
        # Always clean up the temporary file; it may not exist if the
        # conversion itself failed.
        try:
            os.remove(wav_path)
        except OSError:
            pass

    if text:
        reply = '识别结果(百度语音识别):\n%s\n\n下面将把识别到的内容作为文字消息处理……' % text
        ctx_msg['text'] = text
    else:
        reply = '抱歉哦,没有识别出你说的是什么'
    core.echo(reply, ctx_msg)
add_filter(_filter, 90)

View File

@ -1,21 +1,25 @@
"""
This filter intercepts messages not intended for the bot and removes the leading "@xxx".
"""
from filter import add_filter from filter import add_filter
def _split_at_xiaokai(ctx_msg): def _split_at_xiaokai(ctx_msg):
if ctx_msg.get('type') == 'group_message' or ctx_msg.get('type') == 'discuss_message': if ctx_msg.get('type') == 'group_message' or ctx_msg.get('type') == 'discuss_message':
content = ctx_msg.get('content', '') text = ctx_msg.get('text', '')
if content.startswith('@'): if text.startswith('@'):
my_group_nick = ctx_msg.get('receiver') my_group_nick = ctx_msg.get('receiver')
if not my_group_nick: if not my_group_nick:
return False return False
at_me = '@' + my_group_nick at_me = '@' + my_group_nick
if not content.startswith(at_me): if not text.startswith(at_me):
return False return False
content = content[len(at_me):] text = text[len(at_me):]
else: else:
# Not starts with '@' # Not starts with '@'
return False return False
ctx_msg['content'] = content.lstrip() ctx_msg['text'] = text.lstrip()
return True return True

View File

@ -5,3 +5,6 @@ cachetools
pytz pytz
flask flask
sqlalchemy sqlalchemy
pydub
SpeechRecognition
jieba