mirror of
https://github.com/nonebot/nonebot2.git
synced 2025-02-20 17:46:51 +08:00
Add voice recognition
This commit is contained in:
parent
6e86d36056
commit
1880409b7f
@ -9,4 +9,8 @@ COPY requirements.txt requirements.txt
|
||||
RUN pip install --upgrade pip
|
||||
RUN pip install -r requirements.txt
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y ffmpeg \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
CMD python app.py
|
12
commands/natural_language.py
Normal file
12
commands/natural_language.py
Normal file
@ -0,0 +1,12 @@
|
||||
import jieba
|
||||
|
||||
from command import CommandRegistry
|
||||
|
||||
# One CommandRegistry instance bound to two names: `cr` is the short alias used
# by the decorators on the command functions in this module.
# NOTE(review): `__registry__` is presumably the attribute the command loader
# looks up when importing this module -- confirm against the loader.
__registry__ = cr = CommandRegistry()
|
||||
|
||||
|
||||
@cr.register('process')
@cr.restrict(full_command_only=True)
def process(args_text, ctx_msg, internal=False):
    """
    Print the incoming natural-language text and its jieba segmentation.

    :param args_text: the message text to segment
    :param ctx_msg: context message (not used here)
    :param internal: not used here
    :return: None; the only effect is the two lines written to stdout
    """
    print('自然语言消息处理', args_text)
    # cut_for_search() yields tokens lazily; materialize them so the printed
    # output is the token list itself rather than a generator object.
    tokens = list(jieba.cut_for_search(args_text))
    print(tokens)
|
@ -97,6 +97,6 @@ def translate_to(args_text, ctx_msg):
|
||||
data = resp.json()
|
||||
print(data)
|
||||
if 'trans_result' in data:
|
||||
core.echo('翻译结果:\n' + '\n'.join([x['dst'] for x in data['trans_result']]), ctx_msg)
|
||||
core.echo('翻译结果(百度翻译):\n' + '\n'.join([x['dst'] for x in data['trans_result']]), ctx_msg)
|
||||
return
|
||||
core.echo('翻译失败,可能因为后台接口的频率限制或服务器连接不上', ctx_msg)
|
||||
|
@ -1,5 +1,5 @@
|
||||
config = {
|
||||
'fallback_command': 'core.chat',
|
||||
'fallback_command': 'natural_language.process',
|
||||
'command_start_flags': ('/', '/', '来,', '来,'),
|
||||
'command_name_separators': ('\.', '->', '::', '/'), # Regex
|
||||
'command_args_start_flags': (',', ':', ',', ', ', ':', ': '), # Regex
|
||||
|
@ -30,30 +30,33 @@ def _load_commands():
|
||||
|
||||
|
||||
def _dispatch_command(ctx_msg):
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
content = ctx_msg.get('content', '').lstrip()
|
||||
text = ctx_msg.get('text', '').lstrip()
|
||||
if not text:
|
||||
raise SkipException
|
||||
source = get_source(ctx_msg)
|
||||
start_flag = None
|
||||
for flag in _command_start_flags:
|
||||
# Match the command start flag
|
||||
if content.startswith(flag):
|
||||
if text.startswith(flag):
|
||||
start_flag = flag
|
||||
break
|
||||
if not start_flag or len(content) <= len(start_flag):
|
||||
if not start_flag or len(text) <= len(start_flag):
|
||||
# No command, check if a session exists
|
||||
if interactive.has_session(source):
|
||||
command = [interactive.get_session(source).cmd, content]
|
||||
command = [interactive.get_session(source).cmd, text]
|
||||
else:
|
||||
# Use fallback
|
||||
if _fallback_command:
|
||||
command = [_fallback_command, content]
|
||||
command = [_fallback_command, text]
|
||||
else:
|
||||
# No fallback
|
||||
raise SkipException
|
||||
else:
|
||||
# Split command and arguments
|
||||
command = re.split('|'.join(_command_args_start_flags),
|
||||
content[len(start_flag):], 1)
|
||||
text[len(start_flag):], 1)
|
||||
if len(command) == 1:
|
||||
# Add an empty argument
|
||||
command.append('')
|
||||
|
@ -5,8 +5,9 @@ from commands import core
|
||||
def _print_help_message(ctx_msg):
|
||||
a = ['help', '怎么用', '怎么用啊', '你好', '你好啊', '帮助',
|
||||
'用法', '使用帮助', '使用指南', '使用说明', '使用方法',
|
||||
'你能做什么', '你能做些什么', '你会做什么', '你会做些什么']
|
||||
if ctx_msg.get('content', '').strip() in a:
|
||||
'你能做什么', '你能做些什么', '你会做什么', '你会做些什么',
|
||||
'你可以做什么', '你可以做些什么']
|
||||
if ctx_msg.get('text', '').strip() in a:
|
||||
core.help('', ctx_msg)
|
||||
return False
|
||||
return True
|
||||
|
22
filters/intercept_some_message_formats_100.py
Normal file
22
filters/intercept_some_message_formats_100.py
Normal file
@ -0,0 +1,22 @@
|
||||
"""
|
||||
This filter intercepts messages that contain disallowed content and moves the text content to the 'text' field.
|
||||
"""
|
||||
|
||||
from filter import add_filter
|
||||
|
||||
|
||||
def _filter(ctx_msg):
    """
    Reject message formats the bot does not handle and populate 'text'.

    For WeChat messages (via == 'wx'):
      * formats other than 'text' and 'media' are rejected;
      * 'media' messages are accepted only when type is 'friend_message';
      * for 'text' messages the content is copied to ctx_msg['text'].
    For QQ messages (via == 'qq') the content is always copied to
    ctx_msg['text']. Messages from any other source pass through unchanged.

    :param ctx_msg: context message dict; may be modified in place
    :return: False if the message is rejected, True otherwise
    """
    via = ctx_msg.get('via')
    if via == 'wx':
        msg_format = ctx_msg.get('format')
        if msg_format not in ('text', 'media'):
            return False
        if msg_format == 'media' and ctx_msg.get('type') != 'friend_message':
            return False
        if msg_format == 'text':
            # Never store None: consumers read ctx_msg.get('text', '') and
            # call str methods on it, and the default does not apply once
            # the key exists.
            ctx_msg['text'] = ctx_msg.get('content') or ''
    elif via == 'qq':
        ctx_msg['text'] = ctx_msg.get('content') or ''
    return True
|
||||
|
||||
|
||||
# Register the filter defined above with the filter framework.
# NOTE(review): the second argument (100) presumably is the priority that the
# numeric suffix of this file's name refers to -- confirm the ordering
# semantics in filter.add_filter.
add_filter(_filter, 100)
|
@ -1,3 +1,7 @@
|
||||
"""
|
||||
This filter just logs the message to stdout.
|
||||
"""
|
||||
|
||||
from filter import add_filter
|
||||
|
||||
|
||||
|
88
filters/speech_recognition_90.py
Normal file
88
filters/speech_recognition_90.py
Normal file
@ -0,0 +1,88 @@
|
||||
"""
|
||||
This filter recognizes speech in voice messages and stores it in the 'text' field of the context message.
|
||||
"""
|
||||
|
||||
import re
|
||||
import os
|
||||
import base64
|
||||
|
||||
import requests
|
||||
from pydub import AudioSegment
|
||||
import speech_recognition as sr
|
||||
|
||||
from filter import add_filter
|
||||
from commands import core
|
||||
|
||||
|
||||
def _recognize_baidu(wav_path, unique_id, api_key, secret_key, language='zh', timeout=10):
    """
    Recognize the speech in a WAV file using the Baidu speech REST API.

    An access token is first requested with the client-credentials grant,
    then the base64-encoded audio is posted as JSON, declared as 8000 Hz
    mono WAV.

    :param wav_path: path of the WAV file to recognize
    :param unique_id: value sent as 'cuid' to identify the caller
    :param api_key: Baidu API key (sent as 'client_id')
    :param secret_key: Baidu secret key (sent as 'client_secret')
    :param language: value sent as 'lan'
    :param timeout: timeout in seconds applied to each HTTP request
    :return: the recognized text with leading/trailing ',。?!' removed, or
             None if authentication, the request, or recognition fails
    :raises OSError: if the WAV file cannot be read
    """
    api_url = 'http://vop.baidu.com/server_api'
    auth_url = 'https://openapi.baidu.com/oauth/2.0/token'

    try:
        # Let requests build the query string so the credentials are
        # URL-encoded instead of being interpolated verbatim.
        auth_resp = requests.get(
            auth_url,
            params={
                'grant_type': 'client_credentials',
                'client_id': api_key,
                'client_secret': secret_key,
            },
            timeout=timeout,
        )
        if auth_resp.status_code != 200:
            return None
        auth_data = auth_resp.json()
        if not auth_data or 'access_token' not in auth_data:
            return None

        with open(wav_path, 'rb') as f:
            audio_data = f.read()

        payload = {
            'format': 'wav',
            'rate': 8000,
            'channel': 1,
            'cuid': unique_id,
            'token': auth_data['access_token'],
            'lan': language,
            'speech': base64.b64encode(audio_data).decode('utf-8'),
            # 'len' is the size of the raw audio, not of the base64 text
            'len': len(audio_data),
        }
        api_resp = requests.post(api_url, json=payload, timeout=timeout)
        if api_resp.status_code != 200:
            return None
        result_data = api_resp.json()
    except (requests.RequestException, ValueError):
        # Network failure or a non-JSON body: report "not recognized" like
        # every other failure path instead of propagating to the filter.
        return None

    if result_data and 'result' in result_data:
        return ''.join(result_data['result']).strip(',。?!')
    return None
|
||||
|
||||
|
||||
def _recognize_bing(wav_path, api_key, language='zh-CN'):
    """
    Recognize the speech in a WAV file via SpeechRecognition's Bing backend.

    :param wav_path: path of the WAV file to recognize
    :param api_key: key passed to recognize_bing()
    :param language: language passed to recognize_bing()
    :return: the recognized text, or None when the recognizer raises
             UnknownValueError or RequestError
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_path) as audio_file:
        recorded = recognizer.record(audio_file)

    try:
        return recognizer.recognize_bing(recorded, key=api_key, language=language)
    except (sr.UnknownValueError, sr.RequestError):
        return None
|
||||
|
||||
|
||||
def _filter(ctx_msg):
    """
    Recognize the speech of a WeChat voice message and store it as text.

    Only messages with via == 'wx', format == 'media' and
    media_type == 'voice' whose content has the form '[语音](<path>.mp3)'
    are handled; every other message is left untouched. The MP3 file is
    converted to a temporary WAV file next to it, which is sent to Baidu
    speech recognition and removed afterwards. On success the recognized
    text is written to ctx_msg['text']; in both cases the sender is told
    the outcome via core.echo().

    :param ctx_msg: context message dict; may be modified in place
    :return: None in every case
    """
    is_wx_voice = (ctx_msg.get('via') == 'wx'
                   and ctx_msg.get('format') == 'media'
                   and ctx_msg.get('media_type') == 'voice')
    if not is_wx_voice:
        return

    # Fall back to '' so a missing content does not make re.match() raise.
    m = re.match(r'\[语音\]\(([/_A-Za-z0-9]+\.mp3)\)', ctx_msg.get('content') or '')
    if not m:
        return

    core.echo('正在识别语音内容,请稍等……', ctx_msg)
    mp3_path = m.group(1)
    wav_path = os.path.splitext(mp3_path)[0] + '.wav'
    voice = AudioSegment.from_mp3(mp3_path)
    try:
        # NOTE(review): the Baidu request declares 8000 Hz mono audio while
        # the export below keeps the MP3's own parameters -- confirm that
        # the incoming voice files already match.
        voice.export(wav_path, format='wav')
        # _recognize_bing() in this module is an alternative backend.
        text = _recognize_baidu(
            wav_path,
            # Only the last 60 characters of the sender id are sent as cuid.
            (ctx_msg.get('sender_id') or '')[-60:],
            os.environ.get('BAIDU_SPEECH_API_KEY'),
            os.environ.get('BAIDU_SPEECH_SECRET_KEY'),
            language='zh'
        )
    finally:
        # Remove the temporary WAV even when export or recognition raises.
        if os.path.exists(wav_path):
            os.remove(wav_path)

    if text:
        reply = '识别结果(百度语音识别):\n%s\n\n下面将把识别到的内容作为文字消息处理……' % text
        ctx_msg['text'] = text
    else:
        reply = '抱歉哦,没有识别出你说的是什么'
    core.echo(reply, ctx_msg)
|
||||
|
||||
|
||||
# Register the filter defined above with the filter framework.
# NOTE(review): the second argument (90) presumably is the priority that the
# numeric suffix of this file's name refers to -- confirm the ordering
# semantics in filter.add_filter.
add_filter(_filter, 90)
|
@ -1,21 +1,25 @@
|
||||
"""
|
||||
This filter intercepts messages not intended for the bot and removes the leading "@xxx".
|
||||
"""
|
||||
|
||||
from filter import add_filter
|
||||
|
||||
|
||||
def _split_at_xiaokai(ctx_msg):
|
||||
if ctx_msg.get('type') == 'group_message' or ctx_msg.get('type') == 'discuss_message':
|
||||
content = ctx_msg.get('content', '')
|
||||
if content.startswith('@'):
|
||||
text = ctx_msg.get('text', '')
|
||||
if text.startswith('@'):
|
||||
my_group_nick = ctx_msg.get('receiver')
|
||||
if not my_group_nick:
|
||||
return False
|
||||
at_me = '@' + my_group_nick
|
||||
if not content.startswith(at_me):
|
||||
if not text.startswith(at_me):
|
||||
return False
|
||||
content = content[len(at_me):]
|
||||
text = text[len(at_me):]
|
||||
else:
|
||||
# Not starts with '@'
|
||||
return False
|
||||
ctx_msg['content'] = content.lstrip()
|
||||
ctx_msg['text'] = text.lstrip()
|
||||
return True
|
||||
|
||||
|
||||
|
@ -5,3 +5,6 @@ cachetools
|
||||
pytz
|
||||
flask
|
||||
sqlalchemy
|
||||
pydub
|
||||
SpeechRecognition
|
||||
jieba
|
||||
|
Loading…
x
Reference in New Issue
Block a user