Add voice recognition

2024-09-21 05:12:34 +00:00 · 2017-01-01 23:16:34 +08:00 · 2017-01-01 23:16:34 +08:00 · 1880409b7f
commit 1880409b7f
parent 6e86d36056
11 changed files with 156 additions and 15 deletions
--- a/4
+++ b/4
@ -9,4 +9,8 @@ COPY requirements.txt requirements.txt
 RUN pip install --upgrade pip
 RUN pip install -r requirements.txt
 RUN apt-get update \
    && apt-get install -y ffmpeg \
    && rm -rf /var/lib/apt/lists/*
 CMD python app.py
--- a/commands/natural_language.py
+++ b/commands/natural_language.py
@ -0,0 +1,12 @@
 import jieba
 from command import CommandRegistry
 # One CommandRegistry instance bound to two names: `cr` is used by the
 # decorators below, `__registry__` is the module-level alias (presumably what
 # the command loader looks up -- confirm against the loader).
 __registry__ = cr = CommandRegistry()
@cr.register('process')
@cr.restrict(full_command_only=True)
def process(args_text, ctx_msg, internal=False):
    """Print a natural-language message and its jieba segmentation.

    Only writes to stdout: first the raw text, then the list of tokens that
    jieba's search-mode cutter yields for it. ``ctx_msg`` and ``internal``
    are accepted but not used here.
    """
    print('自然语言消息处理', args_text)
    tokens = list(jieba.cut_for_search(args_text))
    print(tokens)
--- a/commands/translate.py
+++ b/commands/translate.py
@ -97,6 +97,6 @@ def translate_to(args_text, ctx_msg):
        data = resp.json()
        print(data)
        if 'trans_result' in data:
-            core.echo('翻译结果：\n' + '\n'.join([x['dst'] for x in data['trans_result']]), ctx_msg)
+            core.echo('翻译结果（百度翻译）：\n' + '\n'.join([x['dst'] for x in data['trans_result']]), ctx_msg)
            return
    core.echo('翻译失败，可能因为后台接口的频率限制或服务器连接不上', ctx_msg)
--- a/config.py
+++ b/config.py
@ -1,5 +1,5 @@
 config = {
-    'fallback_command': 'core.chat',
+    'fallback_command': 'natural_language.process',
    'command_start_flags': ('/', '／', '来，', '来,'),
    'command_name_separators': ('\.', '->', '::', '/'),  # Regex
    'command_args_start_flags': ('，', '：', ',', ', ', ':', ': '),  # Regex
--- a/filters/command_dispatcher_0.py
+++ b/filters/command_dispatcher_0.py
@ -30,30 +30,33 @@ def _load_commands():
 def _dispatch_command(ctx_msg):
    # noinspection PyBroadException
    try:
-        content = ctx_msg.get('content', '').lstrip()
+        text = ctx_msg.get('text', '').lstrip()
        if not text:
            raise SkipException
        source = get_source(ctx_msg)
        start_flag = None
        for flag in _command_start_flags:
            # Match the command start flag
-            if content.startswith(flag):
+            if text.startswith(flag):
                start_flag = flag
                break
-        if not start_flag or len(content) <= len(start_flag):
+        if not start_flag or len(text) <= len(start_flag):
            # No command, check if a session exists
            if interactive.has_session(source):
-                command = [interactive.get_session(source).cmd, content]
+                command = [interactive.get_session(source).cmd, text]
            else:
                # Use fallback
                if _fallback_command:
-                    command = [_fallback_command, content]
+                    command = [_fallback_command, text]
                else:
                    # No fallback
                    raise SkipException
        else:
            # Split command and arguments
            command = re.split('|'.join(_command_args_start_flags),
-                               content[len(start_flag):], 1)
+                               text[len(start_flag):], 1)
            if len(command) == 1:
                # Add an empty argument
                command.append('')
--- a/filters/how_to_use_1.py
+++ b/filters/how_to_use_1.py
@ -5,8 +5,9 @@ from commands import core
 def _print_help_message(ctx_msg):
    a = ['help', '怎么用', '怎么用啊', '你好', '你好啊', '帮助',
         '用法', '使用帮助', '使用指南', '使用说明', '使用方法',
-         '你能做什么', '你能做些什么', '你会做什么', '你会做些什么']
+         '你能做什么', '你能做些什么', '你会做什么', '你会做些什么',
-    if ctx_msg.get('content', '').strip() in a:
+         '你可以做什么', '你可以做些什么']
    if ctx_msg.get('text', '').strip() in a:
        core.help('', ctx_msg)
        return False
    return True
--- a/filters/intercept_some_message_formats_100.py
+++ b/filters/intercept_some_message_formats_100.py
@ -0,0 +1,22 @@
 """
 This filter intercepts messages that contain disallowed content and moves text content to the 'text' field.
 """
 from filter import add_filter
 def _filter(ctx_msg):
     """Decide whether a message may pass and populate its 'text' field.

     - 'qq' messages: 'content' is copied to 'text' and the message passes.
     - 'wx' text messages: 'content' is copied to 'text' and the message passes.
     - 'wx' media messages: pass only when the type is 'friend_message';
       'text' is left untouched.
     - any other 'wx' format: rejected.
     - any other 'via' value: passes unchanged.

     :param ctx_msg: context message dict, modified in place
     :return: True to let the message through, False to intercept it
     """
     channel = ctx_msg.get('via')

     if channel == 'qq':
         ctx_msg['text'] = ctx_msg.get('content')
         return True

     if channel != 'wx':
         return True

     kind = ctx_msg.get('format')
     if kind == 'text':
         ctx_msg['text'] = ctx_msg.get('content')
         return True

     # Non-text WeChat messages: only media sent as a friend message passes.
     return kind == 'media' and ctx_msg.get('type') == 'friend_message'
 # Register the filter. 100 is handed to add_filter as-is; presumably it is an
 # ordering priority mirrored by this file's "_100" suffix -- confirm in filter.py.
 add_filter(_filter, 100)
--- a/filters/message_logger_1000.py
+++ b/filters/message_logger_1000.py
@ -1,3 +1,7 @@
 """
 This filter just logs messages to stdout.
 """
 from filter import add_filter
--- a/filters/speech_recognition_90.py
+++ b/filters/speech_recognition_90.py
@ -0,0 +1,88 @@
 """
 This filter recognizes speech in voice messages and stores the result in the 'text' field of the context message.
 """
 import re
 import os
 import base64
 import requests
 from pydub import AudioSegment
 import speech_recognition as sr
 from filter import add_filter
 from commands import core
 def _recognize_baidu(wav_path, unique_id, api_key, secret_key, language='zh', timeout=10):
     """Recognize the speech in a WAV file through Baidu's speech REST API.

     An OAuth access token is requested with the given credentials, then the
     base64-encoded audio is posted to the recognition endpoint. The request
     declares the audio as 8000 Hz, single channel, so the file at
     ``wav_path`` is expected to be in that format.

     :param wav_path: path of the WAV file to upload
     :param unique_id: value sent as ``cuid`` in the recognition request
     :param api_key: Baidu API key (sent as ``client_id``)
     :param secret_key: Baidu secret key (sent as ``client_secret``)
     :param language: value sent as ``lan`` in the recognition request
     :param timeout: seconds to wait for each HTTP request
     :return: the joined recognition result with surrounding '，。？！'
         stripped, or None when the token or recognition request fails
         (non-200 status, network error, unparsable or unexpected response)
     :raises OSError: if ``wav_path`` cannot be read
     """
     api_url = 'http://vop.baidu.com/server_api'
     auth_url = 'https://openapi.baidu.com/oauth/2.0/token'

     # Let requests build the query string so the credentials are URL-encoded.
     auth_params = {
         'grant_type': 'client_credentials',
         'client_id': api_key,
         'client_secret': secret_key,
     }
     try:
         resp = requests.get(auth_url, params=auth_params, timeout=timeout)
         if resp.status_code != 200:
             return None
         data = resp.json()
     except (requests.RequestException, ValueError):
         # Network failure or a body that is not JSON: report "not recognized"
         # the same way _recognize_bing does instead of crashing the filter.
         return None
     if not isinstance(data, dict) or 'access_token' not in data:
         return None
     token = data['access_token']

     with open(wav_path, 'rb') as f:
         audio_data = f.read()

     payload = {
         'format': 'wav',
         'rate': 8000,
         'channel': 1,
         'cuid': unique_id,
         'token': token,
         'lan': language,
         'speech': base64.b64encode(audio_data).decode('utf-8'),
         # Length of the raw audio in bytes, not of the base64 text.
         'len': len(audio_data),
     }
     try:
         resp = requests.post(api_url, json=payload, timeout=timeout)
         if resp.status_code != 200:
             return None
         data = resp.json()
     except (requests.RequestException, ValueError):
         return None
     if not isinstance(data, dict) or 'result' not in data:
         return None
     return ''.join(data['result']).strip('，。？！')
 def _recognize_bing(wav_path, api_key, language='zh-CN'):
     """Recognize the speech in a WAV file with the Bing recognizer.

     :param wav_path: path of the WAV file to read
     :param api_key: key passed to ``Recognizer.recognize_bing``
     :param language: language code passed to the recognizer
     :return: the recognized text, or None when the recognizer raises
         ``UnknownValueError`` or ``RequestError``
     """
     recognizer = sr.Recognizer()
     with sr.AudioFile(wav_path) as audio_file:
         recording = recognizer.record(audio_file)

     try:
         return recognizer.recognize_bing(recording, key=api_key, language=language)
     except (sr.UnknownValueError, sr.RequestError):
         return None
 def _filter(ctx_msg):
     """Transcribe a WeChat voice message and store the result in 'text'.

     Acts only on messages with via == 'wx', format == 'media' and
     media_type == 'voice' whose 'content' looks like ``[语音](<path>.mp3)``.
     The MP3 is converted to a temporary WAV file next to it, sent to Baidu
     speech recognition, and the sender is told the outcome. On success the
     recognized text is written to ``ctx_msg['text']``.

     :param ctx_msg: context message dict, modified in place
     :return: always None (NOTE(review): sibling filters return True/False --
         confirm how the filter framework treats None)
     """
     is_wx_voice = (ctx_msg.get('via') == 'wx'
                    and ctx_msg.get('format') == 'media'
                    and ctx_msg.get('media_type') == 'voice')
     if not is_wx_voice:
         return

     # Raw string: the pattern contains regex escapes such as \[ that are
     # invalid escape sequences in a normal string literal. A missing
     # 'content' is treated as an empty string instead of raising TypeError.
     m = re.match(r'\[语音\]\(([/_A-Za-z0-9]+\.mp3)\)', ctx_msg.get('content') or '')
     if not m:
         return

     core.echo('正在识别语音内容，请稍等……', ctx_msg)
     mp3_path = m.group(1)
     wav_path = os.path.splitext(mp3_path)[0] + '.wav'

     voice = AudioSegment.from_mp3(mp3_path)
     # _recognize_baidu declares the audio as 8000 Hz mono, so make the
     # exported file actually match that.
     voice = voice.set_frame_rate(8000).set_channels(1)
     try:
         voice.export(wav_path, format='wav')
         text = _recognize_baidu(
             wav_path,
             # Only the last 60 characters of the sender id are sent as cuid.
             (ctx_msg.get('sender_id') or '')[-60:],
             os.environ.get('BAIDU_SPEECH_API_KEY'),
             os.environ.get('BAIDU_SPEECH_SECRET_KEY'),
             language='zh'
         )
     finally:
         # Always clean up the temporary WAV, even when recognition raises.
         if os.path.exists(wav_path):
             os.remove(wav_path)

     if text:
         reply = '识别结果（百度语音识别）：\n%s\n\n下面将把识别到的内容作为文字消息处理……' % text
         ctx_msg['text'] = text
     else:
         reply = '抱歉哦，没有识别出你说的是什么'
     core.echo(reply, ctx_msg)
 # Register the filter. 90 is handed to add_filter as-is; presumably it is an
 # ordering priority mirrored by this file's "_90" suffix -- confirm in filter.py.
 add_filter(_filter, 90)
--- a/filters/split_at_xiaokai_50.py
+++ b/filters/split_at_xiaokai_50.py
@ -1,21 +1,25 @@
 """
 This filter intercepts messages not intended for the bot and removes the leading "@xxx".
 """
 from filter import add_filter
 def _split_at_xiaokai(ctx_msg):
    if ctx_msg.get('type') == 'group_message' or ctx_msg.get('type') == 'discuss_message':
-        content = ctx_msg.get('content', '')
+        text = ctx_msg.get('text', '')
-        if content.startswith('@'):
+        if text.startswith('@'):
            my_group_nick = ctx_msg.get('receiver')
            if not my_group_nick:
                return False
            at_me = '@' + my_group_nick
-            if not content.startswith(at_me):
+            if not text.startswith(at_me):
                return False
-            content = content[len(at_me):]
+            text = text[len(at_me):]
        else:
            # Not starts with '@'
            return False
-        ctx_msg['content'] = content.lstrip()
+        ctx_msg['text'] = text.lstrip()
    return True
--- a/requirements.txt
+++ b/requirements.txt
@ -5,3 +5,6 @@ cachetools
 pytz
 flask
 sqlalchemy
 pydub
 SpeechRecognition
 jieba