From 882810378d6c3f30e35ade881cef112653889d8d Mon Sep 17 00:00:00 2001
From: Richard Chien <richardchienthebest@gmail.com>
Date: Mon, 2 Jan 2017 23:51:19 +0800
Subject: [PATCH] Try to add the first natural language processor - 'translate'

---
 commands/natural_language.py     | 37 ++++++++++++++++++++++++++-----
 commands/translate.py            | 24 ++++++++++++++++++--
 filters/speech_recognition_90.py |  1 +
 little_shit.py                   |  4 ++++
 nl_processor.py                  | 38 ++++++++++++++++++++++++++++++++
 nl_processors/translate.py       | 31 ++++++++++++++++++++++++++
 6 files changed, 128 insertions(+), 7 deletions(-)
 create mode 100644 nl_processor.py
 create mode 100644 nl_processors/translate.py

diff --git a/commands/natural_language.py b/commands/natural_language.py
index b9d2b0ce..cb4a4800 100644
--- a/commands/natural_language.py
+++ b/commands/natural_language.py
@@ -1,12 +1,39 @@
-import jieba
+import os
+import importlib
 
 from command import CommandRegistry
+from commands import core
+from nl_processor import parse_potential_commands
+from little_shit import get_nl_processors_dir
+from command import hub as cmdhub
 
-__registry__ = cr = CommandRegistry()
+
+def _init():  # passed to CommandRegistry as init_func; imports the NL processor modules
+    _load_processors()
+
+
+__registry__ = cr = CommandRegistry(init_func=_init)
 
 
 @cr.register('process')
 @cr.restrict(full_command_only=True)
-def process(args_text, ctx_msg, internal=False):
-    print('自然语言消息处理', args_text)
-    print(list(jieba.cut_for_search(args_text)))
+def process(sentence, ctx_msg, internal=False):
+    """Run the best-scoring command the NL processors propose for `sentence`."""
+    # A processor returns None when it cannot parse the sentence; skip those.
+    candidates = [c for c in parse_potential_commands(sentence.strip()) if c and c[0] > 60]
+    if candidates:
+        best = max(candidates, key=lambda c: c[0])  # first maximum, like the stable sort
+        ctx_msg['parsed_data'] = best[3]
+        cmdhub.call(best[1], best[2], ctx_msg)
+    else:
+        core.echo('我暂时不理解你在说什么哦～', ctx_msg, internal)
+
+
+def _load_processors():
+    """Import each `nl_processors` module whose file name does not start with `_`."""
+    # os.listdir() returns names in arbitrary order; sort them so that modules
+    # are always imported (and their processors registered) in the same order.
+    for filename in sorted(os.listdir(get_nl_processors_dir())):
+        if filename.endswith('.py') and not filename.startswith('_'):
+            mod_name = os.path.splitext(filename)[0]
+            importlib.import_module('nl_processors.' + mod_name)
diff --git a/commands/translate.py b/commands/translate.py
index f27fafe9..d9b82590 100644
--- a/commands/translate.py
+++ b/commands/translate.py
@@ -47,7 +47,27 @@ _lang_alias_map = {
     '汉语': 'zh',
     '英文': 'en',
     '日文': 'jp',
-    '韩文': 'kor'
+    '韩文': 'kor',
+    '法文': 'fra',
+    '西班牙文': 'spa',
+    '阿拉伯文': 'ara',
+    '俄文': 'ru',
+    '葡萄牙文': 'pt',
+    '德文': 'de',
+    '意大利文': 'it',
+    '希腊文': 'el',
+    '荷兰文': 'nl',
+    '波兰文': 'pl',
+    '保加利亚文': 'bul',
+    '爱沙尼亚文': 'est',
+    '丹麦文': 'dan',
+    '芬兰文': 'fin',
+    '捷克文': 'cs',
+    '罗马尼亚文': 'rom',
+    '斯洛文尼亚文': 'slo',
+    '瑞典文': 'swe',
+    '匈牙利文': 'hu',
+    '越南文': 'vie'
 }
 
 
@@ -67,7 +87,7 @@ def translate(args_text, ctx_msg):
         return translate_to('简体中文 ' + args_text, ctx_msg)
 
 
-@cr.register('translate_to', 'translate-to', '翻译到', '翻译成')
+@cr.register('translate_to', 'translate-to', '翻译到', '翻译成', '翻译为')
 def translate_to(args_text, ctx_msg):
     args = args_text.strip().split(' ', 1)
     if len(args) < 2 or (args[0] not in _lang_map and args[0] not in _lang_alias_map):
diff --git a/filters/speech_recognition_90.py b/filters/speech_recognition_90.py
index 9b45423c..be10f952 100644
--- a/filters/speech_recognition_90.py
+++ b/filters/speech_recognition_90.py
@@ -80,6 +80,7 @@ def _filter(ctx_msg):
             if text:
                 reply = '识别结果（百度语音识别）：\n%s\n\n下面将把识别到的内容作为文字消息处理……' % text
                 ctx_msg['text'] = text
+                ctx_msg['from_voice'] = True
             else:
                 reply = '抱歉哦，没有识别出你说的是什么'
             core.echo(reply, ctx_msg)
diff --git a/little_shit.py b/little_shit.py
index d98dbea5..e874fbd8 100644
--- a/little_shit.py
+++ b/little_shit.py
@@ -26,6 +26,10 @@ def get_commands_dir():
     return _mkdir_if_not_exists_and_return_path(os.path.join(get_root_dir(), 'commands'))
 
 
+def get_nl_processors_dir():  # <root>/nl_processors; presumably created on demand by the helper (verify)
+    return _mkdir_if_not_exists_and_return_path(os.path.join(get_root_dir(), 'nl_processors'))
+
+
 def get_db_dir():
     return _mkdir_if_not_exists_and_return_path(os.path.join(get_root_dir(), 'data', 'db'))
 
diff --git a/nl_processor.py b/nl_processor.py
new file mode 100644
index 00000000..d20073bf
--- /dev/null
+++ b/nl_processor.py
@@ -0,0 +1,38 @@
+import re
+
+import jieba.posseg
+
+_processors = []  # (keywords, func) pairs added by as_processor(keywords=...)
+_processors_without_keyword = []  # funcs added by as_processor() with no keywords
+
+
+def as_processor(keywords=None):
+    """Return a decorator that registers a function as a natural language processor.
+    With non-empty `keywords` the pair (keywords, func) is added to `_processors`; otherwise
+    func is added to `_processors_without_keyword`. The function is returned unchanged."""
+    def register(func):
+        registry = _processors if keywords else _processors_without_keyword
+        registry.append((keywords, func) if keywords else func)
+        return func
+    return register
+
+
+def parse_potential_commands(sentence):
+    """Collect the commands that the registered processors propose for `sentence`.
+
+    The sentence is segmented with jieba. A processor registered with keywords
+    runs once if any segmented word matches (`re.match`) any of its keyword
+    regexes; processors registered without keywords always run.
+
+    Returns the non-None results, keyword processors first, in registration order.
+    """
+    segmentation = list(jieba.posseg.cut(sentence=sentence))
+    print('分词结果:', segmentation)
+    results = []
+    for keywords, func in _processors:
+        if any(re.match(regex, word) for regex in keywords for word, flag in segmentation):
+            results.append(func(sentence, segmentation))
+    results.extend(func(sentence, segmentation) for func in _processors_without_keyword)
+    # A processor returns None when it cannot handle the sentence; callers index
+    # into every result, so drop those instead of handing them a None.
+    return [result for result in results if result is not None]
diff --git a/nl_processors/translate.py b/nl_processors/translate.py
new file mode 100644
index 00000000..b3888ded
--- /dev/null
+++ b/nl_processors/translate.py
@@ -0,0 +1,31 @@
+import re
+
+from nl_processor import as_processor
+
+_query_lang_matcher = [
+    re.compile(r'[把将]?[ ,.，。]?(.*?)[ ,.，。]?(?:这[个]?(?:词[组]?|句(?:子|话)?|短语))翻译[成为到](\w+?[文语])(?![ :：,，.。])'),
+    re.compile(r'(\w+?)[ ,.，。]?(?:这[个]?(?:词[组]?|句(?:子|话)?|短语))?[的用](\w+?[文语])')
+]
+
+_lang_query_matcher = [
+    re.compile(r'[把将]?(?:(?:这[个]?|[下后][面]?)(?:词[组]?|句(?:子|话)?|短语))翻译[成为到](\w+?[文语])[ :：,，.。](.*)'),
+    re.compile(r'[用]?(\w+[文语])\w+?(?:说|讲|表达|表示)(.*)(?:这[个]?(?:词[组]?|句(?:子|话)?|短语))'),
+    re.compile(r'[用]?(\w+[文语])\w+?(?:说|讲|表达|表示)(.*)')
+]
+
+
+@as_processor(keywords=('翻译(为|成|到)?', '.+(文|语)'))
+def _processor(sentence, segmentation):
+    """Recognise a translation request in `sentence`.
+    Returns (90, 'translate.translate_to', '<lang> <query>', None) for the first matching
+    pattern, or None if nothing matches or the captured language / query is empty."""
+    ordered = [(p, 2, 1) for p in _query_lang_matcher] + [(p, 1, 2) for p in _lang_query_matcher]
+    for pattern, lang_group, query_group in ordered:
+        found = pattern.match(sentence)
+        if found is None:
+            continue
+        lang, query = found.group(lang_group), found.group(query_group)
+        if lang and query:
+            return 90, 'translate.translate_to', ' '.join((lang.strip(), query.strip(' ,，'))), None
+        return None  # the first match decides; later patterns are not tried
+    return None