From ccb12f0f637319070cf066c53ad46c78ca9d62f7 Mon Sep 17 00:00:00 2001
From: Twisuki
Date: Wed, 11 Dec 2024 01:50:06 +0800
Subject: [PATCH] =?UTF-8?q?=E6=94=B9=E6=88=90=E5=BC=82=E6=AD=A5=E4=BA=86,?=
 =?UTF-8?q?=20=E6=94=B9=E5=AE=8C=E5=BD=BB=E5=BA=95=E4=B8=8D=E8=83=BD?=
 =?UTF-8?q?=E7=94=A8=E4=BA=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../tools/marshoai-meogirl/__init__.py  |   2 +-
 .../tools/marshoai-meogirl/mg_Search.py | 155 +++++++++---------
 2 files changed, 81 insertions(+), 76 deletions(-)

diff --git a/nonebot_plugin_marshoai/tools/marshoai-meogirl/__init__.py b/nonebot_plugin_marshoai/tools/marshoai-meogirl/__init__.py
index 4ad38666..1e9b5526 100644
--- a/nonebot_plugin_marshoai/tools/marshoai-meogirl/__init__.py
+++ b/nonebot_plugin_marshoai/tools/marshoai-meogirl/__init__.py
@@ -7,4 +7,4 @@ async def meogirl():
 
 # Search
 async def search(msg : str, num : int = 3):
-    return str(mg_Search.search(msg, num))
\ No newline at end of file
+    return str(await mg_Search.search(msg, num))
diff --git a/nonebot_plugin_marshoai/tools/marshoai-meogirl/mg_Search.py b/nonebot_plugin_marshoai/tools/marshoai-meogirl/mg_Search.py
index c8c22c11..27f6f637 100644
--- a/nonebot_plugin_marshoai/tools/marshoai-meogirl/mg_Search.py
+++ b/nonebot_plugin_marshoai/tools/marshoai-meogirl/mg_Search.py
@@ -1,91 +1,96 @@
 from nonebot.log import logger
 
 import re
-import requests
+import httpx
 from bs4 import BeautifulSoup
 
-def search(msg : str, num : int):
+async def search(msg : str, num : int):
     logger.info(f"搜索 : \"{msg}\"")
     result = ""
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36'
+    }
 
     url = "https://mzh.moegirl.org.cn/index.php?search=" + msg
-    response = requests.get(url)
-    logger.info(f"连接萌娘百科中, 状态码 : {response.status_code}")
+    async with httpx.AsyncClient() as client:
+        response = await client.get(url, headers = headers)
+        logger.info(response.headers.get('Location'))
+        logger.info(f"连接萌娘百科中, 状态码 : {response.status_code}")
 
-    """
-    萌娘百科搜索页面结构
-    div.searchresults  # 若无, 证明页面已重定向
-    └── p ...
-    └── ul.mw-search-results  # 若无, 证明无搜索结果
-        └── li  # 一个搜索结果
-            └── div.mw-search-result-heading > a  # 标题
-            └── div.mw-searchresult  # 内容
-    └── div.mw-search-result-data
-    └── li ...
-    └── li ...
-    """
-    if response.status_code == 200:
-        soup = BeautifulSoup(response.text, 'html.parser')
+        """
+        萌娘百科搜索页面结构
+        div.searchresults  # 若无, 证明页面已重定向
+        └── p ...
+        └── ul.mw-search-results  # 若无, 证明无搜索结果
+            └── li  # 一个搜索结果
+                └── div.mw-search-result-heading > a  # 标题
+                └── div.mw-searchresult  # 内容
+        └── div.mw-search-result-data
+        └── li ...
+        └── li ...
+        """
+        if response.status_code == 200:
+            soup = BeautifulSoup(response.text, 'html.parser')
 
-        # 检测div.searchresults, 是否已重定向
-        if soup.find('div', class_='searchresults'):
-            # 检测ul.mw-search-results, 是否有结果
-            if soup.find('ul', class_='mw-search-results'):
-                ul_tag = soup.select('ul.mw-search-results')[0]
-                li_tags = ul_tag.select('li')
-                for li_tag in li_tags:
+            # 检测div.searchresults, 是否已重定向
+            if soup.find('div', class_='searchresults'):
+                # 检测ul.mw-search-results, 是否有结果
+                if soup.find('ul', class_='mw-search-results'):
+                    ul_tag = soup.select('ul.mw-search-results')[0]
+                    li_tags = ul_tag.select('li')
+                    for li_tag in li_tags:
 
-                    div_heading = li_tag.select('div.mw-search-result-heading')[0]
-                    if div_heading:
-                        a_tag = div_heading.select('a')[0]
-                        result += a_tag['title'] + "\n"
-                        logger.info(f"搜索到 : \"{a_tag['title']}\"")
+                        div_heading = li_tag.select('div.mw-search-result-heading')[0]
+                        if div_heading:
+                            a_tag = div_heading.select('a')[0]
+                            result += a_tag['title'] + "\n"
+                            logger.info(f"搜索到 : \"{a_tag['title']}\"")
 
-                    div_result = li_tag.find('div', class_='searchresult')
-                    if div_result:
-                        content = str(div_result).replace('<div class="searchresult">', '').replace('</div>', '')
-                        content = content.replace('<span class="searchmatch">', '').replace('</span>', '')
-                        result += content + "\n\n"
+                        div_result = li_tag.find('div', class_='searchresult')
+                        if div_result:
+                            content = str(div_result).replace('<div class="searchresult">', '').replace('</div>', '')
+                            content = content.replace('<span class="searchmatch">', '').replace('</span>', '')
+                            result += content + "\n\n"
 
-                    num -= 1
-                    if num == 0:
-                        break
-                return result
+                        num -= 1
+                        if num == 0:
+                            break
+                    return result
 
-            # 无ul.mw-search-results, 无结果
-            else:
-                logger.info("无结果")
-                return "无结果"
+                # 无ul.mw-search-results, 无结果
+                else:
+                    logger.info("无结果")
+                    return "无结果"
 
-        # 无div.searchresults, 重定向
-        else:
-            logger.info(f"\"{msg}\"已被重定向")
-            num = 0
+            # 无div.searchresults, 重定向
+            else:
+                logger.info(f"\"{msg}\"已被重定向")
+                num = 0
 
-            """
-            萌娘百科重定向介绍页面结构
-            div#mw-content-text
-            └── div.mw-parser-output  # 介绍页面
-                └── ....
-                └── p ?  # 可能存在的空p
-                └── p  # 人物介绍
-                └── ...
-            """
-            if soup.find('div', class_='mw-parser-output'):
-                div = soup.find('div', class_='mw-parser-output')
-                p_tags = div.select('p')
-                for p_tag in p_tags:
-                    p = str(p_tag)
-                    p = re.sub(r'<.*?>', '', p)
-                    if p != '':
-                        result += str(p) + "/n"
+                """
+                萌娘百科重定向介绍页面结构
+                div#mw-content-text
+                └── div.mw-parser-output  # 介绍页面
+                    └── ....
+                    └── p ?  # 可能存在的空p
+                    └── p  # 人物介绍
+                    └── ...
+                """
+                if soup.find('div', class_='mw-parser-output'):
+                    div = soup.find('div', class_='mw-parser-output')
+                    p_tags = div.select('p')
+                    for p_tag in p_tags:
+                        p = str(p_tag)
+                        p = re.sub(r'<.*?>', '', p)
+                        if p != '':
+                            result += str(p) + "/n"
 
-                        num += 1
-                        if num >= 5:
-                            break
-                return result
+                            num += 1
+                            if num >= 5:
+                                break
+                    return result
 
-    # 状态码非200
-    else:
-        logger.error(f"网络错误, 状态码 : {response.status_code}")
-        return f"网络错误, 状态码 : {response.status_code}"
+        # 状态码非200
+        else:
+            logger.error(f"网络错误, 状态码 : {response.status_code}")
+            return f"网络错误, 状态码 : {response.status_code}"
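
Note (not part of the patch): the commit message says the tool stopped working after the async rewrite. One likely cause, judging by the added logger.info(response.headers.get('Location')) line, is that requests.get() follows redirects by default while httpx.AsyncClient.get() does not; the Moegirl search URL redirects straight to the article on an exact title match, so the response is now 3xx and the code drops into the "网络错误" branch. A minimal sketch of the fetch with redirects enabled, assuming httpx >= 0.20; the URL mirrors the patch, the function name and the shortened User-Agent are illustrative only:

    import httpx

    async def fetch_search_page(msg: str) -> httpx.Response:
        # follow_redirects=True restores the behaviour requests gave by default,
        # so an exact-title redirect still ends in a 200 response.
        url = "https://mzh.moegirl.org.cn/index.php?search=" + msg
        headers = {"User-Agent": "Mozilla/5.0"}  # the patch uses a full Chrome UA string
        async with httpx.AsyncClient(follow_redirects=True, headers=headers) as client:
            response = await client.get(url)
        return response

Separately, result += str(p) + "/n" in the redirect branch looks like it was meant to append "\n"; the same literal appears on both the removed and added lines, so it is left untouched here.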
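
Since search() is now a coroutine, every caller has to await it, which is exactly what the __init__.py change does. A quick way to exercise it outside the bot, assuming nonebot and the plugin dependencies are installed and the interpreter is started in the marshoai-meogirl directory so mg_Search imports directly; the query string is an arbitrary example:

    import asyncio

    import mg_Search

    # Prints the first three search results (or the redirect summary) as plain text.
    print(asyncio.run(mg_Search.search("初音未来", 3)))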