diff --git a/nonebot_plugin_marshoai/tools/marshoai-meogirl/__init__.py b/nonebot_plugin_marshoai/tools/marshoai-meogirl/__init__.py index 1e9b5526..58d7a8e8 100644 --- a/nonebot_plugin_marshoai/tools/marshoai-meogirl/__init__.py +++ b/nonebot_plugin_marshoai/tools/marshoai-meogirl/__init__.py @@ -1,10 +1,16 @@ + from . import mg_Info from . import mg_Search +from . import mg_Introduce # meogirl -async def meogirl(): +async def meogirl () : return mg_Info.meogirl() # Search -async def search(msg : str, num : int = 3): +async def search (msg : str, num : int = 3) : return str(await mg_Search.search(msg, num)) + +# Show +async def introduce (msg : str) : + return str(await mg_Introduce.introduce(msg)) diff --git a/nonebot_plugin_marshoai/tools/marshoai-meogirl/mg_Introduce.py b/nonebot_plugin_marshoai/tools/marshoai-meogirl/mg_Introduce.py new file mode 100644 index 00000000..ffc072ba --- /dev/null +++ b/nonebot_plugin_marshoai/tools/marshoai-meogirl/mg_Introduce.py @@ -0,0 +1,76 @@ +from nonebot.log import logger + +import re +import httpx +import urllib.parse +from bs4 import BeautifulSoup + +headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36" +} + +async def get_async_data (url) : + async with httpx.AsyncClient(timeout = None) as client: + return await client.get(url, headers = headers) + +async def introduce (msg : str) : + logger.info(f"介绍 : \"{msg}\" ...") + result = "" + + url = "https://mzh.moegirl.org.cn/" + urllib.parse.quote_plus(msg) + response = await get_async_data(url) + logger.success(f"连接\"{url}\"完成, 状态码 : {response.status_code}") + + soup = BeautifulSoup(response.text, "html.parser") + + # 正常页 + if response.status_code == 200 : + """ + 萌娘百科页面结构 + div#mw-content-text + └── div#404search # 空白页面出现 + └── div.mw-parser-output # 正常页面 + └── div, p, table ... # 大量的解释项 + """ + result += msg + "\n" + + img = soup.find("img", class_="infobox-image") + if img: + result += f"![ {msg} ]( {img["src"]} ) \n" + + div = soup.find("div", class_="mw-parser-output") + if div: + p_tags = div.find_all("p") + num = 0 + for p_tag in p_tags: + p = str(p_tag) + p = re.sub(r"|", "", p, flags=re.DOTALL) + p = re.sub(r"<.*?>", "", p, flags=re.DOTALL) + p = re.sub(r"\[.*?]", "", p, flags=re.DOTALL) + + if p != "": + result += str(p) + + num += 1 + if num >= 20: + break + return result + + # 空白页 + elif response.status_code == 404 : + logger.info(f"未找到\"{msg}\", 进行搜索") + + from . import mg_Search + context = await mg_Search.search(msg, 1) + keyword = re.search(r".*?\n", context, flags = re.DOTALL).group()[: -1] + + logger.success(f"搜索完成, 打开\"{keyword}\"") + return await introduce(keyword) + + # 搜索失败 + elif response.status_code == 301 : + return f"未找到{msg}" + + else : + logger.error(f"网络错误, 状态码 : {response.status_code}") + return f"网络错误, 状态码 : {response.status_code}" diff --git a/nonebot_plugin_marshoai/tools/marshoai-meogirl/mg_Search.py b/nonebot_plugin_marshoai/tools/marshoai-meogirl/mg_Search.py index cec9ad3d..0b793a6e 100644 --- a/nonebot_plugin_marshoai/tools/marshoai-meogirl/mg_Search.py +++ b/nonebot_plugin_marshoai/tools/marshoai-meogirl/mg_Search.py @@ -1,6 +1,5 @@ from nonebot.log import logger -import re import httpx import urllib.parse from bs4 import BeautifulSoup @@ -9,12 +8,12 @@ headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36" } -async def get_async_data (url): +async def get_async_data (url) : async with httpx.AsyncClient(timeout = None) as client: return await client.get(url, headers = headers) -async def search(msg : str, num : int): - logger.info(f"搜索 : \"{msg}\"") +async def search (msg : str, num : int) : + logger.info(f"搜索 : \"{msg}\" ...") result = "" url = "https://mzh.moegirl.org.cn/index.php?search=" + urllib.parse.quote_plus(msg) @@ -22,7 +21,7 @@ async def search(msg : str, num : int): logger.success(f"连接\"{url}\"完成, 状态码 : {response.status_code}") # 正常搜索 - if response.status_code == 200: + if response.status_code == 200 : """ 萌娘百科搜索页面结构 div.searchresults @@ -39,74 +38,39 @@ async def search(msg : str, num : int): # 检测ul.mw-search-results, 是否有结果 ul_tag = soup.find("ul", class_ = "mw-search-results") - if ul_tag: + if ul_tag : li_tags = ul_tag.find_all("li") - for li_tag in li_tags: + for li_tag in li_tags : div_heading = li_tag.find("div", class_ = "mw-search-result-heading") - if div_heading: + if div_heading : a_tag = div_heading.find("a") result += a_tag["title"] + "\n" logger.info(f"搜索到 : \"{a_tag["title"]}\"") div_result = li_tag.find("div", class_="searchresult") - if div_result: + if div_result : content = str(div_result).replace("
", "").replace("
", "") content = content.replace("", "").replace("", "") result += content + "\n" num -= 1 - if num == 0: + if num == 0 : break return result # 无ul.mw-search-results, 无结果 - else: + else : logger.info("无结果") return "无结果" # 重定向 - elif response.status_code == 302: + elif response.status_code == 302 : logger.info(f"\"{msg}\"已被重定向至\"{response.headers.get("location")}\"") # 读取重定向结果 - response = await get_async_data(response.headers.get("location")) - soup = BeautifulSoup(response.text, "html.parser") - logger.success("重定向成功") - num = 0 + from . import mg_Introduce + return await mg_Introduce.introduce(msg) - """ - 萌娘百科重定向介绍页面结构 - div#mw-content-text - └── div.mw-parser-output # 介绍页面 - └── .... - └── p ? # 可能存在的空p - └── p # 人物介绍 - └── ... - """ - - result += msg + "\n" - img = soup.find("img", class_="infobox-image") - if img: - logger.info(f"照片{img["src"]}") - result += f"![ {msg} ]( {img["src"]} ) \n" - - div = soup.find("div", class_="mw-parser-output") - if div: - p_tags = div.find_all("p") - for p_tag in p_tags: - p = str(p_tag) - p = re.sub(r"|", "", p, flags=re.DOTALL) - p = re.sub(r"<.*?>", "", p, flags = re.DOTALL) - p = re.sub(r"\[.*?]", "", p, flags = re.DOTALL) - if p != "": - result += str(p) - - num += 1 - if num >= 5: - break - return result - - # 状态码非200或302 - else: + else : logger.error(f"网络错误, 状态码 : {response.status_code}") return f"网络错误, 状态码 : {response.status_code}" diff --git a/nonebot_plugin_marshoai/tools/marshoai-meogirl/tools.json b/nonebot_plugin_marshoai/tools/marshoai-meogirl/tools.json index 94e8596d..fa008317 100644 --- a/nonebot_plugin_marshoai/tools/marshoai-meogirl/tools.json +++ b/nonebot_plugin_marshoai/tools/marshoai-meogirl/tools.json @@ -6,21 +6,40 @@ "description" : "介绍Meogirl" } }, + { + "type": "function", + "function": { + "name": "marshoai-meogirl__search", + "description": "查找/搜索 某角色/事物 (使用萌娘百科)", + "parameters": { + "type": "object", + "properties": { + "msg": { + "type": "string", + "description": "搜索关键词" + }, + "num": { + "type": "integer", + "description": "数据显示条数, 默认3, 可留空" + } + } + }, + "required": [ + "msg" + ] + } + }, { "type" : "function", "function" : { - "name" : "marshoai-meogirl__search", - "description" : "在萌娘百科中搜索(仅用户指定在萌娘百科中搜索才调用此函数)", + "name" : "marshoai-meogirl__introduce", + "description" : "介绍/展示 某角色/事物 (使用萌娘百科)", "parameters" : { "type" : "object", "properties" : { "msg" : { "type": "string", - "description": "搜索关键词" - }, - "num" : { - "type": "integer", - "description": "数据显示条数, 默认3, 可留空" + "description": "关键词" } } },