From 777e577a1732cdfac766ed7771a9e80f6fa12b96 Mon Sep 17 00:00:00 2001 From: Snowykami Date: Tue, 17 Dec 2024 13:51:18 +0800 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20=E4=BC=98=E5=8C=96=E7=BD=91?= =?UTF-8?q?=E9=A1=B5=E5=86=85=E5=AE=B9=E8=8E=B7=E5=8F=96=E5=8A=9F=E8=83=BD?= =?UTF-8?q?=EF=BC=8C=E6=B7=BB=E5=8A=A0=E6=91=98=E8=A6=81=E7=94=9F=E6=88=90?= =?UTF-8?q?=E6=94=AF=E6=8C=81=EF=BC=8C=E9=87=8D=E6=9E=84=E7=9B=B8=E5=85=B3?= =?UTF-8?q?=E5=87=BD=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- nonebot_plugin_marshoai/azure.py | 6 ++-- .../plugins/builtin_tools/network.py | 33 +++++++++-------- .../plugins/builtin_tools/utils.py | 35 +++++++++++++++++++ pyproject.toml | 3 +- 4 files changed, 59 insertions(+), 18 deletions(-) diff --git a/nonebot_plugin_marshoai/azure.py b/nonebot_plugin_marshoai/azure.py index 95cf4635..76420444 100644 --- a/nonebot_plugin_marshoai/azure.py +++ b/nonebot_plugin_marshoai/azure.py @@ -350,10 +350,12 @@ async def marsho( tool_call.function.arguments.replace("'", '"') ) logger.info( - f"调用函数 {tool_call.function.name} ,参数为 {function_args}" + f"调用函数 {tool_call.function.name.replace("-", ".")}\n参数:" + + "\n".join([f"{k}={v}" for k, v in function_args.items()]) ) await UniMessage( - f"调用函数 {tool_call.function.name} ,参数为 {function_args}" + f"调用函数 {tool_call.function.name.replace("-", ".")}\n参数:" + + "\n".join([f"{k}={v}" for k, v in function_args.items()]) ).send() # TODO 临时追加插件函数,若工具中没有则调用插件函数 if tools.has_function(tool_call.function.name): diff --git a/nonebot_plugin_marshoai/plugins/builtin_tools/network.py b/nonebot_plugin_marshoai/plugins/builtin_tools/network.py index 3dd8a907..66284c40 100644 --- a/nonebot_plugin_marshoai/plugins/builtin_tools/network.py +++ b/nonebot_plugin_marshoai/plugins/builtin_tools/network.py @@ -1,12 +1,12 @@ -import time - from httpx import AsyncClient -from newspaper import Article +from newspaper import Article # type: ignore from nonebot import logger from nonebot_plugin_marshoai.plugin.func_call.caller import on_function_call from nonebot_plugin_marshoai.plugin.func_call.params import String +from .utils import make_html_summary + headers = { "User-Agent": "Firefox/90.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0" } @@ -16,9 +16,9 @@ headers = { description="使用网页链接(url)获取网页内容摘要,可以让AI上网查询资料" ).params( url=String(description="网页链接"), - typ=String(description="获取类型,摘要还是内容", enum=["摘要", "内容"]), + typ=String(description="获取类型,摘要还是内容"), ) -async def get_web_content(url: str, typ: str) -> str: +async def get_web_content(url: str) -> str: """使用网页链接获取网页内容摘要 为什么要获取摘要,不然token超限了 @@ -31,16 +31,19 @@ async def get_web_content(url: str, typ: str) -> str: async with AsyncClient(headers=headers) as client: try: response = await client.get(url) - t1 = time.time() - article = Article(url) - article.set_html(response.text) - article.parse() - t2 = time.time() - logger.debug(f"获取网页内容耗时: {t2 - t1}") - if typ == "摘要": - return f"标题: {article.title}\n作者: {article.authors}\n发布日期: {article.publish_date}" - elif typ == "内容": - return f"标题: {article.title}\n作者: {article.authors}\n发布日期: {article.publish_date}\n摘要: {article.summary}\n正文: {article.text}" + if response.status_code == 200: + article = Article(url) + article.download(input_html=response.text) + article.parse() + if article.text: + return article.text + elif article.html: + return await make_html_summary(article.html) + else: + return "未能获取到有效的网页内容" + else: + return "获取网页内容失败" + str(response.status_code) + except Exception as e: logger.error(f"marsho builtin: 获取网页内容失败: {e}") return "获取网页内容失败:" + str(e) diff --git a/nonebot_plugin_marshoai/plugins/builtin_tools/utils.py b/nonebot_plugin_marshoai/plugins/builtin_tools/utils.py index e69de29b..d2fb55d4 100644 --- a/nonebot_plugin_marshoai/plugins/builtin_tools/utils.py +++ b/nonebot_plugin_marshoai/plugins/builtin_tools/utils.py @@ -0,0 +1,35 @@ +import asyncio +from concurrent.futures import ThreadPoolExecutor + +from newspaper import Article # type: ignore +from sumy.nlp.tokenizers import Tokenizer # type: ignore +from sumy.parsers.plaintext import PlaintextParser # type: ignore +from sumy.summarizers.lsa import LsaSummarizer # type: ignore + +executor = ThreadPoolExecutor() + + +async def make_html_summary( + html_content: str, language: str = "english", length: int = 3 +) -> str: + """使用html内容生成摘要 + + Args: + html_content (str): html内容 + language (str, optional): 语言. Defaults to "english". + length (int, optional): 摘要长度. Defaults to 3. + + Returns: + str: 摘要 + """ + loop = asyncio.get_event_loop() + return await loop.run_in_executor( + executor, _make_summary, html_content, language, length + ) + + +def _make_summary(html_content: str, language: str, length: int) -> str: + parser = PlaintextParser.from_string(html_content, Tokenizer(language)) + summarizer = LsaSummarizer() + summary = summarizer(parser.document, length) + return " ".join([str(sentence) for sentence in summary]) diff --git a/pyproject.toml b/pyproject.toml index 7e830466..72116774 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,8 @@ dependencies = [ "litedoc>=0.1.0.dev20241214103915", "newspaper3k>=0.2.8", "lxml[html_clean]>=5.3.0", - "aiofiles>=24.1.0" + "aiofiles>=24.1.0", + "sumy>=0.11.0" ] license = { text = "MIT, Mulan PSL v2" }