mirror of
https://github.com/LiteyukiStudio/nonebot-plugin-marshoai.git
synced 2025-01-26 18:12:47 +08:00
✨ 优化网页内容获取功能,添加摘要生成支持,重构相关函数
This commit is contained in:
parent
4d5af4bc00
commit
777e577a17
@ -350,10 +350,12 @@ async def marsho(
|
|||||||
tool_call.function.arguments.replace("'", '"')
|
tool_call.function.arguments.replace("'", '"')
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"调用函数 {tool_call.function.name} ,参数为 {function_args}"
|
f"调用函数 {tool_call.function.name.replace("-", ".")}\n参数:"
|
||||||
|
+ "\n".join([f"{k}={v}" for k, v in function_args.items()])
|
||||||
)
|
)
|
||||||
await UniMessage(
|
await UniMessage(
|
||||||
f"调用函数 {tool_call.function.name} ,参数为 {function_args}"
|
f"调用函数 {tool_call.function.name.replace("-", ".")}\n参数:"
|
||||||
|
+ "\n".join([f"{k}={v}" for k, v in function_args.items()])
|
||||||
).send()
|
).send()
|
||||||
# TODO 临时追加插件函数,若工具中没有则调用插件函数
|
# TODO 临时追加插件函数,若工具中没有则调用插件函数
|
||||||
if tools.has_function(tool_call.function.name):
|
if tools.has_function(tool_call.function.name):
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
import time
|
|
||||||
|
|
||||||
from httpx import AsyncClient
|
from httpx import AsyncClient
|
||||||
from newspaper import Article
|
from newspaper import Article # type: ignore
|
||||||
from nonebot import logger
|
from nonebot import logger
|
||||||
|
|
||||||
from nonebot_plugin_marshoai.plugin.func_call.caller import on_function_call
|
from nonebot_plugin_marshoai.plugin.func_call.caller import on_function_call
|
||||||
from nonebot_plugin_marshoai.plugin.func_call.params import String
|
from nonebot_plugin_marshoai.plugin.func_call.params import String
|
||||||
|
|
||||||
|
from .utils import make_html_summary
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
"User-Agent": "Firefox/90.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0"
|
"User-Agent": "Firefox/90.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0"
|
||||||
}
|
}
|
||||||
@ -16,9 +16,9 @@ headers = {
|
|||||||
description="使用网页链接(url)获取网页内容摘要,可以让AI上网查询资料"
|
description="使用网页链接(url)获取网页内容摘要,可以让AI上网查询资料"
|
||||||
).params(
|
).params(
|
||||||
url=String(description="网页链接"),
|
url=String(description="网页链接"),
|
||||||
typ=String(description="获取类型,摘要还是内容", enum=["摘要", "内容"]),
|
typ=String(description="获取类型,摘要还是内容"),
|
||||||
)
|
)
|
||||||
async def get_web_content(url: str, typ: str) -> str:
|
async def get_web_content(url: str) -> str:
|
||||||
"""使用网页链接获取网页内容摘要
|
"""使用网页链接获取网页内容摘要
|
||||||
为什么要获取摘要,不然token超限了
|
为什么要获取摘要,不然token超限了
|
||||||
|
|
||||||
@ -31,16 +31,19 @@ async def get_web_content(url: str, typ: str) -> str:
|
|||||||
async with AsyncClient(headers=headers) as client:
|
async with AsyncClient(headers=headers) as client:
|
||||||
try:
|
try:
|
||||||
response = await client.get(url)
|
response = await client.get(url)
|
||||||
t1 = time.time()
|
if response.status_code == 200:
|
||||||
article = Article(url)
|
article = Article(url)
|
||||||
article.set_html(response.text)
|
article.download(input_html=response.text)
|
||||||
article.parse()
|
article.parse()
|
||||||
t2 = time.time()
|
if article.text:
|
||||||
logger.debug(f"获取网页内容耗时: {t2 - t1}")
|
return article.text
|
||||||
if typ == "摘要":
|
elif article.html:
|
||||||
return f"标题: {article.title}\n作者: {article.authors}\n发布日期: {article.publish_date}"
|
return await make_html_summary(article.html)
|
||||||
elif typ == "内容":
|
else:
|
||||||
return f"标题: {article.title}\n作者: {article.authors}\n发布日期: {article.publish_date}\n摘要: {article.summary}\n正文: {article.text}"
|
return "未能获取到有效的网页内容"
|
||||||
|
else:
|
||||||
|
return "获取网页内容失败" + str(response.status_code)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"marsho builtin: 获取网页内容失败: {e}")
|
logger.error(f"marsho builtin: 获取网页内容失败: {e}")
|
||||||
return "获取网页内容失败:" + str(e)
|
return "获取网页内容失败:" + str(e)
|
||||||
|
@ -0,0 +1,35 @@
|
|||||||
|
import asyncio
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
|
from newspaper import Article # type: ignore
|
||||||
|
from sumy.nlp.tokenizers import Tokenizer # type: ignore
|
||||||
|
from sumy.parsers.plaintext import PlaintextParser # type: ignore
|
||||||
|
from sumy.summarizers.lsa import LsaSummarizer # type: ignore
|
||||||
|
|
||||||
|
# Module-level thread pool shared by all calls; make_html_summary submits the
# synchronous _make_summary to it via loop.run_in_executor.
executor = ThreadPoolExecutor()
|
||||||
|
|
||||||
|
|
||||||
|
async def make_html_summary(
    html_content: str, language: str = "english", length: int = 3
) -> str:
    """Generate a summary from HTML content without blocking the event loop.

    The summarisation itself is synchronous, so it is submitted to the
    module-level ``executor`` thread pool and the result is awaited.

    Args:
        html_content (str): HTML content to summarise.
        language (str, optional): Language name handed to the tokenizer.
            Defaults to "english".
        length (int, optional): Summary length handed to the summarizer.
            Defaults to 3.

    Returns:
        str: The summary.
    """
    # Inside a coroutine a loop is always running; get_running_loop() is the
    # documented accessor here, whereas get_event_loop() is discouraged in
    # coroutines and has deprecated implicit-creation behaviour.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(
        executor, _make_summary, html_content, language, length
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _make_summary(html_content: str, language: str, length: int) -> str:
    """Summarise HTML content synchronously with the LSA summarizer.

    The input is HTML, so it is parsed with sumy's ``HtmlParser`` instead of
    ``PlaintextParser``; the plaintext parser would treat tags, scripts and
    attributes as ordinary sentence text and summarise the markup itself.

    Args:
        html_content (str): HTML content to summarise.
        language (str): Language name used to build the tokenizer.
        length (int): Summary length passed to the summarizer.

    Returns:
        str: The selected sentences joined by single spaces, or an empty
        string when the input is blank.
    """
    # Local import: the module's top-level imports only bring in the
    # plaintext parser, and this keeps the change self-contained.
    from sumy.parsers.html import HtmlParser  # type: ignore

    # Build the tokenizer first so an unsupported language still raises.
    tokenizer = Tokenizer(language)
    if not html_content or not html_content.strip():
        # Nothing to summarise; avoid handing an empty document to the parser.
        return ""
    # No source URL is known at this point, so None is passed for it.
    parser = HtmlParser.from_string(html_content, None, tokenizer)
    summarizer = LsaSummarizer()
    summary = summarizer(parser.document, length)
    return " ".join(str(sentence) for sentence in summary)
|
@ -24,7 +24,8 @@ dependencies = [
|
|||||||
"litedoc>=0.1.0.dev20241214103915",
|
"litedoc>=0.1.0.dev20241214103915",
|
||||||
"newspaper3k>=0.2.8",
|
"newspaper3k>=0.2.8",
|
||||||
"lxml[html_clean]>=5.3.0",
|
"lxml[html_clean]>=5.3.0",
|
||||||
"aiofiles>=24.1.0"
|
"aiofiles>=24.1.0",
|
||||||
|
"sumy>=0.11.0"
|
||||||
|
|
||||||
]
|
]
|
||||||
license = { text = "MIT, Mulan PSL v2" }
|
license = { text = "MIT, Mulan PSL v2" }
|
||||||
|
Loading…
x
Reference in New Issue
Block a user