36 lines
1.1 KiB
Python

import asyncio
from concurrent.futures import ThreadPoolExecutor
from newspaper import Article # type: ignore
from sumy.nlp.tokenizers import Tokenizer # type: ignore
from sumy.parsers.plaintext import PlaintextParser # type: ignore
from sumy.summarizers.lsa import LsaSummarizer # type: ignore
executor = ThreadPoolExecutor()
async def make_html_summary(
html_content: str, language: str = "english", length: int = 3
) -> str:
"""使用html内容生成摘要
Args:
html_content (str): html内容
language (str, optional): 语言. Defaults to "english".
length (int, optional): 摘要长度. Defaults to 3.
Returns:
str: 摘要
"""
loop = asyncio.get_event_loop()
return await loop.run_in_executor(
executor, _make_summary, html_content, language, length
)
def _make_summary(html_content: str, language: str, length: int) -> str:
parser = PlaintextParser.from_string(html_content, Tokenizer(language))
summarizer = LsaSummarizer()
summary = summarizer(parser.document, length)
return " ".join([str(sentence) for sentence in summary])