mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-06-04 11:14:10 +08:00
fix: import jieba.analyse (#12133)
Signed-off-by: -LAN- <laipz8200@outlook.com>
This commit is contained in:
parent
26b5680913
commit
dae1b5a619
@@ -1,5 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
from typing import Optional
|
from typing import Optional, cast
|
||||||
|
|
||||||
|
|
||||||
class JiebaKeywordTableHandler:
|
class JiebaKeywordTableHandler:
|
||||||
@@ -8,18 +8,20 @@ class JiebaKeywordTableHandler:
|
|||||||
|
|
||||||
from core.rag.datasource.keyword.jieba.stopwords import STOPWORDS
|
from core.rag.datasource.keyword.jieba.stopwords import STOPWORDS
|
||||||
|
|
||||||
jieba.analyse.default_tfidf.stop_words = STOPWORDS
|
jieba.analyse.default_tfidf.stop_words = STOPWORDS # type: ignore
|
||||||
|
|
||||||
def extract_keywords(self, text: str, max_keywords_per_chunk: Optional[int] = 10) -> set[str]:
|
def extract_keywords(self, text: str, max_keywords_per_chunk: Optional[int] = 10) -> set[str]:
|
||||||
"""Extract keywords with JIEBA tfidf."""
|
"""Extract keywords with JIEBA tfidf."""
|
||||||
import jieba # type: ignore
|
import jieba.analyse # type: ignore
|
||||||
|
|
||||||
keywords = jieba.analyse.extract_tags(
|
keywords = jieba.analyse.extract_tags(
|
||||||
sentence=text,
|
sentence=text,
|
||||||
topK=max_keywords_per_chunk,
|
topK=max_keywords_per_chunk,
|
||||||
)
|
)
|
||||||
|
# jieba.analyse.extract_tags returns list[Any] when withFlag is False by default.
|
||||||
|
keywords = cast(list[str], keywords)
|
||||||
|
|
||||||
return set(self._expand_tokens_with_subtokens(keywords))
|
return set(self._expand_tokens_with_subtokens(set(keywords)))
|
||||||
|
|
||||||
def _expand_tokens_with_subtokens(self, tokens: set[str]) -> set[str]:
|
def _expand_tokens_with_subtokens(self, tokens: set[str]) -> set[str]:
|
||||||
"""Get subtokens from a list of tokens., filtering for stopwords."""
|
"""Get subtokens from a list of tokens., filtering for stopwords."""
|
||||||
|
Loading…
x
Reference in New Issue
Block a user