mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-15 19:35:58 +08:00
Add component Wikipedia (#1513)
### What problem does this PR solve? ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
parent
fdc21ec853
commit
258a10fb74
@ -11,6 +11,7 @@ from .rewrite import RewriteQuestion, RewriteQuestionParam
|
||||
from .keyword import KeywordExtract, KeywordExtractParam
|
||||
from .baidu import Baidu, BaiduParam
|
||||
from .duckduckgo import DuckDuckGo, DuckDuckGoParam
|
||||
from .wikipedia import Wikipedia, WikipediaParam
|
||||
|
||||
|
||||
def component_class(class_name):
|
||||
|
61
graph/component/wikipedia.py
Normal file
61
graph/component/wikipedia.py
Normal file
@ -0,0 +1,61 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import random
|
||||
from abc import ABC
|
||||
from functools import partial
|
||||
import wikipedia
|
||||
import pandas as pd
|
||||
from graph.settings import DEBUG
|
||||
from graph.component.base import ComponentBase, ComponentParamBase
|
||||
|
||||
|
||||
class WikipediaParam(ComponentParamBase):
    """
    Parameters accepted by the Wikipedia component.
    """

    def __init__(self):
        super().__init__()
        # Passed as ``results=`` to ``wikipedia.search`` by the component,
        # i.e. the maximum number of search hits requested.
        self.top_n = 10

    def check(self):
        """Validate that ``top_n`` is a positive integer."""
        self.check_positive_integer(self.top_n, "Top N")
|
||||
|
||||
|
||||
class Wikipedia(ComponentBase, ABC):
    """Component that searches Wikipedia for the text produced upstream."""

    component_name = "Wikipedia"

    def _run(self, history, **kwargs):
        """Search Wikipedia and return the matching page summaries.

        The ``content`` entries of the component input are joined with
        ``" - "`` to form the search query. Each search hit is fetched and
        rendered as ``<a href="URL">TITLE</a> SUMMARY``.

        ``history`` and ``kwargs`` are accepted for interface compatibility
        and are not used here.

        Returns:
            A ``pandas.DataFrame`` with a single ``content`` column (one row
            per page that could be fetched), or the result of
            ``Wikipedia.be_output(...)`` when the query is empty or no page
            could be retrieved.
        """
        upstream = self.get_input()
        query = " - ".join(upstream["content"]) if "content" in upstream else ""

        # WikipediaParam does not declare a ``no`` attribute, so reading
        # ``self._param.no`` directly can raise AttributeError. Use it when
        # present (keeps existing behavior) and fall back to an empty answer.
        empty_answer = getattr(self._param, "no", "")

        if not query:
            return Wikipedia.be_output(empty_answer)

        wiki_res = []
        for wiki_key in wikipedia.search(query, results=self._param.top_n):
            try:
                page = wikipedia.page(title=wiki_key, auto_suggest=False)
                wiki_res.append({"content": '<a href="' + page.url + '">' + page.title + '</a> ' + page.summary})
            except Exception as e:
                # Best effort: a hit that cannot be loaded is reported and
                # skipped so the remaining hits are still returned.
                print(e)

        if not wiki_res:
            return Wikipedia.be_output(empty_answer)

        df = pd.DataFrame(wiki_res)
        if DEBUG:
            print(df, ":::::::::::::::::::::::::::::::::")
        return df
|
62
graph/test/dsl_examples/keyword_wikipedia_and_generate.json
Normal file
62
graph/test/dsl_examples/keyword_wikipedia_and_generate.json
Normal file
@ -0,0 +1,62 @@
|
||||
{
|
||||
"components": {
|
||||
"begin": {
|
||||
"obj":{
|
||||
"component_name": "Begin",
|
||||
"params": {
|
||||
"prologue": "Hi there!"
|
||||
}
|
||||
},
|
||||
"downstream": ["answer:0"],
|
||||
"upstream": []
|
||||
},
|
||||
"answer:0": {
|
||||
"obj": {
|
||||
"component_name": "Answer",
|
||||
"params": {}
|
||||
},
|
||||
"downstream": ["keyword:0"],
|
||||
"upstream": ["begin"]
|
||||
},
|
||||
"keyword:0": {
|
||||
"obj": {
|
||||
"component_name": "KeywordExtract",
|
||||
"params": {
|
||||
"llm_id": "deepseek-chat",
|
||||
"prompt": "- Role: You're a question analyzer.\n - Requirements:\n - Summarize user's question, and give top %s important keyword/phrase.\n - Use comma as a delimiter to separate keywords/phrases.\n - Answer format: (in language of user's question)\n - keyword: ",
|
||||
"temperature": 0.2,
|
||||
"top_n": 1
|
||||
}
|
||||
},
|
||||
"downstream": ["wikipedia:0"],
|
||||
"upstream": ["answer:0"]
|
||||
},
|
||||
"wikipedia:0": {
|
||||
"obj":{
|
||||
"component_name": "Wikipedia",
|
||||
"params": {
|
||||
"top_n": 10
|
||||
}
|
||||
},
|
||||
"downstream": ["generate:0"],
|
||||
"upstream": ["keyword:0"]
|
||||
},
|
||||
"generate:0": {
|
||||
"obj": {
|
||||
"component_name": "Generate",
|
||||
"params": {
|
||||
"llm_id": "deepseek-chat",
|
||||
"prompt": "You are an intelligent assistant. Please answer the question based on content from Wikipedia. When the answer from Wikipedia is incomplete, you need to output the URL link of the corresponding content as well. When all the content searched from Wikipedia is irrelevant to the question, your answer must include the sentence, \"The answer you are looking for is not found in the Wikipedia!\". Answers need to consider chat history.\n The content of Wikipedia is as follows:\n {input}\n The above is the content of Wikipedia.",
|
||||
"temperature": 0.2
|
||||
}
|
||||
},
|
||||
"downstream": ["answer:0"],
|
||||
"upstream": ["wikipedia:0"]
|
||||
}
|
||||
},
|
||||
"history": [],
|
||||
"path": [],
|
||||
"messages": [],
|
||||
"reference": {},
|
||||
"answer": []
|
||||
}
|
@ -148,4 +148,5 @@ mistralai==0.4.2
|
||||
boto3==1.34.140
|
||||
duckduckgo_search==6.1.9
|
||||
google-generativeai==0.7.2
|
||||
groq==0.9.0
|
||||
groq==0.9.0
|
||||
wikipedia==1.4.0
|
||||
|
@ -150,3 +150,4 @@ boto3==1.34.140
|
||||
duckduckgo_search==6.1.9
|
||||
google-generativeai==0.7.2
|
||||
groq==0.9.0
|
||||
wikipedia==1.4.0
|
||||
|
@ -134,4 +134,5 @@ mistralai==0.4.2
|
||||
boto3==1.34.140
|
||||
duckduckgo_search==6.1.9
|
||||
google-generativeai==0.7.2
|
||||
groq==0.9.0
|
||||
groq==0.9.0
|
||||
wikipedia==1.4.0
|
||||
|
Loading…
x
Reference in New Issue
Block a user