add CrossRef builtin tool: doi query and title query (#7406)

This commit is contained in:
RookieAgent 2024-08-19 19:14:20 +08:00 committed by GitHub
parent 53cf756207
commit 4ff4859036
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 371 additions and 0 deletions

View File

@ -0,0 +1,49 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 19.2.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
viewBox="0 0 200 130.2" style="enable-background:new 0 0 200 130.2;" xml:space="preserve">
<style type="text/css">
.st0{fill:#3EB1C8;}
.st1{fill:#D8D2C4;}
.st2{fill:#4F5858;}
.st3{fill:#FFC72C;}
.st4{fill:#EF3340;}
</style>
<g>
<polygon class="st0" points="111.8,95.5 111.8,66.8 135.4,59 177.2,73.3 "/>
<polygon class="st1" points="153.6,36.8 111.8,51.2 135.4,59 177.2,44.6 "/>
<polygon class="st2" points="135.4,59 177.2,44.6 177.2,73.3 "/>
<polygon class="st3" points="177.2,0.3 177.2,29 153.6,36.8 111.8,22.5 "/>
<polygon class="st4" points="153.6,36.8 111.8,51.2 111.8,22.5 "/>
<g>
<g>
<g>
<g>
<path class="st2" d="M26.3,104.8c-0.5-3.7-4.1-6.5-8.1-6.5c-7.3,0-10.1,6.2-10.1,12.7c0,6.2,2.8,12.4,10.1,12.4
c5,0,7.8-3.4,8.4-8.3h7.9c-0.8,9.2-7.2,15.2-16.3,15.2C6.8,130.2,0,121.7,0,111c0-11,6.8-19.6,18.2-19.6c8.2,0,15,4.8,16,13.3
H26.3z"/>
<path class="st2" d="M37.4,102.5h7v5h0.1c1.4-3.4,5-5.7,8.6-5.7c0.5,0,1.1,0.1,1.6,0.3v6.9c-0.7-0.2-1.8-0.3-2.6-0.3
c-5.4,0-7.3,3.9-7.3,8.6v12.1h-7.4V102.5z"/>
<path class="st2" d="M68.7,101.8c8.5,0,13.9,5.6,13.9,14.2c0,8.5-5.5,14.1-13.9,14.1c-8.4,0-13.9-5.6-13.9-14.1
C54.9,107.4,60.3,101.8,68.7,101.8z M68.7,124.5c5,0,6.5-4.3,6.5-8.6c0-4.3-1.5-8.6-6.5-8.6c-5,0-6.5,4.3-6.5,8.6
C62.2,120.2,63.8,124.5,68.7,124.5z"/>
<path class="st2" d="M91.2,120.6c0.1,3.2,2.8,4.5,5.7,4.5c2.1,0,4.8-0.8,4.8-3.4c0-2.2-3.1-3-8.4-4.2c-4.3-0.9-8.5-2.4-8.5-7.2
c0-6.9,5.9-8.6,11.7-8.6c5.9,0,11.3,2,11.8,8.6h-7c-0.2-2.9-2.4-3.6-5-3.6c-1.7,0-4.1,0.3-4.1,2.5c0,2.6,4.2,3,8.4,4
c4.3,1,8.5,2.5,8.5,7.5c0,7.1-6.1,9.3-12.3,9.3c-6.2,0-12.3-2.3-12.6-9.5H91.2z"/>
<path class="st2" d="M118.1,120.6c0.1,3.2,2.8,4.5,5.7,4.5c2.1,0,4.8-0.8,4.8-3.4c0-2.2-3.1-3-8.4-4.2
c-4.3-0.9-8.5-2.4-8.5-7.2c0-6.9,5.9-8.6,11.7-8.6c5.9,0,11.3,2,11.8,8.6h-7c-0.2-2.9-2.4-3.6-5-3.6c-1.7,0-4.1,0.3-4.1,2.5
c0,2.6,4.2,3,8.4,4c4.3,1,8.5,2.5,8.5,7.5c0,7.1-6.1,9.3-12.3,9.3c-6.2,0-12.3-2.3-12.6-9.5H118.1z"/>
<path class="st2" d="M138.4,102.5h7v5h0.1c1.4-3.4,5-5.7,8.6-5.7c0.5,0,1.1,0.1,1.6,0.3v6.9c-0.7-0.2-1.8-0.3-2.6-0.3
c-5.4,0-7.3,3.9-7.3,8.6v12.1h-7.4V102.5z"/>
<path class="st2" d="M163.7,117.7c0.2,4.7,2.5,6.8,6.6,6.8c3,0,5.3-1.8,5.8-3.5h6.5c-2.1,6.3-6.5,9-12.6,9
c-8.5,0-13.7-5.8-13.7-14.1c0-8,5.6-14.2,13.7-14.2c9.1,0,13.6,7.7,13,15.9H163.7z M175.7,113.1c-0.7-3.7-2.3-5.7-5.9-5.7
c-4.7,0-6,3.6-6.1,5.7H175.7z"/>
<path class="st2" d="M187.2,107.5h-4.4v-4.9h4.4v-2.1c0-4.7,3-8.2,9-8.2c1.3,0,2.6,0.2,3.9,0.2V98c-0.9-0.1-1.8-0.2-2.7-0.2
c-2,0-2.8,0.8-2.8,3.1v1.6h5.1v4.9h-5.1v21.9h-7.4V107.5z"/>
</g>
</g>
</g>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 3.0 KiB

View File

@ -0,0 +1,20 @@
from core.tools.errors import ToolProviderCredentialValidationError
from core.tools.provider.builtin.crossref.tools.query_doi import CrossRefQueryDOITool
from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
class CrossRefProvider(BuiltinToolProviderController):
    """
    Builtin tool provider for the CrossRef tools.
    """

    def _validate_credentials(self, credentials: dict) -> None:
        """
        Validate the provider credentials by running a sample DOI lookup.

        :param credentials: the credential values configured for this provider
        :raises ToolProviderCredentialValidationError: if the sample lookup fails for any reason
        """
        try:
            CrossRefQueryDOITool().fork_tool_runtime(
                runtime={
                    "credentials": credentials,
                }
            ).invoke(
                user_id='',
                tool_parameters={
                    "doi": '10.1007/s00894-022-05373-8',
                },
            )
        except Exception as e:
            # Chain the original exception so the root cause stays visible in the traceback.
            raise ToolProviderCredentialValidationError(str(e)) from e

View File

@ -0,0 +1,29 @@
identity:
author: Sakura4036
name: crossref
label:
en_US: CrossRef
zh_Hans: CrossRef
description:
en_US: Crossref is a cross-publisher reference linking registration query system using DOI technology created in 2000. Crossref establishes cross-database links between the reference list and citation full text of papers, making it very convenient for readers to access the full text of papers.
zh_Hans: Crossref是于2000年创建的使用DOI技术的跨出版商参考文献链接注册查询系统。Crossref建立了在论文的参考文献列表和引文全文之间的跨数据库链接,使得读者能够非常便捷地获取文献全文。
icon: icon.svg
tags:
- search
credentials_for_provider:
mailto:
type: text-input
required: true
label:
en_US: email address
zh_Hans: email地址
pt_BR: email address
placeholder:
en_US: Please input your email address
zh_Hans: 请输入你的email地址
pt_BR: Please input your email address
help:
en_US: According to the requirements of Crossref, an email address is required
zh_Hans: 根据Crossref的要求需要提供一个邮箱地址
pt_BR: According to the requirements of Crossref, an email address is required
url: https://api.crossref.org/swagger-ui/index.html

View File

@ -0,0 +1,25 @@
from typing import Any, Union
import requests
from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.errors import ToolParameterValidationError
from core.tools.tool.builtin_tool import BuiltinTool
class CrossRefQueryDOITool(BuiltinTool):
    """
    Tool for querying the metadata of a publication using its DOI.
    """

    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """
        Look up a single work on CrossRef by DOI.

        :param user_id: id of the invoking user (not used by this tool)
        :param tool_parameters: tool parameters; must contain a non-empty ``doi``
        :return: a JSON message holding the ``message`` object of the CrossRef response
        :raises ToolParameterValidationError: if ``doi`` is missing or blank
        :raises requests.HTTPError: if CrossRef answers with an error status
        """
        from urllib.parse import quote

        # Strip before validating so a whitespace-only DOI is rejected too.
        doi = str(tool_parameters.get('doi') or '').strip()
        if not doi:
            raise ToolParameterValidationError('doi is required.')
        # doc: https://github.com/CrossRef/rest-api-doc
        # Percent-encode the DOI (keeping the prefix/suffix slash) so characters
        # such as '#', '?' or '<' cannot truncate or corrupt the request path.
        url = f"https://api.crossref.org/works/{quote(doi, safe='/')}"
        # Without a timeout requests may block forever on a stalled connection.
        response = requests.get(url, timeout=(5, 30))
        response.raise_for_status()
        payload = response.json()
        message = payload.get('message', {})
        return self.create_json_message(message)

View File

@ -0,0 +1,23 @@
identity:
name: crossref_query_doi
author: Sakura4036
label:
en_US: CrossRef Query DOI
zh_Hans: CrossRef DOI 查询
pt_BR: CrossRef Query DOI
description:
human:
en_US: A tool for searching literature information using CrossRef by DOI.
zh_Hans: 一个使用CrossRef通过DOI获取文献信息的工具。
pt_BR: A tool for searching literature information using CrossRef by DOI.
llm: A tool for searching literature information using CrossRef by DOI.
parameters:
- name: doi
type: string
required: true
label:
en_US: DOI
zh_Hans: DOI
pt_BR: DOI
llm_description: DOI for searching in CrossRef
form: llm

View File

@ -0,0 +1,120 @@
import time
from typing import Any, Union
import requests
from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool
def convert_time_str_to_seconds(time_str: str) -> int:
    """
    Turn a compact duration string into a number of seconds.

    The input is lower-cased and all spaces are removed before parsing.
    Hours (``h``), minutes (``m``) and seconds (``s``) are read in that order;
    a string carrying none of these unit letters yields 0.

    example: 1s -> 1, 1m30s -> 90, 1h30m -> 5400, 1h30m30s -> 5430
    """
    remaining = time_str.lower().strip().replace(' ', '')
    total = 0
    # Peel off the hour and minute components from the left, largest unit first.
    for unit, factor in (('h', 3600), ('m', 60)):
        if unit in remaining:
            amount, remaining = remaining.split(unit)
            total += int(amount) * factor
    # Whatever is left can only be the seconds component.
    if 's' in remaining:
        total += int(remaining.replace('s', ''))
    return total
class CrossRefQueryTitleAPI:
    """
    Tool for querying the metadata of a publication using its title.
    Crossref API doc: https://github.com/CrossRef/rest-api-doc
    """
    query_url_template: str = "https://api.crossref.org/works?query.bibliographic={query}&rows={rows}&offset={offset}&sort={sort}&order={order}&mailto={mailto}"
    # Page size used when a request has to be split into several calls; refreshed
    # from the ``x-ratelimit-limit`` response header after every call.
    rate_limit: int = 50
    # Pause (seconds) between consecutive paged calls; refreshed from the
    # ``x-ratelimit-interval`` response header after every call.
    rate_interval: float = 1
    # Hard upper bound on the number of rows a single query() may ask for.
    max_limit: int = 1000
    # (connect, read) timeout in seconds for every HTTP call.
    request_timeout: tuple = (5, 30)

    def __init__(self, mailto: str):
        """
        :param mailto: contact email address sent to CrossRef with every request
        """
        self.mailto = mailto

    def _update_rate_limit(self, headers) -> None:
        """Refresh rate_limit / rate_interval from response headers, keeping the old values if they are absent or malformed."""
        try:
            rate_limit = int(headers['x-ratelimit-limit'])
            # convert time string to seconds
            rate_interval = convert_time_str_to_seconds(headers['x-ratelimit-interval'])
        except (KeyError, ValueError):
            return
        # A non-positive limit would break paging in query(); ignore it.
        if rate_limit > 0:
            self.rate_limit = rate_limit
            self.rate_interval = rate_interval

    def _query(self, query: str, rows: int = 5, offset: int = 0, sort: str = 'relevance', order: str = 'desc', fuzzy_query: bool = False) -> list[dict]:
        """
        Run one request against the CrossRef works endpoint.

        :param query: the title of the publication
        :param rows: the number of results to return
        :param offset: the number of leading results to skip
        :param sort: the sort field
        :param order: the sort order
        :param fuzzy_query: whether to return all items that match the query
        :return: all returned items when ``fuzzy_query`` is true; otherwise a list with the
                 first item whose first title equals ``query`` case-insensitively, or ``[]``
        :raises requests.HTTPError: if CrossRef answers with an error status
        """
        from urllib.parse import quote

        def encode(value) -> str:
            # Percent-encode every value so '&', '#', '=' or spaces in a title
            # (or '+' in an email address) cannot alter the query string.
            return quote(str(value), safe='')

        url = self.query_url_template.format(
            query=encode(query),
            rows=encode(rows),
            offset=encode(offset),
            sort=encode(sort),
            order=encode(order),
            mailto=encode(self.mailto),
        )
        # Without a timeout requests may block forever on a stalled connection.
        response = requests.get(url, timeout=self.request_timeout)
        response.raise_for_status()
        self._update_rate_limit(response.headers)

        payload = response.json()
        if payload.get('status') != 'ok':
            return []
        items = payload.get('message', {}).get('items', [])
        if fuzzy_query:
            # fuzzy query return all items
            return items

        wanted = query.lower()
        for paper in items:
            # Some items carry no title at all; skip them instead of crashing.
            titles = paper.get('title') or []
            if titles and titles[0].lower() == wanted:
                return [paper]
        return []

    def query(self, query: str, rows: int = 5, sort: str = 'relevance', order: str = 'desc', fuzzy_query: bool = False) -> list[dict]:
        """
        Query the metadata of a publication using its title.

        Requests for more than ``rate_limit`` rows are split into pages of at most
        ``rate_limit`` rows, with a pause of ``rate_interval`` seconds between pages.
        The total number of rows requested never exceeds ``min(rows, max_limit)``.

        :param query: the title of the publication
        :param rows: the number of results to return
        :param sort: the sort field
        :param order: the sort order
        :param fuzzy_query: whether to return all items that match the query
        :return: the matching items (at most one item when ``fuzzy_query`` is false)
        """
        rows = min(rows, self.max_limit)
        # Snapshot the page size: _query() may update self.rate_limit mid-loop,
        # which must not shift the offsets of pages still to be fetched.
        page_size = max(1, self.rate_limit)
        if rows <= page_size:
            # query once
            return self._query(query, rows, sort=sort, order=order, fuzzy_query=fuzzy_query)

        # query multiple times
        results = []
        for offset in range(0, rows, page_size):
            # The last page only asks for what is still missing.
            page_rows = min(page_size, rows - offset)
            page = self._query(query, rows=page_rows, offset=offset, sort=sort, order=order, fuzzy_query=fuzzy_query)
            if fuzzy_query:
                results.extend(page)
                if len(page) < page_rows:
                    # A short page means there is nothing further to fetch.
                    break
            elif page:
                # fuzzy_query=False, only one result
                return page
            if offset + page_size < rows:
                # Pause only when another request will follow.
                time.sleep(self.rate_interval)
        return results
class CrossRefQueryTitleTool(BuiltinTool):
    """
    Tool for querying the metadata of a publication using its title.
    """

    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """
        Search CrossRef by title and return one JSON message per matching work.

        :param user_id: id of the invoking user (not used by this tool)
        :param tool_parameters: tool parameters: ``query`` (required), ``fuzzy_query``,
                                ``limit`` (legacy alias ``rows``), ``sort`` and ``order``
        :return: a list with one JSON message per returned work
        :raises ToolParameterValidationError: if ``query`` is missing or ``limit`` is not an integer
        """
        from core.tools.errors import ToolParameterValidationError

        query = tool_parameters.get('query')
        if not query:
            raise ToolParameterValidationError('query is required.')
        fuzzy_query = tool_parameters.get('fuzzy_query', False)

        # The tool manifest declares this parameter as "limit"; "rows" is still
        # honoured as a fallback so existing callers keep working.
        limit = tool_parameters.get('limit')
        if limit is None:
            limit = tool_parameters.get('rows', 3)
        try:
            # Number parameters may arrive as float or str; the API needs an int.
            rows = int(limit)
        except (TypeError, ValueError) as e:
            raise ToolParameterValidationError('limit must be an integer.') from e

        sort = tool_parameters.get('sort', 'relevance')
        order = tool_parameters.get('order', 'desc')
        mailto = self.runtime.credentials['mailto']
        result = CrossRefQueryTitleAPI(mailto).query(query, rows, sort, order, fuzzy_query)
        return [self.create_json_message(r) for r in result]

View File

@ -0,0 +1,105 @@
identity:
name: crossref_query_title
author: Sakura4036
label:
en_US: CrossRef Title Query
zh_Hans: CrossRef 标题查询
pt_BR: CrossRef Title Query
description:
human:
en_US: A tool for querying literature information using CrossRef by title.
zh_Hans: 一个使用CrossRef通过标题搜索文献信息的工具。
pt_BR: A tool for querying literature information using CrossRef by title.
llm: A tool for querying literature information using CrossRef by title.
parameters:
- name: query
type: string
required: true
label:
en_US: Title
zh_Hans: 查询语句
pt_BR: Title
human_description:
en_US: Query bibliographic information, useful for citation look up. Includes titles, authors, ISSNs and publication years
zh_Hans: 用于搜索文献信息,有助于查找引用。包括标题、作者、ISSN和出版年份
pt_BR: Query bibliographic information, useful for citation look up. Includes titles, authors, ISSNs and publication years
llm_description: keywords for querying in CrossRef
form: llm
- name: fuzzy_query
type: boolean
default: false
label:
en_US: Whether to fuzzy search
zh_Hans: 是否模糊搜索
pt_BR: Whether to fuzzy search
human_description:
en_US: used for selecting the query type, fuzzy query returns more results, precise query returns 1 or none
zh_Hans: 用于选择搜索类型,模糊搜索返回更多结果,精确搜索返回1条结果或无
pt_BR: used for selecting the query type, fuzzy query returns more results, precise query returns 1 or none
form: form
- name: limit
type: number
required: false
label:
en_US: max query number
zh_Hans: 最大搜索数
pt_BR: max query number
human_description:
en_US: max query number(fuzzy search returns the maximum number of results or precise search the maximum number of matches)
zh_Hans: 最大搜索数(模糊搜索返回的最大结果数或精确搜索最大匹配数)
pt_BR: max query number(fuzzy search returns the maximum number of results or precise search the maximum number of matches)
form: llm
default: 50
- name: sort
type: select
required: true
options:
- value: relevance
label:
en_US: relevance
zh_Hans: 相关性
pt_BR: relevance
- value: published
label:
en_US: publication date
zh_Hans: 出版日期
pt_BR: publication date
- value: references-count
label:
en_US: references-count
zh_Hans: 引用次数
pt_BR: references-count
default: relevance
label:
en_US: sorting field
zh_Hans: 排序字段
pt_BR: sorting field
human_description:
en_US: Sorting of query results
zh_Hans: 检索结果的排序字段
pt_BR: Sorting of query results
form: form
- name: order
type: select
required: true
options:
- value: desc
label:
en_US: descending
zh_Hans: 降序
pt_BR: descending
- value: asc
label:
en_US: ascending
zh_Hans: 升序
pt_BR: ascending
default: desc
label:
en_US: Order
zh_Hans: 排序
pt_BR: Order
human_description:
en_US: Order of query results
zh_Hans: 检索结果的排序方式
pt_BR: Order of query results
form: form