From 4ff4859036e8253fec7208589766e05c870171c0 Mon Sep 17 00:00:00 2001 From: RookieAgent <42060616+Sakura4036@users.noreply.github.com> Date: Mon, 19 Aug 2024 19:14:20 +0800 Subject: [PATCH] add CrossRef builtin tool: doi query and title query (#7406) --- .../builtin/crossref/_assets/icon.svg | 49 +++++++ .../provider/builtin/crossref/crossref.py | 20 +++ .../provider/builtin/crossref/crossref.yaml | 29 +++++ .../builtin/crossref/tools/query_doi.py | 25 ++++ .../builtin/crossref/tools/query_doi.yaml | 23 ++++ .../builtin/crossref/tools/query_title.py | 120 ++++++++++++++++++ .../builtin/crossref/tools/query_title.yaml | 105 +++++++++++++++ 7 files changed, 371 insertions(+) create mode 100644 api/core/tools/provider/builtin/crossref/_assets/icon.svg create mode 100644 api/core/tools/provider/builtin/crossref/crossref.py create mode 100644 api/core/tools/provider/builtin/crossref/crossref.yaml create mode 100644 api/core/tools/provider/builtin/crossref/tools/query_doi.py create mode 100644 api/core/tools/provider/builtin/crossref/tools/query_doi.yaml create mode 100644 api/core/tools/provider/builtin/crossref/tools/query_title.py create mode 100644 api/core/tools/provider/builtin/crossref/tools/query_title.yaml diff --git a/api/core/tools/provider/builtin/crossref/_assets/icon.svg b/api/core/tools/provider/builtin/crossref/_assets/icon.svg new file mode 100644 index 0000000000..aa629de7cb --- /dev/null +++ b/api/core/tools/provider/builtin/crossref/_assets/icon.svg @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/api/core/tools/provider/builtin/crossref/crossref.py b/api/core/tools/provider/builtin/crossref/crossref.py new file mode 100644 index 0000000000..404e483e0d --- /dev/null +++ b/api/core/tools/provider/builtin/crossref/crossref.py @@ -0,0 +1,20 @@ +from core.tools.errors import ToolProviderCredentialValidationError +from core.tools.provider.builtin.crossref.tools.query_doi import CrossRefQueryDOITool +from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController + + +class CrossRefProvider(BuiltinToolProviderController): + def _validate_credentials(self, credentials: dict) -> None: + try: + CrossRefQueryDOITool().fork_tool_runtime( + runtime={ + "credentials": credentials, + } + ).invoke( + user_id='', + tool_parameters={ + "doi": '10.1007/s00894-022-05373-8', + }, + ) + except Exception as e: + raise ToolProviderCredentialValidationError(str(e)) diff --git a/api/core/tools/provider/builtin/crossref/crossref.yaml b/api/core/tools/provider/builtin/crossref/crossref.yaml new file mode 100644 index 0000000000..da67fbec3a --- /dev/null +++ b/api/core/tools/provider/builtin/crossref/crossref.yaml @@ -0,0 +1,29 @@ +identity: + author: Sakura4036 + name: crossref + label: + en_US: CrossRef + zh_Hans: CrossRef + description: + en_US: Crossref is a cross-publisher reference linking registration query system using DOI technology created in 2000. Crossref establishes cross-database links between the reference list and citation full text of papers, making it very convenient for readers to access the full text of papers. + zh_Hans: Crossref是于2000年创建的使用DOI技术的跨出版商参考文献链接注册查询系统。Crossref建立了在论文的参考文献列表和引文全文之间的跨数据库链接,使得读者能够非常便捷地获取文献全文。 + icon: icon.svg + tags: + - search +credentials_for_provider: + mailto: + type: text-input + required: true + label: + en_US: email address + zh_Hans: email地址 + pt_BR: email address + placeholder: + en_US: Please input your email address + zh_Hans: 请输入你的email地址 + pt_BR: Please input your email address + help: + en_US: According to the requirements of Crossref, an email address is required + zh_Hans: 根据Crossref的要求,需要提供一个邮箱地址 + pt_BR: According to the requirements of Crossref, an email address is required + url: https://api.crossref.org/swagger-ui/index.html diff --git a/api/core/tools/provider/builtin/crossref/tools/query_doi.py b/api/core/tools/provider/builtin/crossref/tools/query_doi.py new file mode 100644 index 0000000000..a43c0989e4 --- /dev/null +++ b/api/core/tools/provider/builtin/crossref/tools/query_doi.py @@ -0,0 +1,25 @@ +from typing import Any, Union + +import requests + +from core.tools.entities.tool_entities import ToolInvokeMessage +from core.tools.errors import ToolParameterValidationError +from core.tools.tool.builtin_tool import BuiltinTool + + +class CrossRefQueryDOITool(BuiltinTool): + """ + Tool for querying the metadata of a publication using its DOI. + """ + def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]: + doi = tool_parameters.get('doi') + if not doi: + raise ToolParameterValidationError('doi is required.') + # doc: https://github.com/CrossRef/rest-api-doc + url = f"https://api.crossref.org/works/{doi}" + response = requests.get(url) + response.raise_for_status() + response = response.json() + message = response.get('message', {}) + + return self.create_json_message(message) diff --git a/api/core/tools/provider/builtin/crossref/tools/query_doi.yaml b/api/core/tools/provider/builtin/crossref/tools/query_doi.yaml new file mode 100644 index 0000000000..9c16da25ed --- /dev/null +++ b/api/core/tools/provider/builtin/crossref/tools/query_doi.yaml @@ -0,0 +1,23 @@ +identity: + name: crossref_query_doi + author: Sakura4036 + label: + en_US: CrossRef Query DOI + zh_Hans: CrossRef DOI 查询 + pt_BR: CrossRef Query DOI +description: + human: + en_US: A tool for searching literature information using CrossRef by DOI. + zh_Hans: 一个使用CrossRef通过DOI获取文献信息的工具。 + pt_BR: A tool for searching literature information using CrossRef by DOI. + llm: A tool for searching literature information using CrossRef by DOI. +parameters: + - name: doi + type: string + required: true + label: + en_US: DOI + zh_Hans: DOI + pt_BR: DOI + llm_description: DOI for searching in CrossRef + form: llm diff --git a/api/core/tools/provider/builtin/crossref/tools/query_title.py b/api/core/tools/provider/builtin/crossref/tools/query_title.py new file mode 100644 index 0000000000..946aa6dc94 --- /dev/null +++ b/api/core/tools/provider/builtin/crossref/tools/query_title.py @@ -0,0 +1,120 @@ +import time +from typing import Any, Union + +import requests + +from core.tools.entities.tool_entities import ToolInvokeMessage +from core.tools.tool.builtin_tool import BuiltinTool + + +def convert_time_str_to_seconds(time_str: str) -> int: + """ + Convert a time string to seconds. + example: 1s -> 1, 1m30s -> 90, 1h30m -> 5400, 1h30m30s -> 5430 + """ + time_str = time_str.lower().strip().replace(' ', '') + seconds = 0 + if 'h' in time_str: + hours, time_str = time_str.split('h') + seconds += int(hours) * 3600 + if 'm' in time_str: + minutes, time_str = time_str.split('m') + seconds += int(minutes) * 60 + if 's' in time_str: + seconds += int(time_str.replace('s', '')) + return seconds + + +class CrossRefQueryTitleAPI: + """ + Tool for querying the metadata of a publication using its title. + Crossref API doc: https://github.com/CrossRef/rest-api-doc + """ + query_url_template: str = "https://api.crossref.org/works?query.bibliographic={query}&rows={rows}&offset={offset}&sort={sort}&order={order}&mailto={mailto}" + rate_limit: int = 50 + rate_interval: float = 1 + max_limit: int = 1000 + + def __init__(self, mailto: str): + self.mailto = mailto + + def _query(self, query: str, rows: int = 5, offset: int = 0, sort: str = 'relevance', order: str = 'desc', fuzzy_query: bool = False) -> list[dict]: + """ + Query the metadata of a publication using its title. + :param query: the title of the publication + :param rows: the number of results to return + :param sort: the sort field + :param order: the sort order + :param fuzzy_query: whether to return all items that match the query + """ + url = self.query_url_template.format(query=query, rows=rows, offset=offset, sort=sort, order=order, mailto=self.mailto) + response = requests.get(url) + response.raise_for_status() + rate_limit = int(response.headers['x-ratelimit-limit']) + # convert time string to seconds + rate_interval = convert_time_str_to_seconds(response.headers['x-ratelimit-interval']) + + self.rate_limit = rate_limit + self.rate_interval = rate_interval + + response = response.json() + if response['status'] != 'ok': + return [] + + message = response['message'] + if fuzzy_query: + # fuzzy query return all items + return message['items'] + else: + for paper in message['items']: + title = paper['title'][0] + if title.lower() != query.lower(): + continue + return [paper] + return [] + + def query(self, query: str, rows: int = 5, sort: str = 'relevance', order: str = 'desc', fuzzy_query: bool = False) -> list[dict]: + """ + Query the metadata of a publication using its title. + :param query: the title of the publication + :param rows: the number of results to return + :param sort: the sort field + :param order: the sort order + :param fuzzy_query: whether to return all items that match the query + """ + rows = min(rows, self.max_limit) + if rows > self.rate_limit: + # query multiple times + query_times = rows // self.rate_limit + 1 + results = [] + + for i in range(query_times): + result = self._query(query, rows=self.rate_limit, offset=i * self.rate_limit, sort=sort, order=order, fuzzy_query=fuzzy_query) + if fuzzy_query: + results.extend(result) + else: + # fuzzy_query=False, only one result + if result: + return result + time.sleep(self.rate_interval) + return results + else: + # query once + return self._query(query, rows, sort=sort, order=order, fuzzy_query=fuzzy_query) + + +class CrossRefQueryTitleTool(BuiltinTool): + """ + Tool for querying the metadata of a publication using its title. + """ + def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]: + query = tool_parameters.get('query') + fuzzy_query = tool_parameters.get('fuzzy_query', False) + rows = tool_parameters.get('rows', 3) + sort = tool_parameters.get('sort', 'relevance') + order = tool_parameters.get('order', 'desc') + mailto = self.runtime.credentials['mailto'] + + result = CrossRefQueryTitleAPI(mailto).query(query, rows, sort, order, fuzzy_query) + + return [self.create_json_message(r) for r in result] diff --git a/api/core/tools/provider/builtin/crossref/tools/query_title.yaml b/api/core/tools/provider/builtin/crossref/tools/query_title.yaml new file mode 100644 index 0000000000..5579c77f52 --- /dev/null +++ b/api/core/tools/provider/builtin/crossref/tools/query_title.yaml @@ -0,0 +1,105 @@ +identity: + name: crossref_query_title + author: Sakura4036 + label: + en_US: CrossRef Title Query + zh_Hans: CrossRef 标题查询 + pt_BR: CrossRef Title Query +description: + human: + en_US: A tool for querying literature information using CrossRef by title. + zh_Hans: 一个使用CrossRef通过标题搜索文献信息的工具。 + pt_BR: A tool for querying literature information using CrossRef by title. + llm: A tool for querying literature information using CrossRef by title. +parameters: + - name: query + type: string + required: true + label: + en_US: 标题 + zh_Hans: 查询语句 + pt_BR: 标题 + human_description: + en_US: Query bibliographic information, useful for citation look up. Includes titles, authors, ISSNs and publication years + zh_Hans: 用于搜索文献信息,有助于查找引用。包括标题,作者,ISSN和出版年份 + pt_BR: Query bibliographic information, useful for citation look up. Includes titles, authors, ISSNs and publication years + llm_description: key words for querying in Web of Science + form: llm + - name: fuzzy_query + type: boolean + default: false + label: + en_US: Whether to fuzzy search + zh_Hans: 是否模糊搜索 + pt_BR: Whether to fuzzy search + human_description: + en_US: used for selecting the query type, fuzzy query returns more results, precise query returns 1 or none + zh_Hans: 用于选择搜索类型,模糊搜索返回更多结果,精确搜索返回1条结果或无 + pt_BR: used for selecting the query type, fuzzy query returns more results, precise query returns 1 or none + form: form + - name: limit + type: number + required: false + label: + en_US: max query number + zh_Hans: 最大搜索数 + pt_BR: max query number + human_description: + en_US: max query number(fuzzy search returns the maximum number of results or precise search the maximum number of matches) + zh_Hans: 最大搜索数(模糊搜索返回的最大结果数或精确搜索最大匹配数) + pt_BR: max query number(fuzzy search returns the maximum number of results or precise search the maximum number of matches) + form: llm + default: 50 + - name: sort + type: select + required: true + options: + - value: relevance + label: + en_US: relevance + zh_Hans: 相关性 + pt_BR: relevance + - value: published + label: + en_US: publication date + zh_Hans: 出版日期 + pt_BR: publication date + - value: references-count + label: + en_US: references-count + zh_Hans: 引用次数 + pt_BR: references-count + default: relevance + label: + en_US: sorting field + zh_Hans: 排序字段 + pt_BR: sorting field + human_description: + en_US: Sorting of query results + zh_Hans: 检索结果的排序字段 + pt_BR: Sorting of query results + form: form + - name: order + type: select + required: true + options: + - value: desc + label: + en_US: descending + zh_Hans: 降序 + pt_BR: descending + - value: asc + label: + en_US: ascending + zh_Hans: 升序 + pt_BR: ascending + default: desc + label: + en_US: Order + zh_Hans: 排序 + pt_BR: Order + human_description: + en_US: Order of query results + zh_Hans: 检索结果的排序方式 + pt_BR: Order of query results + form: form