diff --git a/api/apps/auth/README.md b/api/apps/auth/README.md new file mode 100644 index 000000000..e2c42d0b4 --- /dev/null +++ b/api/apps/auth/README.md @@ -0,0 +1,68 @@ +# Auth + +The Auth module provides implementations of OAuth2 and OpenID Connect (OIDC) authentication for integration with third-party identity providers. + +**Features** + +- Supports both OAuth2 and OIDC authentication protocols +- Automatic OIDC configuration discovery (via `/.well-known/openid-configuration`) +- JWT token validation +- Unified user information handling + +## Usage + +```python +# OAuth2 configuration +oauth_config = { + "type": "oauth2", + "client_id": "your_client_id", + "client_secret": "your_client_secret", + "authorization_url": "https://provider.com/oauth/authorize", + "token_url": "https://provider.com/oauth/token", + "userinfo_url": "https://provider.com/oauth/userinfo", + "redirect_uri": "https://your-app.com/oauth/callback/<channel>" +} + +# OIDC configuration +oidc_config = { + "type": "oidc", + "issuer": "https://provider.com/v1/oidc", + "client_id": "your_client_id", + "client_secret": "your_client_secret", + "redirect_uri": "https://your-app.com/oauth/callback/<channel>" +} + +# Get client instance +client = get_auth_client(oauth_config) # or oidc_config +``` + +### Authentication Flow + +1. Get authorization URL: +```python +auth_url = client.get_authorization_url() +``` + +2. After user authorization, exchange authorization code for token: +```python +token_response = client.exchange_code_for_token(authorization_code) +access_token = token_response["access_token"] +``` + +3. 
def get_auth_client(config) -> OAuthClient:
    """
    Build the auth client that matches a channel configuration.

    The client class is looked up in ``CLIENT_TYPES`` by the case-insensitive
    ``type`` entry of ``config``. When ``type`` is absent, null or blank, the
    type is inferred: a configuration carrying an ``issuer`` is treated as
    ``"oidc"``, anything else as ``"oauth2"``.

    Args:
        config: Mapping holding the channel settings; it is passed unchanged
            to the selected client class.

    Returns:
        An instance of the class registered for the resolved type.

    Raises:
        ValueError: If the resolved type is not registered in ``CLIENT_TYPES``.
    """
    # `or ""` covers an explicit null (e.g. a bare `type:` key in YAML), which
    # str() would otherwise turn into the literal "none" and reject.
    channel_type = str(config.get("type") or "").strip().lower()
    if not channel_type:
        channel_type = "oidc" if config.get("issuer") else "oauth2"

    client_class = CLIENT_TYPES.get(channel_type)
    if not client_class:
        raise ValueError(f"Unsupported type: {channel_type}")

    return client_class(config)
class UserInfo:
    """Normalized user profile returned by the auth clients."""

    def __init__(self, email, username, nickname, avatar_url):
        self.email = email
        self.username = username
        self.nickname = nickname
        self.avatar_url = avatar_url

    def to_dict(self):
        """Return the profile as a new plain dict keyed by attribute name."""
        return dict(self.__dict__)


class OAuthClient:
    """Minimal OAuth2 authorization-code client driven by a config mapping."""

    def __init__(self, config):
        """
        Initialize the OAuthClient with the provider's configuration.

        Args:
            config: Mapping that must contain ``client_id``, ``client_secret``,
                ``authorization_url``, ``token_url``, ``userinfo_url`` and
                ``redirect_uri``; ``scope`` is optional.

        Raises:
            KeyError: If one of the mandatory entries is missing.
        """
        self.client_id = config["client_id"]
        self.client_secret = config["client_secret"]
        self.authorization_url = config["authorization_url"]
        self.token_url = config["token_url"]
        self.userinfo_url = config["userinfo_url"]
        self.redirect_uri = config["redirect_uri"]
        self.scope = config.get("scope", None)

        # Timeout (seconds) handed to every outgoing `requests` call.
        self.http_request_timeout = 7

    def get_authorization_url(self, state=None):
        """
        Generate the authorization URL for user login.

        Args:
            state: Optional opaque value added as the ``state`` query
                parameter. Omitted from the URL when falsy.

        Returns:
            The provider authorization endpoint with the query string
            ``client_id``, ``redirect_uri``, ``response_type=code`` and, when
            set, ``scope`` and ``state``.
        """
        params = {
            "client_id": self.client_id,
            "redirect_uri": self.redirect_uri,
            "response_type": "code",
        }
        if self.scope:
            params["scope"] = self.scope
        if state:
            params["state"] = state
        # An endpoint that already carries a query string must be extended
        # with "&"; a second "?" would corrupt the URL.
        separator = "&" if "?" in self.authorization_url else "?"
        return f"{self.authorization_url}{separator}{urllib.parse.urlencode(params)}"

    def exchange_code_for_token(self, code):
        """
        Exchange authorization code for access token.

        Args:
            code: Authorization code received on the redirect URI.

        Returns:
            The decoded JSON body of the token endpoint response.

        Raises:
            ValueError: If the HTTP request fails or returns an error status.
        """
        payload = {
            "client_id": self.client_id,
            "client_secret": self.client_secret,
            "code": code,
            "redirect_uri": self.redirect_uri,
            "grant_type": "authorization_code",
        }
        try:
            response = requests.post(
                self.token_url,
                data=payload,
                headers={"Accept": "application/json"},
                timeout=self.http_request_timeout,
            )
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            raise ValueError(f"Failed to exchange authorization code for token: {e}") from e

    def fetch_user_info(self, access_token, **kwargs):
        """
        Fetch user information using access token.

        Args:
            access_token: Bearer token sent in the ``Authorization`` header.
            **kwargs: Accepted for signature compatibility; unused here.

        Returns:
            A ``UserInfo`` built by ``normalize_user_info`` from the JSON body
            of the userinfo endpoint response.

        Raises:
            ValueError: If the HTTP request fails or returns an error status.
        """
        try:
            headers = {"Authorization": f"Bearer {access_token}"}
            response = requests.get(self.userinfo_url, headers=headers, timeout=self.http_request_timeout)
            response.raise_for_status()
            user_info = response.json()
            return self.normalize_user_info(user_info)
        except requests.exceptions.RequestException as e:
            raise ValueError(f"Failed to fetch user info: {e}") from e

    def normalize_user_info(self, user_info):
        """
        Map a provider profile dict onto ``UserInfo``.

        ``username`` falls back to the local part of the email, ``nickname``
        falls back to ``username``, and the avatar is read from ``picture`` or,
        failing that, ``avatar_url``. Accepting ``avatar_url`` lets the output
        of ``UserInfo.to_dict()`` be normalized again without losing the avatar.

        Args:
            user_info: Mapping of profile fields returned by the provider.

        Returns:
            A ``UserInfo`` instance; ``email`` is ``None`` when absent.
        """
        email = user_info.get("email")
        # Without this guard str(None) would yield the literal username "None".
        email_local_part = str(email).split("@")[0] if email else ""
        username = user_info.get("username") or email_local_part
        nickname = user_info.get("nickname") or username
        avatar_url = user_info.get("picture") or user_info.get("avatar_url") or ""
        return UserInfo(email=email, username=username, nickname=nickname, avatar_url=avatar_url)
class OIDCClient(OAuthClient):
    """OpenID Connect client that discovers its endpoints from the issuer."""

    def __init__(self, config):
        """
        Initialize the OIDCClient with the provider's configuration.
        Use `issuer` as the single source of truth for configuration discovery.

        Args:
            config: Mapping with ``issuer`` plus the entries ``OAuthClient``
                needs that discovery does not supply (``client_id``,
                ``client_secret``, ``redirect_uri``, optional ``scope``).
                The mapping is not modified.

        Raises:
            ValueError: If ``issuer`` is missing, the discovery document
                cannot be fetched, or it lacks a required endpoint.
        """
        self.issuer = config.get("issuer")
        if not self.issuer:
            raise ValueError("Missing issuer in configuration.")

        oidc_metadata = self._load_oidc_metadata(self.issuer)
        required_fields = (
            "issuer",
            "jwks_uri",
            "authorization_endpoint",
            "token_endpoint",
            "userinfo_endpoint",
        )
        missing = [name for name in required_fields if not oidc_metadata.get(name)]
        if missing:
            raise ValueError(f"OIDC metadata is missing required fields: {', '.join(missing)}")

        # Build a new dict: the caller's mapping may be shared application
        # configuration and must not be rewritten as a side effect.
        config = {
            **config,
            "issuer": oidc_metadata["issuer"],
            "jwks_uri": oidc_metadata["jwks_uri"],
            "authorization_url": oidc_metadata["authorization_endpoint"],
            "token_url": oidc_metadata["token_endpoint"],
            "userinfo_url": oidc_metadata["userinfo_endpoint"],
        }

        super().__init__(config)
        self.jwks_uri = config["jwks_uri"]

        # Signature algorithms accepted for ID tokens, taken from what the
        # provider advertises. "none" and the HMAC family are dropped because
        # verification here always uses a public key from the JWKS.
        advertised = oidc_metadata.get("id_token_signing_alg_values_supported") or ["RS256"]
        self.id_token_signing_algs = [
            alg
            for alg in advertised
            if isinstance(alg, str) and alg.lower() != "none" and not alg.upper().startswith("HS")
        ] or ["RS256"]

    def _load_oidc_metadata(self, issuer):
        """
        Load OIDC metadata from `/.well-known/openid-configuration`.

        Args:
            issuer: Issuer base URL; a trailing slash is ignored.

        Returns:
            The decoded JSON discovery document.

        Raises:
            ValueError: If the HTTP request fails or returns an error status.
        """
        try:
            # Strip a trailing "/" so the well-known path is not joined with "//".
            metadata_url = f"{issuer.rstrip('/')}/.well-known/openid-configuration"
            response = requests.get(metadata_url, timeout=7)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            raise ValueError(f"Failed to fetch OIDC metadata: {e}") from e

    def parse_id_token(self, id_token):
        """
        Parse and validate OIDC ID Token (JWT format) with signature verification.

        The signing key is looked up in the JWKS published at the discovered
        ``jwks_uri``; audience must equal ``client_id`` and issuer must equal
        the configured ``issuer``.

        Args:
            id_token: Encoded JWT as returned by the token endpoint.

        Returns:
            The decoded claims dict.

        Raises:
            ValueError: If the token has no ``kid`` header or any step of key
                lookup / verification fails.
        """
        try:
            # Decode JWT header to extract key ID (kid) without verifying signature
            headers = jwt.get_unverified_header(id_token)
            if not headers.get("kid"):
                raise ValueError("ID Token missing 'kid' in header")

            # Use the JWKS location announced by discovery rather than a
            # guessed path; providers are free to host it anywhere.
            jwks_cli = jwt.PyJWKClient(self.jwks_uri)
            signing_key = jwks_cli.get_signing_key_from_jwt(id_token).key

            # The accepted algorithms come from provider metadata, never from
            # the (attacker-controllable) header of the token being verified.
            return jwt.decode(
                id_token,
                key=signing_key,
                algorithms=self.id_token_signing_algs,
                audience=self.client_id,
                issuer=self.issuer,
            )
        except Exception as e:
            raise ValueError(f"Error parsing ID Token: {e}") from e

    def fetch_user_info(self, access_token, id_token=None, **kwargs):
        """
        Fetch user info.

        Claims from the verified ID token (when given) are combined with the
        userinfo endpoint response; endpoint values win, except that null
        endpoint values never overwrite an ID-token claim. The merged raw
        claims are normalized exactly once.

        Args:
            access_token: Bearer token for the userinfo endpoint.
            id_token: Optional encoded ID token to verify and merge.
            **kwargs: Accepted for signature compatibility; unused here.

        Returns:
            The result of ``normalize_user_info`` on the merged claims.

        Raises:
            ValueError: If the ID token is invalid or the userinfo request
                fails.
        """
        claims = {}
        if id_token:
            claims = self.parse_id_token(id_token)

        # Query the endpoint directly instead of via the parent method: that
        # one returns an already-normalized profile, whose empty fields would
        # clobber ID-token claims and whose avatar key is not a claim name.
        try:
            response = requests.get(
                self.userinfo_url,
                headers={"Authorization": f"Bearer {access_token}"},
                timeout=self.http_request_timeout,
            )
            response.raise_for_status()
            endpoint_claims = response.json()
        except requests.exceptions.RequestException as e:
            raise ValueError(f"Failed to fetch user info: {e}") from e

        claims.update({key: value for key, value in endpoint_claims.items() if value is not None})
        return self.normalize_user_info(claims)

    def normalize_user_info(self, user_info):
        """Normalize claims using the parent implementation unchanged."""
        return super().normalize_user_info(user_info)
@manager.route("/login/<channel>")  # noqa: F821
def oauth_login(channel):
    """
    Start the OAuth/OIDC flow for a configured channel.

    Looks the channel up in ``settings.OAUTH_CONFIG``, builds the matching
    auth client and redirects to the authorization URL it generates.

    Args:
        channel: Channel name taken from the URL path.

    Raises:
        ValueError: If no configuration exists for ``channel``.
    """
    # OAUTH_CONFIG is None until settings are initialized.
    channel_config = (settings.OAUTH_CONFIG or {}).get(channel)
    if not channel_config:
        raise ValueError(f"Invalid channel name: {channel}")
    auth_cli = get_auth_client(channel_config)

    auth_url = auth_cli.get_authorization_url()
    return redirect(auth_url)


@manager.route("/oauth/callback/<channel>", methods=["GET"])  # noqa: F821
def oauth_callback(channel):
    """
    Handle the OAuth/OIDC callback for various channels dynamically.

    Exchanges the ``code`` query argument for tokens, fetches the user
    profile, then logs in the user with that email, registering a new one
    first when the email is unknown. Every outcome is a redirect to ``/``:
    ``auth_success=true&user_id=...`` on success, ``error=...`` otherwise.

    Args:
        channel: Channel name taken from the URL path.
    """
    # Local import keeps this block self-contained; used to escape error text.
    from urllib.parse import quote

    def _error_redirect(message):
        # Exception text may contain "&", "#", spaces or URLs; percent-encode
        # it so it cannot break or inject parameters into the redirect target.
        return redirect(f"/?error={quote(str(message), safe='')}")

    try:
        # OAUTH_CONFIG is None until settings are initialized.
        channel_config = (settings.OAUTH_CONFIG or {}).get(channel)
        if not channel_config:
            raise ValueError(f"Invalid channel name: {channel}")
        auth_cli = get_auth_client(channel_config)

        # Obtain the authorization code
        code = request.args.get("code")
        if not code:
            return redirect("/?error=missing_code")

        # Exchange authorization code for access token
        token_info = auth_cli.exchange_code_for_token(code)
        access_token = token_info.get("access_token")
        if not access_token:
            return redirect("/?error=token_failed")

        id_token = token_info.get("id_token")

        # Fetch user info
        user_info = auth_cli.fetch_user_info(access_token, id_token=id_token)
        if not user_info.email:
            return redirect("/?error=email_missing")

        users = UserService.query(email=user_info.email)
        if users:
            # User exists: issue a fresh access token and log in.
            user = users[0]
            user.access_token = get_uuid()
            login_user(user)
            user.save()
            return redirect(f"/?auth_success=true&user_id={user.get_id()}")

        # Unknown email: register, then log in. The id is generated up front
        # so a failed registration can be rolled back by id.
        user_id = get_uuid()
        try:
            try:
                avatar = download_img(user_info.avatar_url)
            except Exception as e:
                # Avatar download is best-effort; continue without one.
                logging.exception(e)
                avatar = ""

            users = user_register(
                user_id,
                {
                    "access_token": access_token,
                    "email": user_info.email,
                    "avatar": avatar,
                    "nickname": user_info.nickname,
                    "login_channel": channel,
                    "last_login_time": get_format_time(),
                    "is_superuser": False,
                },
            )

            if not users:
                raise Exception(f"Failed to register {user_info.email}")
            if len(users) > 1:
                raise Exception(f"Same email: {user_info.email} exists!")

            # Try to log in
            user = users[0]
            login_user(user)
            return redirect(f"/?auth_success=true&user_id={user.get_id()}")
        except Exception as e:
            rollback_user_registration(user_id)
            logging.exception(e)
            return _error_redirect(e)
    except Exception as e:
        # Record the failure server-side; the redirect only carries a summary.
        logging.exception(e)
        return _error_redirect(e)
a/docker/service_conf.yaml.template b/docker/service_conf.yaml.template index aaf8fa708..c35373988 100644 --- a/docker/service_conf.yaml.template +++ b/docker/service_conf.yaml.template @@ -85,6 +85,13 @@ redis: # app_access_token_url: https://open.feishu.cn/open-apis/auth/v3/app_access_token/internal # user_access_token_url: https://open.feishu.cn/open-apis/authen/v1/oidc/access_token # grant_type: 'authorization_code' +# custom_channel: +# type: oidc +# issuer: https://provider.com/v1/oidc +# client_id: xxxxxxxxxxxxxxxxxxxxxxxxx +# client_secret: xxxxxxxxxxxxxxxxxxxxxxxx +# scope: "openid email profile" +# redirect_uri: https://your-app.com/oauth/callback/custom_channel # authentication: # client: # switch: false diff --git a/docs/configurations.md b/docs/configurations.md index 2362a1062..b930ceca3 100644 --- a/docs/configurations.md +++ b/docs/configurations.md @@ -168,6 +168,20 @@ The OAuth configuration for signing up or signing in to RAGFlow using a third-pa - `github`: The GitHub authentication settings for your application. Visit the [GitHub Developer Settings](https://github.com/settings/developers) page to obtain your client_id and secret_key. +#### OAuth/OIDC + +RAGFlow supports OAuth/OIDC authentication through the following routes: + +- `/login/<channel>`: Initiates the OAuth flow for the specified channel +- `/oauth/callback/<channel>`: Handles the OAuth callback after successful authentication + +The callback URL should be configured in your OAuth provider as: +``` +https://your-app.com/oauth/callback/<channel> +``` + +For detailed instructions on configuring **service_conf.yaml.template**, please refer to [Usage](../api/apps/auth/README.md#usage). + ### `user_default_llm` The default LLM to use for a new RAGFlow user. It is disabled by default. To enable this feature, uncomment the corresponding lines in **service_conf.yaml.template**.