diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index b18117f5..31c848b7 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -60,7 +60,9 @@ export async function extractController( if ( (await getTeamIdSyncB(req.auth.team_id)) && req.body.origin !== "api-sdk" && - req.body.origin !== "website" + req.body.origin !== "website" && + !req.body.origin.startsWith("python-sdk@") && + !req.body.origin.startsWith("js-sdk@") ) { return await oldExtract(req, res, extractId); } diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index ab09432e..8e4eca61 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -474,11 +474,26 @@ export interface GenerateLLMsTextStatusResponse { export default class FirecrawlApp { public apiKey: string; public apiUrl: string; - + public version: string = "1.19.1"; + private isCloudService(url: string): boolean { return url.includes('api.firecrawl.dev'); } + private async getVersion(): Promise { + try { + const packageJson = await import('../package.json', { assert: { type: 'json' } }); + return packageJson.default.version; + } catch (error) { + console.error("Error getting version:", error); + return "1.19.1"; + } + } + + private async init() { + this.version = await this.getVersion(); + } + /** * Initializes a new instance of the FirecrawlApp class. * @param config - Configuration options for the FirecrawlApp instance. @@ -492,6 +507,7 @@ export default class FirecrawlApp { this.apiKey = apiKey || ''; this.apiUrl = baseUrl; + this.init(); } /** @@ -508,7 +524,7 @@ export default class FirecrawlApp { "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}`, } as AxiosRequestHeaders; - let jsonData: any = { url, ...params }; + let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` }; if (jsonData?.extract?.schema) { let schema = jsonData.extract.schema; @@ -590,7 +606,7 @@ export default class FirecrawlApp { lang: params?.lang ?? "en", country: params?.country ?? "us", location: params?.location, - origin: params?.origin ?? "api", + origin: `js-sdk@${this.version}`, timeout: params?.timeout ?? 60000, scrapeOptions: params?.scrapeOptions ?? { formats: [] }, }; @@ -662,7 +678,7 @@ export default class FirecrawlApp { idempotencyKey?: string ): Promise { const headers = this.prepareHeaders(idempotencyKey); - let jsonData: any = { url, ...params }; + let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` }; try { const response: AxiosResponse = await this.postRequest( this.apiUrl + `/v1/crawl`, @@ -691,7 +707,7 @@ export default class FirecrawlApp { idempotencyKey?: string ): Promise { const headers = this.prepareHeaders(idempotencyKey); - let jsonData: any = { url, ...params }; + let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` }; try { const response: AxiosResponse = await this.postRequest( this.apiUrl + `/v1/crawl`, @@ -867,7 +883,7 @@ export default class FirecrawlApp { */ async mapUrl(url: string, params?: MapParams): Promise { const headers = this.prepareHeaders(); - let jsonData: { url: string } & MapParams = { url, ...params }; + let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` }; try { const response: AxiosResponse = await this.postRequest( @@ -904,7 +920,7 @@ export default class FirecrawlApp { ignoreInvalidURLs?: boolean, ): Promise { const headers = this.prepareHeaders(idempotencyKey); - let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params }; + let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` }; if (jsonData?.extract?.schema) { let schema = jsonData.extract.schema; @@ -969,7 +985,7 @@ export default class FirecrawlApp { ignoreInvalidURLs?: boolean, ): Promise { const headers = this.prepareHeaders(idempotencyKey); - let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...(params ?? {}) }; + let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` }; try { const response: AxiosResponse = await this.postRequest( this.apiUrl + `/v1/batch/scrape`, @@ -1143,7 +1159,7 @@ export default class FirecrawlApp { try { const response: AxiosResponse = await this.postRequest( this.apiUrl + `/v1/extract`, - { ...jsonData, schema: jsonSchema, origin: params?.origin || "api-sdk" }, + { ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` }, headers ); @@ -1211,7 +1227,7 @@ export default class FirecrawlApp { try { const response: AxiosResponse = await this.postRequest( this.apiUrl + `/v1/extract`, - { ...jsonData, schema: jsonSchema }, + { ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` }, headers ); @@ -1497,10 +1513,11 @@ export default class FirecrawlApp { */ async asyncDeepResearch(query: string, params: DeepResearchParams): Promise { const headers = this.prepareHeaders(); + let jsonData: any = { query, ...params, origin: `js-sdk@${this.version}` }; try { const response: AxiosResponse = await this.postRequest( - `${this.apiUrl}/v1/deep-research`, - { query, ...params }, + this.apiUrl + `/v1/deep-research`, + jsonData, headers ); @@ -1632,9 +1649,10 @@ export default class FirecrawlApp { async __asyncDeepResearch(topic: string, params: DeepResearchParams): Promise { const headers = this.prepareHeaders(); try { + let jsonData: any = { topic, ...params, origin: `js-sdk@${this.version}` }; const response: AxiosResponse = await this.postRequest( `${this.apiUrl}/v1/deep-research`, - { topic, ...params }, + jsonData, headers ); @@ -1744,10 +1762,11 @@ export default class FirecrawlApp { */ async asyncGenerateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise { const headers = this.prepareHeaders(); + let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` }; try { const response: AxiosResponse = await this.postRequest( `${this.apiUrl}/v1/llmstxt`, - { url, ...params }, + jsonData, headers ); diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 990599cc..adedce9c 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -15,6 +15,7 @@ import time from typing import Any, Dict, Optional, List, Union, Callable, Literal, TypeVar, Generic import json from datetime import datetime +import re import requests import pydantic @@ -22,6 +23,20 @@ import websockets import aiohttp import asyncio +def get_version(): + try: + from pathlib import Path + package_path = os.path.dirname(__file__) + version_file = Path(os.path.join(package_path, '__init__.py')).read_text() + version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M) + if version_match: + return version_match.group(1).strip() + except Exception: + print("Failed to get version from __init__.py") + return None + +version = get_version() + logger : logging.Logger = logging.getLogger("firecrawl") T = TypeVar('T') @@ -424,6 +439,7 @@ class FirecrawlApp: if key not in ['jsonOptions']: scrape_params[key] = value + scrape_params['origin'] = f"python-sdk@{version}" endpoint = f'/v1/scrape' # Make the POST request with the prepared headers and JSON data @@ -489,10 +505,13 @@ class FirecrawlApp: search_params = params search_params.query = query + params_dict = search_params.dict(exclude_none=True) + params_dict['origin'] = f"python-sdk@{version}" + response = requests.post( f"{self.api_url}/v1/search", headers={"Authorization": f"Bearer {self.api_key}"}, - json=search_params.dict(exclude_none=True) + json=params_dict ) if response.status_code != 200: @@ -548,6 +567,7 @@ class FirecrawlApp: json_data = {'url': url} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) if response.status_code == 200: try: @@ -609,6 +629,7 @@ class FirecrawlApp: json_data = {'url': url} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) if response.status_code == 200: try: @@ -835,6 +856,7 @@ class FirecrawlApp: json_data = {'url': url} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" # Make the POST request with the prepared headers and JSON data response = requests.post( @@ -897,6 +919,7 @@ class FirecrawlApp: json_data = {'urls': urls} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) if response.status_code == 200: try: @@ -953,6 +976,7 @@ class FirecrawlApp: json_data = {'urls': urls} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) if response.status_code == 200: try: @@ -1153,7 +1177,7 @@ class FirecrawlApp: 'enableWebSearch': params.get('enable_web_search', params.get('enableWebSearch', False)), 'showSources': params.get('show_sources', params.get('showSources', False)), 'schema': schema, - 'origin': 'api-sdk' + 'origin': f'python-sdk@{get_version()}' } # Only add prompt and systemPrompt if they exist @@ -1284,7 +1308,7 @@ class FirecrawlApp: **jsonData, 'allowExternalLinks': params.get('allow_external_links', False) if params else False, 'schema': schema, - 'origin': 'api-sdk' + 'origin': f'python-sdk@{version}' } try: @@ -1387,6 +1411,7 @@ class FirecrawlApp: headers = self._prepare_headers() json_data = {'url': url, **generation_params.dict(exclude_none=True)} + json_data['origin'] = f"python-sdk@{version}" try: response = self._post_request(f'{self.api_url}/v1/llmstxt', json_data, headers) @@ -1770,6 +1795,7 @@ class FirecrawlApp: headers = self._prepare_headers() json_data = {'query': query, **research_params.dict(exclude_none=True)} + json_data['origin'] = f"python-sdk@{version}" try: response = self._post_request(f'{self.api_url}/v1/deep-research', json_data, headers) @@ -2178,7 +2204,7 @@ class AsyncFirecrawlApp(FirecrawlApp): Exception: If scraping fails """ headers = self._prepare_headers() - scrape_params = {'url': url} + scrape_params = {'url': url, 'origin': f'python-sdk@{version}'} if params: extract = params.get('extract', {}) @@ -2245,6 +2271,7 @@ class AsyncFirecrawlApp(FirecrawlApp): json_data = {'urls': urls} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" endpoint = f'/v1/batch/scrape' response = await self._async_post_request( @@ -2301,6 +2328,7 @@ class AsyncFirecrawlApp(FirecrawlApp): json_data = {'urls': urls} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" endpoint = f'/v1/batch/scrape' return await self._async_post_request( @@ -2355,6 +2383,7 @@ class AsyncFirecrawlApp(FirecrawlApp): json_data = {'url': url} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" endpoint = f'/v1/crawl' response = await self._async_post_request( @@ -2413,6 +2442,7 @@ class AsyncFirecrawlApp(FirecrawlApp): json_data = {'url': url} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" endpoint = f'/v1/crawl' return await self._async_post_request( @@ -2564,6 +2594,7 @@ class AsyncFirecrawlApp(FirecrawlApp): json_data = {'url': url} if params: json_data.update(params) + json_data['origin'] = f"python-sdk@{version}" endpoint = f'/v1/map' response = await self._async_post_request( @@ -2628,7 +2659,7 @@ class AsyncFirecrawlApp(FirecrawlApp): 'enableWebSearch': params.get('enable_web_search', params.get('enableWebSearch', False)), 'showSources': params.get('show_sources', params.get('showSources', False)), 'schema': schema, - 'origin': 'api-sdk' + 'origin': f'python-sdk@{version}' } if params.get('prompt'): @@ -2876,7 +2907,7 @@ class AsyncFirecrawlApp(FirecrawlApp): **jsonData, 'allowExternalLinks': params.get('allow_external_links', False) if params else False, 'schema': schema, - 'origin': 'api-sdk' + 'origin': f'python-sdk@{version}' } try: @@ -2975,6 +3006,7 @@ class AsyncFirecrawlApp(FirecrawlApp): headers = self._prepare_headers() json_data = {'url': url, **generation_params.dict(exclude_none=True)} + json_data['origin'] = f"python-sdk@{version}" try: return await self._async_post_request( @@ -3132,7 +3164,7 @@ class AsyncFirecrawlApp(FirecrawlApp): headers = self._prepare_headers() json_data = {'query': query, **research_params.dict(exclude_none=True)} - + json_data['origin'] = f"python-sdk@{version}" try: return await self._async_post_request( f'{self.api_url}/v1/deep-research', @@ -3217,9 +3249,12 @@ class AsyncFirecrawlApp(FirecrawlApp): search_params = params search_params.query = query + search_params_dict = search_params.dict(exclude_none=True) + search_params_dict['origin'] = f"python-sdk@{version}" + return await self._async_post_request( f"{self.api_url}/v1/search", - search_params.dict(exclude_none=True), + search_params_dict, {"Authorization": f"Bearer {self.api_key}"} )