added origin to requests

This commit is contained in:
rafaelmmiller 2025-03-19 09:45:51 -03:00
parent cc255d488e
commit 4f984d3fde
3 changed files with 79 additions and 23 deletions

View File

@ -60,7 +60,9 @@ export async function extractController(
if ( if (
(await getTeamIdSyncB(req.auth.team_id)) && (await getTeamIdSyncB(req.auth.team_id)) &&
req.body.origin !== "api-sdk" && req.body.origin !== "api-sdk" &&
req.body.origin !== "website" req.body.origin !== "website" &&
!req.body.origin.startsWith("python-sdk@") &&
!req.body.origin.startsWith("js-sdk@")
) { ) {
return await oldExtract(req, res, extractId); return await oldExtract(req, res, extractId);
} }

View File

@ -474,11 +474,26 @@ export interface GenerateLLMsTextStatusResponse {
export default class FirecrawlApp { export default class FirecrawlApp {
public apiKey: string; public apiKey: string;
public apiUrl: string; public apiUrl: string;
public version: string = "1.19.1";
private isCloudService(url: string): boolean { private isCloudService(url: string): boolean {
return url.includes('api.firecrawl.dev'); return url.includes('api.firecrawl.dev');
} }
/**
 * Resolves the SDK version string from the package manifest.
 *
 * Dynamically imports `../package.json` and returns its `version` field.
 * If the import fails for any reason, the error is logged and the value
 * currently held in `this.version` is returned instead, so the fallback
 * version literal lives in one place only (the `version` field initializer).
 *
 * NOTE(review): the `assert` import-attribute keyword has been superseded by
 * `with` in newer runtimes/compilers — confirm against the supported Node.js
 * and TypeScript versions before switching.
 *
 * @returns The version read from package.json, or the current `this.version`
 *          when the manifest cannot be loaded.
 */
private async getVersion(): Promise<string> {
  try {
    const packageJson = await import('../package.json', { assert: { type: 'json' } });
    return packageJson.default.version;
  } catch (error: unknown) {
    console.error("Error getting version:", error);
    // Fall back to the already-initialized field rather than repeating the literal.
    return this.version;
  }
}
/**
 * Asynchronously replaces `this.version` with the value resolved by
 * `getVersion()`.
 */
private async init(): Promise<void> {
  const resolvedVersion = await this.getVersion();
  this.version = resolvedVersion;
}
/** /**
* Initializes a new instance of the FirecrawlApp class. * Initializes a new instance of the FirecrawlApp class.
* @param config - Configuration options for the FirecrawlApp instance. * @param config - Configuration options for the FirecrawlApp instance.
@ -492,6 +507,7 @@ export default class FirecrawlApp {
this.apiKey = apiKey || ''; this.apiKey = apiKey || '';
this.apiUrl = baseUrl; this.apiUrl = baseUrl;
this.init();
} }
/** /**
@ -508,7 +524,7 @@ export default class FirecrawlApp {
"Content-Type": "application/json", "Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`, Authorization: `Bearer ${this.apiKey}`,
} as AxiosRequestHeaders; } as AxiosRequestHeaders;
let jsonData: any = { url, ...params }; let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
if (jsonData?.extract?.schema) { if (jsonData?.extract?.schema) {
let schema = jsonData.extract.schema; let schema = jsonData.extract.schema;
@ -590,7 +606,7 @@ export default class FirecrawlApp {
lang: params?.lang ?? "en", lang: params?.lang ?? "en",
country: params?.country ?? "us", country: params?.country ?? "us",
location: params?.location, location: params?.location,
origin: params?.origin ?? "api", origin: `js-sdk@${this.version}`,
timeout: params?.timeout ?? 60000, timeout: params?.timeout ?? 60000,
scrapeOptions: params?.scrapeOptions ?? { formats: [] }, scrapeOptions: params?.scrapeOptions ?? { formats: [] },
}; };
@ -662,7 +678,7 @@ export default class FirecrawlApp {
idempotencyKey?: string idempotencyKey?: string
): Promise<CrawlStatusResponse | ErrorResponse> { ): Promise<CrawlStatusResponse | ErrorResponse> {
const headers = this.prepareHeaders(idempotencyKey); const headers = this.prepareHeaders(idempotencyKey);
let jsonData: any = { url, ...params }; let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
try { try {
const response: AxiosResponse = await this.postRequest( const response: AxiosResponse = await this.postRequest(
this.apiUrl + `/v1/crawl`, this.apiUrl + `/v1/crawl`,
@ -691,7 +707,7 @@ export default class FirecrawlApp {
idempotencyKey?: string idempotencyKey?: string
): Promise<CrawlResponse | ErrorResponse> { ): Promise<CrawlResponse | ErrorResponse> {
const headers = this.prepareHeaders(idempotencyKey); const headers = this.prepareHeaders(idempotencyKey);
let jsonData: any = { url, ...params }; let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
try { try {
const response: AxiosResponse = await this.postRequest( const response: AxiosResponse = await this.postRequest(
this.apiUrl + `/v1/crawl`, this.apiUrl + `/v1/crawl`,
@ -867,7 +883,7 @@ export default class FirecrawlApp {
*/ */
async mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse> { async mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse> {
const headers = this.prepareHeaders(); const headers = this.prepareHeaders();
let jsonData: { url: string } & MapParams = { url, ...params }; let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
try { try {
const response: AxiosResponse = await this.postRequest( const response: AxiosResponse = await this.postRequest(
@ -904,7 +920,7 @@ export default class FirecrawlApp {
ignoreInvalidURLs?: boolean, ignoreInvalidURLs?: boolean,
): Promise<BatchScrapeStatusResponse | ErrorResponse> { ): Promise<BatchScrapeStatusResponse | ErrorResponse> {
const headers = this.prepareHeaders(idempotencyKey); const headers = this.prepareHeaders(idempotencyKey);
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params }; let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
if (jsonData?.extract?.schema) { if (jsonData?.extract?.schema) {
let schema = jsonData.extract.schema; let schema = jsonData.extract.schema;
@ -969,7 +985,7 @@ export default class FirecrawlApp {
ignoreInvalidURLs?: boolean, ignoreInvalidURLs?: boolean,
): Promise<BatchScrapeResponse | ErrorResponse> { ): Promise<BatchScrapeResponse | ErrorResponse> {
const headers = this.prepareHeaders(idempotencyKey); const headers = this.prepareHeaders(idempotencyKey);
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...(params ?? {}) }; let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
try { try {
const response: AxiosResponse = await this.postRequest( const response: AxiosResponse = await this.postRequest(
this.apiUrl + `/v1/batch/scrape`, this.apiUrl + `/v1/batch/scrape`,
@ -1143,7 +1159,7 @@ export default class FirecrawlApp {
try { try {
const response: AxiosResponse = await this.postRequest( const response: AxiosResponse = await this.postRequest(
this.apiUrl + `/v1/extract`, this.apiUrl + `/v1/extract`,
{ ...jsonData, schema: jsonSchema, origin: params?.origin || "api-sdk" }, { ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
headers headers
); );
@ -1211,7 +1227,7 @@ export default class FirecrawlApp {
try { try {
const response: AxiosResponse = await this.postRequest( const response: AxiosResponse = await this.postRequest(
this.apiUrl + `/v1/extract`, this.apiUrl + `/v1/extract`,
{ ...jsonData, schema: jsonSchema }, { ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
headers headers
); );
@ -1497,10 +1513,11 @@ export default class FirecrawlApp {
*/ */
async asyncDeepResearch(query: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse> { async asyncDeepResearch(query: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse> {
const headers = this.prepareHeaders(); const headers = this.prepareHeaders();
let jsonData: any = { query, ...params, origin: `js-sdk@${this.version}` };
try { try {
const response: AxiosResponse = await this.postRequest( const response: AxiosResponse = await this.postRequest(
`${this.apiUrl}/v1/deep-research`, this.apiUrl + `/v1/deep-research`,
{ query, ...params }, jsonData,
headers headers
); );
@ -1632,9 +1649,10 @@ export default class FirecrawlApp {
async __asyncDeepResearch(topic: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse> { async __asyncDeepResearch(topic: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse> {
const headers = this.prepareHeaders(); const headers = this.prepareHeaders();
try { try {
let jsonData: any = { topic, ...params, origin: `js-sdk@${this.version}` };
const response: AxiosResponse = await this.postRequest( const response: AxiosResponse = await this.postRequest(
`${this.apiUrl}/v1/deep-research`, `${this.apiUrl}/v1/deep-research`,
{ topic, ...params }, jsonData,
headers headers
); );
@ -1744,10 +1762,11 @@ export default class FirecrawlApp {
*/ */
async asyncGenerateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextResponse | ErrorResponse> { async asyncGenerateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextResponse | ErrorResponse> {
const headers = this.prepareHeaders(); const headers = this.prepareHeaders();
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
try { try {
const response: AxiosResponse = await this.postRequest( const response: AxiosResponse = await this.postRequest(
`${this.apiUrl}/v1/llmstxt`, `${this.apiUrl}/v1/llmstxt`,
{ url, ...params }, jsonData,
headers headers
); );

View File

@ -15,6 +15,7 @@ import time
from typing import Any, Dict, Optional, List, Union, Callable, Literal, TypeVar, Generic from typing import Any, Dict, Optional, List, Union, Callable, Literal, TypeVar, Generic
import json import json
from datetime import datetime from datetime import datetime
import re
import requests import requests
import pydantic import pydantic
@ -22,6 +23,20 @@ import websockets
import aiohttp import aiohttp
import asyncio import asyncio
def get_version() -> Optional[str]:
    """
    Read the SDK version from the package's ``__init__.py``.

    Searches the ``__init__.py`` located in the same directory as this module
    for a line of the form ``__version__ = "x.y.z"``.

    Returns:
        The captured version string with surrounding whitespace stripped, or
        None when the file cannot be read or has no ``__version__`` assignment.
    """
    try:
        from pathlib import Path
        version_file = Path(__file__).parent / '__init__.py'
        # Python source files are UTF-8 by default; do not depend on the locale encoding.
        version_source = version_file.read_text(encoding='utf-8')
        version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", version_source, re.M)
        if version_match:
            return version_match.group(1).strip()
    except Exception:
        # Best-effort lookup: report through logging instead of writing to stdout.
        # The logger is fetched by name because the module-level `logger` is not
        # yet assigned when this function runs during import.
        logging.getLogger("firecrawl").warning(
            "Failed to get version from __init__.py", exc_info=True
        )
    return None
version = get_version()
logger : logging.Logger = logging.getLogger("firecrawl") logger : logging.Logger = logging.getLogger("firecrawl")
T = TypeVar('T') T = TypeVar('T')
@ -424,6 +439,7 @@ class FirecrawlApp:
if key not in ['jsonOptions']: if key not in ['jsonOptions']:
scrape_params[key] = value scrape_params[key] = value
scrape_params['origin'] = f"python-sdk@{version}"
endpoint = f'/v1/scrape' endpoint = f'/v1/scrape'
# Make the POST request with the prepared headers and JSON data # Make the POST request with the prepared headers and JSON data
@ -489,10 +505,13 @@ class FirecrawlApp:
search_params = params search_params = params
search_params.query = query search_params.query = query
params_dict = search_params.dict(exclude_none=True)
params_dict['origin'] = f"python-sdk@{version}"
response = requests.post( response = requests.post(
f"{self.api_url}/v1/search", f"{self.api_url}/v1/search",
headers={"Authorization": f"Bearer {self.api_key}"}, headers={"Authorization": f"Bearer {self.api_key}"},
json=search_params.dict(exclude_none=True) json=params_dict
) )
if response.status_code != 200: if response.status_code != 200:
@ -548,6 +567,7 @@ class FirecrawlApp:
json_data = {'url': url} json_data = {'url': url}
if params: if params:
json_data.update(params) json_data.update(params)
json_data['origin'] = f"python-sdk@{version}"
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
if response.status_code == 200: if response.status_code == 200:
try: try:
@ -609,6 +629,7 @@ class FirecrawlApp:
json_data = {'url': url} json_data = {'url': url}
if params: if params:
json_data.update(params) json_data.update(params)
json_data['origin'] = f"python-sdk@{version}"
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
if response.status_code == 200: if response.status_code == 200:
try: try:
@ -835,6 +856,7 @@ class FirecrawlApp:
json_data = {'url': url} json_data = {'url': url}
if params: if params:
json_data.update(params) json_data.update(params)
json_data['origin'] = f"python-sdk@{version}"
# Make the POST request with the prepared headers and JSON data # Make the POST request with the prepared headers and JSON data
response = requests.post( response = requests.post(
@ -897,6 +919,7 @@ class FirecrawlApp:
json_data = {'urls': urls} json_data = {'urls': urls}
if params: if params:
json_data.update(params) json_data.update(params)
json_data['origin'] = f"python-sdk@{version}"
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
if response.status_code == 200: if response.status_code == 200:
try: try:
@ -953,6 +976,7 @@ class FirecrawlApp:
json_data = {'urls': urls} json_data = {'urls': urls}
if params: if params:
json_data.update(params) json_data.update(params)
json_data['origin'] = f"python-sdk@{version}"
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers) response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
if response.status_code == 200: if response.status_code == 200:
try: try:
@ -1153,7 +1177,7 @@ class FirecrawlApp:
'enableWebSearch': params.get('enable_web_search', params.get('enableWebSearch', False)), 'enableWebSearch': params.get('enable_web_search', params.get('enableWebSearch', False)),
'showSources': params.get('show_sources', params.get('showSources', False)), 'showSources': params.get('show_sources', params.get('showSources', False)),
'schema': schema, 'schema': schema,
'origin': 'api-sdk' 'origin': f'python-sdk@{get_version()}'
} }
# Only add prompt and systemPrompt if they exist # Only add prompt and systemPrompt if they exist
@ -1284,7 +1308,7 @@ class FirecrawlApp:
**jsonData, **jsonData,
'allowExternalLinks': params.get('allow_external_links', False) if params else False, 'allowExternalLinks': params.get('allow_external_links', False) if params else False,
'schema': schema, 'schema': schema,
'origin': 'api-sdk' 'origin': f'python-sdk@{version}'
} }
try: try:
@ -1387,6 +1411,7 @@ class FirecrawlApp:
headers = self._prepare_headers() headers = self._prepare_headers()
json_data = {'url': url, **generation_params.dict(exclude_none=True)} json_data = {'url': url, **generation_params.dict(exclude_none=True)}
json_data['origin'] = f"python-sdk@{version}"
try: try:
response = self._post_request(f'{self.api_url}/v1/llmstxt', json_data, headers) response = self._post_request(f'{self.api_url}/v1/llmstxt', json_data, headers)
@ -1770,6 +1795,7 @@ class FirecrawlApp:
headers = self._prepare_headers() headers = self._prepare_headers()
json_data = {'query': query, **research_params.dict(exclude_none=True)} json_data = {'query': query, **research_params.dict(exclude_none=True)}
json_data['origin'] = f"python-sdk@{version}"
try: try:
response = self._post_request(f'{self.api_url}/v1/deep-research', json_data, headers) response = self._post_request(f'{self.api_url}/v1/deep-research', json_data, headers)
@ -2178,7 +2204,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
Exception: If scraping fails Exception: If scraping fails
""" """
headers = self._prepare_headers() headers = self._prepare_headers()
scrape_params = {'url': url} scrape_params = {'url': url, 'origin': f'python-sdk@{version}'}
if params: if params:
extract = params.get('extract', {}) extract = params.get('extract', {})
@ -2245,6 +2271,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
json_data = {'urls': urls} json_data = {'urls': urls}
if params: if params:
json_data.update(params) json_data.update(params)
json_data['origin'] = f"python-sdk@{version}"
endpoint = f'/v1/batch/scrape' endpoint = f'/v1/batch/scrape'
response = await self._async_post_request( response = await self._async_post_request(
@ -2301,6 +2328,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
json_data = {'urls': urls} json_data = {'urls': urls}
if params: if params:
json_data.update(params) json_data.update(params)
json_data['origin'] = f"python-sdk@{version}"
endpoint = f'/v1/batch/scrape' endpoint = f'/v1/batch/scrape'
return await self._async_post_request( return await self._async_post_request(
@ -2355,6 +2383,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
json_data = {'url': url} json_data = {'url': url}
if params: if params:
json_data.update(params) json_data.update(params)
json_data['origin'] = f"python-sdk@{version}"
endpoint = f'/v1/crawl' endpoint = f'/v1/crawl'
response = await self._async_post_request( response = await self._async_post_request(
@ -2413,6 +2442,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
json_data = {'url': url} json_data = {'url': url}
if params: if params:
json_data.update(params) json_data.update(params)
json_data['origin'] = f"python-sdk@{version}"
endpoint = f'/v1/crawl' endpoint = f'/v1/crawl'
return await self._async_post_request( return await self._async_post_request(
@ -2564,6 +2594,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
json_data = {'url': url} json_data = {'url': url}
if params: if params:
json_data.update(params) json_data.update(params)
json_data['origin'] = f"python-sdk@{version}"
endpoint = f'/v1/map' endpoint = f'/v1/map'
response = await self._async_post_request( response = await self._async_post_request(
@ -2628,7 +2659,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
'enableWebSearch': params.get('enable_web_search', params.get('enableWebSearch', False)), 'enableWebSearch': params.get('enable_web_search', params.get('enableWebSearch', False)),
'showSources': params.get('show_sources', params.get('showSources', False)), 'showSources': params.get('show_sources', params.get('showSources', False)),
'schema': schema, 'schema': schema,
'origin': 'api-sdk' 'origin': f'python-sdk@{version}'
} }
if params.get('prompt'): if params.get('prompt'):
@ -2876,7 +2907,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
**jsonData, **jsonData,
'allowExternalLinks': params.get('allow_external_links', False) if params else False, 'allowExternalLinks': params.get('allow_external_links', False) if params else False,
'schema': schema, 'schema': schema,
'origin': 'api-sdk' 'origin': f'python-sdk@{version}'
} }
try: try:
@ -2975,6 +3006,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
headers = self._prepare_headers() headers = self._prepare_headers()
json_data = {'url': url, **generation_params.dict(exclude_none=True)} json_data = {'url': url, **generation_params.dict(exclude_none=True)}
json_data['origin'] = f"python-sdk@{version}"
try: try:
return await self._async_post_request( return await self._async_post_request(
@ -3132,7 +3164,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
headers = self._prepare_headers() headers = self._prepare_headers()
json_data = {'query': query, **research_params.dict(exclude_none=True)} json_data = {'query': query, **research_params.dict(exclude_none=True)}
json_data['origin'] = f"python-sdk@{version}"
try: try:
return await self._async_post_request( return await self._async_post_request(
f'{self.api_url}/v1/deep-research', f'{self.api_url}/v1/deep-research',
@ -3217,9 +3249,12 @@ class AsyncFirecrawlApp(FirecrawlApp):
search_params = params search_params = params
search_params.query = query search_params.query = query
search_params_dict = search_params.dict(exclude_none=True)
search_params_dict['origin'] = f"python-sdk@{version}"
return await self._async_post_request( return await self._async_post_request(
f"{self.api_url}/v1/search", f"{self.api_url}/v1/search",
search_params.dict(exclude_none=True), search_params_dict,
{"Authorization": f"Bearer {self.api_key}"} {"Authorization": f"Bearer {self.api_key}"}
) )