mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 20:08:59 +08:00
added origin to requests
This commit is contained in:
parent
cc255d488e
commit
4f984d3fde
@ -60,7 +60,9 @@ export async function extractController(
|
||||
if (
|
||||
(await getTeamIdSyncB(req.auth.team_id)) &&
|
||||
req.body.origin !== "api-sdk" &&
|
||||
req.body.origin !== "website"
|
||||
req.body.origin !== "website" &&
|
||||
!req.body.origin.startsWith("python-sdk@") &&
|
||||
!req.body.origin.startsWith("js-sdk@")
|
||||
) {
|
||||
return await oldExtract(req, res, extractId);
|
||||
}
|
||||
|
@ -474,11 +474,26 @@ export interface GenerateLLMsTextStatusResponse {
|
||||
export default class FirecrawlApp {
|
||||
public apiKey: string;
|
||||
public apiUrl: string;
|
||||
public version: string = "1.19.1";
|
||||
|
||||
/**
 * Reports whether a URL string refers to the hosted Firecrawl API.
 *
 * This is a plain substring check for `api.firecrawl.dev`; the URL is not
 * parsed, so the marker may appear anywhere in the string.
 *
 * @param url - The base URL string to inspect.
 * @returns `true` when `url` contains `api.firecrawl.dev`, otherwise `false`.
 */
private isCloudService(url: string): boolean {
  const cloudHostMarker = 'api.firecrawl.dev';
  return url.indexOf(cloudHostMarker) !== -1;
}
|
||||
|
||||
/**
 * Resolves the SDK version by dynamically importing `../package.json`.
 *
 * If the import (or reading its `version` field) throws, the error is logged
 * with `console.error` and a hard-coded fallback version is returned instead,
 * so this method never rejects.
 *
 * @returns The `version` field of the imported package manifest, or the
 *          fallback string when the manifest cannot be loaded.
 */
private async getVersion(): Promise<string> {
  // Returned when the package manifest cannot be imported at runtime.
  const fallbackVersion = "1.19.1";
  try {
    const { default: manifest } = await import('../package.json', { assert: { type: 'json' } });
    return manifest.version;
  } catch (error) {
    console.error("Error getting version:", error);
    return fallbackVersion;
  }
}
|
||||
|
||||
/**
 * Asynchronous part of instance setup: looks up the SDK version via
 * `getVersion()` and stores the result on `this.version`.
 */
private async init() {
  const resolvedVersion = await this.getVersion();
  this.version = resolvedVersion;
}
|
||||
|
||||
/**
|
||||
* Initializes a new instance of the FirecrawlApp class.
|
||||
* @param config - Configuration options for the FirecrawlApp instance.
|
||||
@ -492,6 +507,7 @@ export default class FirecrawlApp {
|
||||
|
||||
this.apiKey = apiKey || '';
|
||||
this.apiUrl = baseUrl;
|
||||
this.init();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -508,7 +524,7 @@ export default class FirecrawlApp {
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${this.apiKey}`,
|
||||
} as AxiosRequestHeaders;
|
||||
let jsonData: any = { url, ...params };
|
||||
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
||||
if (jsonData?.extract?.schema) {
|
||||
let schema = jsonData.extract.schema;
|
||||
|
||||
@ -590,7 +606,7 @@ export default class FirecrawlApp {
|
||||
lang: params?.lang ?? "en",
|
||||
country: params?.country ?? "us",
|
||||
location: params?.location,
|
||||
origin: params?.origin ?? "api",
|
||||
origin: `js-sdk@${this.version}`,
|
||||
timeout: params?.timeout ?? 60000,
|
||||
scrapeOptions: params?.scrapeOptions ?? { formats: [] },
|
||||
};
|
||||
@ -662,7 +678,7 @@ export default class FirecrawlApp {
|
||||
idempotencyKey?: string
|
||||
): Promise<CrawlStatusResponse | ErrorResponse> {
|
||||
const headers = this.prepareHeaders(idempotencyKey);
|
||||
let jsonData: any = { url, ...params };
|
||||
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
||||
try {
|
||||
const response: AxiosResponse = await this.postRequest(
|
||||
this.apiUrl + `/v1/crawl`,
|
||||
@ -691,7 +707,7 @@ export default class FirecrawlApp {
|
||||
idempotencyKey?: string
|
||||
): Promise<CrawlResponse | ErrorResponse> {
|
||||
const headers = this.prepareHeaders(idempotencyKey);
|
||||
let jsonData: any = { url, ...params };
|
||||
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
||||
try {
|
||||
const response: AxiosResponse = await this.postRequest(
|
||||
this.apiUrl + `/v1/crawl`,
|
||||
@ -867,7 +883,7 @@ export default class FirecrawlApp {
|
||||
*/
|
||||
async mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse> {
|
||||
const headers = this.prepareHeaders();
|
||||
let jsonData: { url: string } & MapParams = { url, ...params };
|
||||
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
||||
|
||||
try {
|
||||
const response: AxiosResponse = await this.postRequest(
|
||||
@ -904,7 +920,7 @@ export default class FirecrawlApp {
|
||||
ignoreInvalidURLs?: boolean,
|
||||
): Promise<BatchScrapeStatusResponse | ErrorResponse> {
|
||||
const headers = this.prepareHeaders(idempotencyKey);
|
||||
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params };
|
||||
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
||||
if (jsonData?.extract?.schema) {
|
||||
let schema = jsonData.extract.schema;
|
||||
|
||||
@ -969,7 +985,7 @@ export default class FirecrawlApp {
|
||||
ignoreInvalidURLs?: boolean,
|
||||
): Promise<BatchScrapeResponse | ErrorResponse> {
|
||||
const headers = this.prepareHeaders(idempotencyKey);
|
||||
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...(params ?? {}) };
|
||||
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
||||
try {
|
||||
const response: AxiosResponse = await this.postRequest(
|
||||
this.apiUrl + `/v1/batch/scrape`,
|
||||
@ -1143,7 +1159,7 @@ export default class FirecrawlApp {
|
||||
try {
|
||||
const response: AxiosResponse = await this.postRequest(
|
||||
this.apiUrl + `/v1/extract`,
|
||||
{ ...jsonData, schema: jsonSchema, origin: params?.origin || "api-sdk" },
|
||||
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
||||
headers
|
||||
);
|
||||
|
||||
@ -1211,7 +1227,7 @@ export default class FirecrawlApp {
|
||||
try {
|
||||
const response: AxiosResponse = await this.postRequest(
|
||||
this.apiUrl + `/v1/extract`,
|
||||
{ ...jsonData, schema: jsonSchema },
|
||||
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
||||
headers
|
||||
);
|
||||
|
||||
@ -1497,10 +1513,11 @@ export default class FirecrawlApp {
|
||||
*/
|
||||
async asyncDeepResearch(query: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse> {
|
||||
const headers = this.prepareHeaders();
|
||||
let jsonData: any = { query, ...params, origin: `js-sdk@${this.version}` };
|
||||
try {
|
||||
const response: AxiosResponse = await this.postRequest(
|
||||
`${this.apiUrl}/v1/deep-research`,
|
||||
{ query, ...params },
|
||||
this.apiUrl + `/v1/deep-research`,
|
||||
jsonData,
|
||||
headers
|
||||
);
|
||||
|
||||
@ -1632,9 +1649,10 @@ export default class FirecrawlApp {
|
||||
async __asyncDeepResearch(topic: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse> {
|
||||
const headers = this.prepareHeaders();
|
||||
try {
|
||||
let jsonData: any = { topic, ...params, origin: `js-sdk@${this.version}` };
|
||||
const response: AxiosResponse = await this.postRequest(
|
||||
`${this.apiUrl}/v1/deep-research`,
|
||||
{ topic, ...params },
|
||||
jsonData,
|
||||
headers
|
||||
);
|
||||
|
||||
@ -1744,10 +1762,11 @@ export default class FirecrawlApp {
|
||||
*/
|
||||
async asyncGenerateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextResponse | ErrorResponse> {
|
||||
const headers = this.prepareHeaders();
|
||||
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
||||
try {
|
||||
const response: AxiosResponse = await this.postRequest(
|
||||
`${this.apiUrl}/v1/llmstxt`,
|
||||
{ url, ...params },
|
||||
jsonData,
|
||||
headers
|
||||
);
|
||||
|
||||
|
@ -15,6 +15,7 @@ import time
|
||||
from typing import Any, Dict, Optional, List, Union, Callable, Literal, TypeVar, Generic
|
||||
import json
|
||||
from datetime import datetime
|
||||
import re
|
||||
|
||||
import requests
|
||||
import pydantic
|
||||
@ -22,6 +23,20 @@ import websockets
|
||||
import aiohttp
|
||||
import asyncio
|
||||
|
||||
def get_version() -> Optional[str]:
    """
    Read the SDK version string from the ``__init__.py`` next to this module.

    The file is searched for a line of the form ``__version__ = "<value>"``
    (single or double quotes) and the quoted value is returned with
    surrounding whitespace stripped.

    Returns:
        Optional[str]: The version string, or ``None`` when ``__init__.py``
        cannot be read or contains no ``__version__`` assignment.

    Note:
        This is best-effort and never raises. Failures are reported as a
        warning on the "firecrawl" logger rather than printed to stdout.
    """
    # Imported locally so this helper does not rely on the order of
    # module-level statements in this file.
    import logging
    from pathlib import Path

    try:
        init_file = Path(__file__).parent / '__init__.py'
        # Decode explicitly as UTF-8; read_text() would otherwise use the
        # locale's preferred encoding, which varies between platforms.
        version_file = init_file.read_text(encoding='utf-8')
        version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M)
        if version_match:
            return version_match.group(1).strip()
    except Exception as exc:
        # Deliberately broad: version lookup must never break importing the SDK.
        logging.getLogger("firecrawl").warning(
            "Failed to get version from __init__.py: %s", exc
        )
    return None
|
||||
|
||||
version = get_version()
|
||||
|
||||
logger : logging.Logger = logging.getLogger("firecrawl")
|
||||
|
||||
T = TypeVar('T')
|
||||
@ -424,6 +439,7 @@ class FirecrawlApp:
|
||||
if key not in ['jsonOptions']:
|
||||
scrape_params[key] = value
|
||||
|
||||
scrape_params['origin'] = f"python-sdk@{version}"
|
||||
|
||||
endpoint = f'/v1/scrape'
|
||||
# Make the POST request with the prepared headers and JSON data
|
||||
@ -489,10 +505,13 @@ class FirecrawlApp:
|
||||
search_params = params
|
||||
search_params.query = query
|
||||
|
||||
params_dict = search_params.dict(exclude_none=True)
|
||||
params_dict['origin'] = f"python-sdk@{version}"
|
||||
|
||||
response = requests.post(
|
||||
f"{self.api_url}/v1/search",
|
||||
headers={"Authorization": f"Bearer {self.api_key}"},
|
||||
json=search_params.dict(exclude_none=True)
|
||||
json=params_dict
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
@ -548,6 +567,7 @@ class FirecrawlApp:
|
||||
json_data = {'url': url}
|
||||
if params:
|
||||
json_data.update(params)
|
||||
json_data['origin'] = f"python-sdk@{version}"
|
||||
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
|
||||
if response.status_code == 200:
|
||||
try:
|
||||
@ -609,6 +629,7 @@ class FirecrawlApp:
|
||||
json_data = {'url': url}
|
||||
if params:
|
||||
json_data.update(params)
|
||||
json_data['origin'] = f"python-sdk@{version}"
|
||||
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
|
||||
if response.status_code == 200:
|
||||
try:
|
||||
@ -835,6 +856,7 @@ class FirecrawlApp:
|
||||
json_data = {'url': url}
|
||||
if params:
|
||||
json_data.update(params)
|
||||
json_data['origin'] = f"python-sdk@{version}"
|
||||
|
||||
# Make the POST request with the prepared headers and JSON data
|
||||
response = requests.post(
|
||||
@ -897,6 +919,7 @@ class FirecrawlApp:
|
||||
json_data = {'urls': urls}
|
||||
if params:
|
||||
json_data.update(params)
|
||||
json_data['origin'] = f"python-sdk@{version}"
|
||||
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
|
||||
if response.status_code == 200:
|
||||
try:
|
||||
@ -953,6 +976,7 @@ class FirecrawlApp:
|
||||
json_data = {'urls': urls}
|
||||
if params:
|
||||
json_data.update(params)
|
||||
json_data['origin'] = f"python-sdk@{version}"
|
||||
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
|
||||
if response.status_code == 200:
|
||||
try:
|
||||
@ -1153,7 +1177,7 @@ class FirecrawlApp:
|
||||
'enableWebSearch': params.get('enable_web_search', params.get('enableWebSearch', False)),
|
||||
'showSources': params.get('show_sources', params.get('showSources', False)),
|
||||
'schema': schema,
|
||||
'origin': 'api-sdk'
|
||||
'origin': f'python-sdk@{get_version()}'
|
||||
}
|
||||
|
||||
# Only add prompt and systemPrompt if they exist
|
||||
@ -1284,7 +1308,7 @@ class FirecrawlApp:
|
||||
**jsonData,
|
||||
'allowExternalLinks': params.get('allow_external_links', False) if params else False,
|
||||
'schema': schema,
|
||||
'origin': 'api-sdk'
|
||||
'origin': f'python-sdk@{version}'
|
||||
}
|
||||
|
||||
try:
|
||||
@ -1387,6 +1411,7 @@ class FirecrawlApp:
|
||||
|
||||
headers = self._prepare_headers()
|
||||
json_data = {'url': url, **generation_params.dict(exclude_none=True)}
|
||||
json_data['origin'] = f"python-sdk@{version}"
|
||||
|
||||
try:
|
||||
response = self._post_request(f'{self.api_url}/v1/llmstxt', json_data, headers)
|
||||
@ -1770,6 +1795,7 @@ class FirecrawlApp:
|
||||
|
||||
headers = self._prepare_headers()
|
||||
json_data = {'query': query, **research_params.dict(exclude_none=True)}
|
||||
json_data['origin'] = f"python-sdk@{version}"
|
||||
|
||||
try:
|
||||
response = self._post_request(f'{self.api_url}/v1/deep-research', json_data, headers)
|
||||
@ -2178,7 +2204,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
Exception: If scraping fails
|
||||
"""
|
||||
headers = self._prepare_headers()
|
||||
scrape_params = {'url': url}
|
||||
scrape_params = {'url': url, 'origin': f'python-sdk@{version}'}
|
||||
|
||||
if params:
|
||||
extract = params.get('extract', {})
|
||||
@ -2245,6 +2271,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
json_data = {'urls': urls}
|
||||
if params:
|
||||
json_data.update(params)
|
||||
json_data['origin'] = f"python-sdk@{version}"
|
||||
|
||||
endpoint = f'/v1/batch/scrape'
|
||||
response = await self._async_post_request(
|
||||
@ -2301,6 +2328,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
json_data = {'urls': urls}
|
||||
if params:
|
||||
json_data.update(params)
|
||||
json_data['origin'] = f"python-sdk@{version}"
|
||||
|
||||
endpoint = f'/v1/batch/scrape'
|
||||
return await self._async_post_request(
|
||||
@ -2355,6 +2383,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
json_data = {'url': url}
|
||||
if params:
|
||||
json_data.update(params)
|
||||
json_data['origin'] = f"python-sdk@{version}"
|
||||
|
||||
endpoint = f'/v1/crawl'
|
||||
response = await self._async_post_request(
|
||||
@ -2413,6 +2442,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
json_data = {'url': url}
|
||||
if params:
|
||||
json_data.update(params)
|
||||
json_data['origin'] = f"python-sdk@{version}"
|
||||
|
||||
endpoint = f'/v1/crawl'
|
||||
return await self._async_post_request(
|
||||
@ -2564,6 +2594,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
json_data = {'url': url}
|
||||
if params:
|
||||
json_data.update(params)
|
||||
json_data['origin'] = f"python-sdk@{version}"
|
||||
|
||||
endpoint = f'/v1/map'
|
||||
response = await self._async_post_request(
|
||||
@ -2628,7 +2659,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
'enableWebSearch': params.get('enable_web_search', params.get('enableWebSearch', False)),
|
||||
'showSources': params.get('show_sources', params.get('showSources', False)),
|
||||
'schema': schema,
|
||||
'origin': 'api-sdk'
|
||||
'origin': f'python-sdk@{version}'
|
||||
}
|
||||
|
||||
if params.get('prompt'):
|
||||
@ -2876,7 +2907,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
**jsonData,
|
||||
'allowExternalLinks': params.get('allow_external_links', False) if params else False,
|
||||
'schema': schema,
|
||||
'origin': 'api-sdk'
|
||||
'origin': f'python-sdk@{version}'
|
||||
}
|
||||
|
||||
try:
|
||||
@ -2975,6 +3006,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
|
||||
headers = self._prepare_headers()
|
||||
json_data = {'url': url, **generation_params.dict(exclude_none=True)}
|
||||
json_data['origin'] = f"python-sdk@{version}"
|
||||
|
||||
try:
|
||||
return await self._async_post_request(
|
||||
@ -3132,7 +3164,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
|
||||
headers = self._prepare_headers()
|
||||
json_data = {'query': query, **research_params.dict(exclude_none=True)}
|
||||
|
||||
json_data['origin'] = f"python-sdk@{version}"
|
||||
try:
|
||||
return await self._async_post_request(
|
||||
f'{self.api_url}/v1/deep-research',
|
||||
@ -3217,9 +3249,12 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
search_params = params
|
||||
search_params.query = query
|
||||
|
||||
search_params_dict = search_params.dict(exclude_none=True)
|
||||
search_params_dict['origin'] = f"python-sdk@{version}"
|
||||
|
||||
return await self._async_post_request(
|
||||
f"{self.api_url}/v1/search",
|
||||
search_params.dict(exclude_none=True),
|
||||
search_params_dict,
|
||||
{"Authorization": f"Bearer {self.api_key}"}
|
||||
)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user