mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-04-22 14:09:45 +08:00
Add change tracking support to Python and JS SDKs (#1448)
* Add change tracking support to Python and JS SDKs
  Co-Authored-By: Nicolas Camara <nick@sideguide.dev>
* Replace test API keys with TEST_API_KEY placeholder
  Co-Authored-By: Nicolas Camara <nick@sideguide.dev>
* Replace API keys with dummy values for testing
  Co-Authored-By: Nicolas Camara <nick@sideguide.dev>
* Use environment variables for API keys in tests
  Co-Authored-By: Nicolas Camara <nick@sideguide.dev>
* Move JS SDK test to correct location and add dependencies
  Co-Authored-By: Nicolas Camara <nick@sideguide.dev>
* Remove old test file location
  Co-Authored-By: Nicolas Camara <nick@sideguide.dev>
* Update test file to use TEST_API_KEY environment variable
  Co-Authored-By: Nicolas Camara <nick@sideguide.dev>
* Update Python SDK test to use TEST_API_KEY environment variable
  Co-Authored-By: Nicolas Camara <nick@sideguide.dev>
* Update package.json
* Update __init__.py
---------
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Nicolas Camara <nick@sideguide.dev>
Co-authored-by: Nicolas <nicolascamara29@gmail.com>
This commit is contained in:
parent
138a9757ae
commit
ef341399f0
37
apps/js-sdk/firecrawl/package-lock.json
generated
37
apps/js-sdk/firecrawl/package-lock.json
generated
@ -1,15 +1,14 @@
|
||||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "1.22.0",
|
||||
"version": "1.22.1",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "1.22.0",
|
||||
"version": "1.22.1",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"axios": "^1.6.8",
|
||||
"typescript-event-target": "^1.1.1",
|
||||
"zod": "^3.23.8",
|
||||
"zod-to-json-schema": "^3.23.0"
|
||||
@ -18,10 +17,11 @@
|
||||
"@jest/globals": "^29.7.0",
|
||||
"@types/axios": "^0.14.0",
|
||||
"@types/dotenv": "^8.2.0",
|
||||
"@types/jest": "^29.5.12",
|
||||
"@types/jest": "^29.5.14",
|
||||
"@types/mocha": "^10.0.6",
|
||||
"@types/node": "^20.12.12",
|
||||
"@types/node": "^20.17.30",
|
||||
"@types/uuid": "^9.0.8",
|
||||
"axios": "^1.8.4",
|
||||
"dotenv": "^16.4.5",
|
||||
"jest": "^29.7.0",
|
||||
"ts-jest": "^29.2.2",
|
||||
@ -1812,10 +1812,11 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@types/jest": {
|
||||
"version": "29.5.12",
|
||||
"resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.12.tgz",
|
||||
"integrity": "sha512-eDC8bTvT/QhYdxJAulQikueigY5AsdBRH2yDKW3yveW7svY3+DzN84/2NUgkw10RTiJbWqZrTtoGVdYlvFJdLw==",
|
||||
"version": "29.5.14",
|
||||
"resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.14.tgz",
|
||||
"integrity": "sha512-ZN+4sdnLUbo8EVvVc2ao0GFW6oVrQRPn4K2lglySj7APvSrgzxHiNNK99us4WDMi57xxA2yggblIAMNhXOotLQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"expect": "^29.0.0",
|
||||
"pretty-format": "^29.0.0"
|
||||
@ -1949,12 +1950,15 @@
|
||||
"node_modules/asynckit": {
|
||||
"version": "0.4.0",
|
||||
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
||||
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
|
||||
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/axios": {
|
||||
"version": "1.6.8",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-1.6.8.tgz",
|
||||
"integrity": "sha512-v/ZHtJDU39mDpyBoFVkETcd/uNdxrWRrg3bKpOKzXFA6Bvqopts6ALSMU3y6ijYxbw2B+wPrIv46egTzJXCLGQ==",
|
||||
"version": "1.8.4",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-1.8.4.tgz",
|
||||
"integrity": "sha512-eBSYY4Y68NNlHbHBMdeDmKNtDgXWhQsJcGqzO3iLUM0GraQFSS9cVgPX5I9b3lbdFKyYoAEGAZF1DwhTaljNAw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"follow-redirects": "^1.15.6",
|
||||
"form-data": "^4.0.0",
|
||||
@ -2351,6 +2355,7 @@
|
||||
"version": "1.0.8",
|
||||
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
|
||||
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"delayed-stream": "~1.0.0"
|
||||
},
|
||||
@ -2467,6 +2472,7 @@
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
||||
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">=0.4.0"
|
||||
}
|
||||
@ -2784,6 +2790,7 @@
|
||||
"version": "1.15.6",
|
||||
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
|
||||
"integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==",
|
||||
"dev": true,
|
||||
"funding": [
|
||||
{
|
||||
"type": "individual",
|
||||
@ -2831,6 +2838,7 @@
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
||||
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"asynckit": "^0.4.0",
|
||||
"combined-stream": "^1.0.8",
|
||||
@ -4111,6 +4119,7 @@
|
||||
"version": "1.52.0",
|
||||
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
|
||||
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">= 0.6"
|
||||
}
|
||||
@ -4119,6 +4128,7 @@
|
||||
"version": "2.1.35",
|
||||
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
|
||||
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"mime-db": "1.52.0"
|
||||
},
|
||||
@ -4507,7 +4517,8 @@
|
||||
"node_modules/proxy-from-env": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
|
||||
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
|
||||
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/punycode": {
|
||||
"version": "2.3.1",
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "1.22.1",
|
||||
"version": "1.23.0",
|
||||
"description": "JavaScript SDK for Firecrawl API",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
@ -25,7 +25,6 @@
|
||||
"author": "Mendable.ai",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"axios": "^1.6.8",
|
||||
"typescript-event-target": "^1.1.1",
|
||||
"zod": "^3.23.8",
|
||||
"zod-to-json-schema": "^3.23.0"
|
||||
@ -38,10 +37,11 @@
|
||||
"@jest/globals": "^29.7.0",
|
||||
"@types/axios": "^0.14.0",
|
||||
"@types/dotenv": "^8.2.0",
|
||||
"@types/jest": "^29.5.12",
|
||||
"@types/jest": "^29.5.14",
|
||||
"@types/mocha": "^10.0.6",
|
||||
"@types/node": "^20.12.12",
|
||||
"@types/node": "^20.17.30",
|
||||
"@types/uuid": "^9.0.8",
|
||||
"axios": "^1.8.4",
|
||||
"dotenv": "^16.4.5",
|
||||
"jest": "^29.7.0",
|
||||
"ts-jest": "^29.2.2",
|
||||
|
@ -0,0 +1,105 @@
|
||||
import axios from 'axios';
|
||||
import FirecrawlApp from '../../../../src/index';
|
||||
|
||||
jest.mock('axios');
|
||||
const mockedAxios = axios as jest.Mocked<typeof axios>;
|
||||
|
||||
/**
 * Unit tests for the `changeTracking` scrape format.
 *
 * axios is module-mocked, so no request leaves the process: each test queues
 * one canned `axios.post` resolution, calls `scrapeUrl`, then checks both the
 * request body handed to `axios.post` and the document returned to the caller.
 */
describe('Change Tracking Tests', () => {
  const TARGET_URL = 'https://example.com';
  const UNIFIED_DIFF = '@@ -1,1 +1,1 @@\n-old content\n+new content';

  // changeTracking fields shared by every canned response in this suite.
  const baseTracking = {
    previousScrapeAt: '2023-01-01T00:00:00Z',
    changeStatus: 'changed',
    visibility: 'visible'
  };

  /** Queues a single successful `axios.post` resolution wrapping `changeTracking`. */
  const queueScrapeResponse = (changeTracking: Record<string, unknown>): void => {
    mockedAxios.post.mockResolvedValueOnce({
      status: 200,
      data: {
        success: true,
        data: {
          markdown: 'Test markdown content',
          changeTracking
        }
      }
    });
  };

  /** Builds a client using TEST_API_KEY when set, otherwise a dummy key. */
  const createApp = () =>
    new FirecrawlApp({ apiKey: process.env.TEST_API_KEY || 'dummy-api-key-for-testing' });

  // Start every test with a clean mock: no queued resolutions, no recorded calls.
  beforeEach(() => {
    jest.resetAllMocks();
  });

  it('should support basic change tracking format', async () => {
    queueScrapeResponse({ ...baseTracking });

    const app = createApp();
    const result = await app.scrapeUrl(TARGET_URL, {
      formats: ['markdown', 'changeTracking']
    });

    // Outgoing request: exactly one POST whose body lists the changeTracking format.
    expect(mockedAxios.post).toHaveBeenCalledTimes(1);
    expect(mockedAxios.post.mock.calls[0][1].formats).toContain('changeTracking');

    // Returned document: the canned changeTracking fields are surfaced unchanged.
    expect(result).toHaveProperty('changeTracking');
    expect(result.changeTracking?.previousScrapeAt).toBe('2023-01-01T00:00:00Z');
    expect(result.changeTracking?.changeStatus).toBe('changed');
    expect(result.changeTracking?.visibility).toBe('visible');
  });

  it('should support change tracking options with git-diff and json modes', async () => {
    // One deletion and one addition on line 1, matching UNIFIED_DIFF.
    const deletedLine = { type: 'del', content: '-old content', del: true, ln: 1 };
    const addedLine = { type: 'add', content: '+new content', add: true, ln: 1 };

    queueScrapeResponse({
      ...baseTracking,
      diff: {
        text: UNIFIED_DIFF,
        json: {
          files: [
            {
              from: null,
              to: null,
              chunks: [
                {
                  content: '@@ -1,1 +1,1 @@',
                  changes: [deletedLine, addedLine]
                }
              ]
            }
          ]
        }
      },
      json: {
        title: {
          previous: 'Old Title',
          current: 'New Title'
        }
      }
    });

    const app = createApp();
    const result = await app.scrapeUrl(TARGET_URL, {
      formats: ['markdown', 'changeTracking'],
      changeTrackingOptions: {
        modes: ['git-diff', 'json'],
        schema: { type: 'object', properties: { title: { type: 'string' } } }
      }
    });

    // Outgoing request: format and the requested modes are both forwarded.
    expect(mockedAxios.post).toHaveBeenCalledTimes(1);
    expect(mockedAxios.post.mock.calls[0][1].formats).toContain('changeTracking');
    expect(mockedAxios.post.mock.calls[0][1].changeTrackingOptions.modes).toEqual(['git-diff', 'json']);

    // Returned document: both the text diff and the json comparison are present.
    expect(result).toHaveProperty('changeTracking');
    expect(result.changeTracking?.diff?.text).toBe('@@ -1,1 +1,1 @@\n-old content\n+new content');
    expect(result.changeTracking?.json?.title.previous).toBe('Old Title');
    expect(result.changeTracking?.json?.title.current).toBe('New Title');
  });
});
|
@ -74,7 +74,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
|
||||
visibility: "visible" | "hidden";
|
||||
diff?: {
|
||||
text: string;
|
||||
structured: {
|
||||
json: {
|
||||
files: Array<{
|
||||
from: string | null;
|
||||
to: string | null;
|
||||
@ -92,6 +92,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
|
||||
}>;
|
||||
};
|
||||
};
|
||||
json?: any;
|
||||
};
|
||||
// v1 search only
|
||||
title?: string;
|
||||
@ -160,6 +161,11 @@ export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchem
|
||||
schema?: LLMSchema;
|
||||
systemPrompt?: string;
|
||||
}
|
||||
changeTrackingOptions?: {
|
||||
prompt?: string;
|
||||
schema?: any;
|
||||
modes?: ("json" | "git-diff")[];
|
||||
}
|
||||
actions?: ActionsSchema;
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,7 @@ import os
|
||||
|
||||
from .firecrawl import FirecrawlApp # noqa
|
||||
|
||||
__version__ = "1.15.0"
|
||||
__version__ = "1.16.0"
|
||||
|
||||
# Define the logger for the Firecrawl project
|
||||
logger: logging.Logger = logging.getLogger("firecrawl")
|
||||
|
@ -75,6 +75,16 @@ class DeepResearchStatusResponse(pydantic.BaseModel):
|
||||
sources: List[Dict[str, Any]]
|
||||
summaries: List[str]
|
||||
|
||||
class ChangeTrackingData(pydantic.BaseModel):
    """
    Data for the change tracking format.
    """
    # Optional string; None when not supplied.
    previousScrapeAt: Optional[str] = None
    # Expected values: "new" | "same" | "changed" | "removed".
    # The plain `str` annotation does not enforce this set.
    changeStatus: str
    # Expected values: "visible" | "hidden" (likewise not enforced).
    visibility: str
    # Optional free-form mapping; None when not supplied.
    diff: Optional[Dict[str, Any]] = None
    # NOTE(review): `json` shares its name with pydantic's BaseModel.json
    # method — confirm the installed pydantic version accepts this field name
    # without an error or warning.
    json: Optional[Any] = None
|
||||
|
||||
class FirecrawlApp:
|
||||
class SearchResponse(pydantic.BaseModel):
|
||||
"""
|
||||
@ -167,9 +177,13 @@ class FirecrawlApp:
|
||||
json['schema'] = json['schema'].schema()
|
||||
scrape_params['jsonOptions'] = json
|
||||
|
||||
change_tracking = params.get("changeTrackingOptions", {})
|
||||
if change_tracking:
|
||||
scrape_params['changeTrackingOptions'] = change_tracking
|
||||
|
||||
# Include any other params directly at the top level of scrape_params
|
||||
for key, value in params.items():
|
||||
if key not in ['jsonOptions']:
|
||||
if key not in ['jsonOptions', 'changeTrackingOptions']:
|
||||
scrape_params[key] = value
|
||||
|
||||
|
||||
|
98
apps/python-sdk/tests/test_change_tracking.py
Normal file
98
apps/python-sdk/tests/test_change_tracking.py
Normal file
@ -0,0 +1,98 @@
|
||||
import unittest
|
||||
from unittest.mock import patch, MagicMock
|
||||
import json
|
||||
import os
|
||||
from firecrawl import FirecrawlApp
|
||||
|
||||
class TestChangeTracking(unittest.TestCase):
    """
    Tests for the changeTracking scrape format.

    `requests.post` is patched in every test, so no HTTP request is made. Each
    test inspects the `json` keyword argument passed to the patched call and
    the value returned by `scrape_url`.
    """

    @staticmethod
    def _stub_response(change_tracking):
        """Return a MagicMock with status 200 whose .json() wraps `change_tracking`."""
        response = MagicMock()
        response.status_code = 200
        response.json.return_value = {
            'success': True,
            'data': {
                'markdown': 'Test markdown content',
                'changeTracking': change_tracking,
            },
        }
        return response

    @staticmethod
    def _make_app():
        """Create a client using TEST_API_KEY when set, otherwise a dummy key."""
        api_key = os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing')
        return FirecrawlApp(api_key=api_key)

    @patch('requests.post')
    def test_change_tracking_format(self, mock_post):
        """Requesting the changeTracking format sends it and returns its fields."""
        mock_post.return_value = self._stub_response({
            'previousScrapeAt': '2023-01-01T00:00:00Z',
            'changeStatus': 'changed',
            'visibility': 'visible',
        })

        app = self._make_app()
        result = app.scrape_url('https://example.com', {
            'formats': ['markdown', 'changeTracking'],
        })

        # Request body handed to the patched requests.post.
        sent_body = mock_post.call_args[1]['json']
        self.assertEqual(sent_body['formats'], ['markdown', 'changeTracking'])

        # Fields of the canned payload as seen by the caller.
        tracking = result['changeTracking']
        self.assertEqual(tracking['previousScrapeAt'], '2023-01-01T00:00:00Z')
        self.assertEqual(tracking['changeStatus'], 'changed')
        self.assertEqual(tracking['visibility'], 'visible')

    @patch('requests.post')
    def test_change_tracking_options(self, mock_post):
        """changeTrackingOptions are sent and diff/json results are returned."""
        # One deletion and one addition on line 1, matching the text diff below.
        removed_line = {'type': 'del', 'content': '-old content', 'del': True, 'ln': 1}
        added_line = {'type': 'add', 'content': '+new content', 'add': True, 'ln': 1}

        mock_post.return_value = self._stub_response({
            'previousScrapeAt': '2023-01-01T00:00:00Z',
            'changeStatus': 'changed',
            'visibility': 'visible',
            'diff': {
                'text': '@@ -1,1 +1,1 @@\n-old content\n+new content',
                'json': {
                    'files': [{
                        'from': None,
                        'to': None,
                        'chunks': [{
                            'content': '@@ -1,1 +1,1 @@',
                            'changes': [removed_line, added_line],
                        }],
                    }],
                },
            },
            'json': {
                'title': {
                    'previous': 'Old Title',
                    'current': 'New Title',
                },
            },
        })

        app = self._make_app()
        result = app.scrape_url('https://example.com', {
            'formats': ['markdown', 'changeTracking'],
            'changeTrackingOptions': {
                'modes': ['git-diff', 'json'],
                'schema': {'type': 'object', 'properties': {'title': {'type': 'string'}}},
            },
        })

        # Request body handed to the patched requests.post.
        sent_body = mock_post.call_args[1]['json']
        self.assertEqual(sent_body['formats'], ['markdown', 'changeTracking'])
        self.assertEqual(sent_body['changeTrackingOptions']['modes'], ['git-diff', 'json'])

        # Diff text and json comparison as seen by the caller.
        tracking = result['changeTracking']
        self.assertEqual(tracking['diff']['text'], '@@ -1,1 +1,1 @@\n-old content\n+new content')
        self.assertEqual(tracking['json']['title']['previous'], 'Old Title')
        self.assertEqual(tracking['json']['title']['current'], 'New Title')
|
Loading…
x
Reference in New Issue
Block a user