Add change tracking support to Python and JS SDKs (#1448)

* Add change tracking support to Python and JS SDKs

Co-Authored-By: Nicolas Camara <nick@sideguide.dev>

* Replace test API keys with TEST_API_KEY placeholder

Co-Authored-By: Nicolas Camara <nick@sideguide.dev>

* Replace API keys with dummy values for testing

Co-Authored-By: Nicolas Camara <nick@sideguide.dev>

* Use environment variables for API keys in tests

Co-Authored-By: Nicolas Camara <nick@sideguide.dev>

* Move JS SDK test to correct location and add dependencies

Co-Authored-By: Nicolas Camara <nick@sideguide.dev>

* Remove old test file location

Co-Authored-By: Nicolas Camara <nick@sideguide.dev>

* Update test file to use TEST_API_KEY environment variable

Co-Authored-By: Nicolas Camara <nick@sideguide.dev>

* Update Python SDK test to use TEST_API_KEY environment variable

Co-Authored-By: Nicolas Camara <nick@sideguide.dev>

* Update package.json

* Update __init__.py

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Nicolas Camara <nick@sideguide.dev>
Co-authored-by: Nicolas <nicolascamara29@gmail.com>
This commit is contained in:
devin-ai-integration[bot] 2025-04-12 16:47:24 -07:00 committed by GitHub
parent 138a9757ae
commit ef341399f0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 254 additions and 20 deletions

View File

@ -1,15 +1,14 @@
{ {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "1.22.0", "version": "1.22.1",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "1.22.0", "version": "1.22.1",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"axios": "^1.6.8",
"typescript-event-target": "^1.1.1", "typescript-event-target": "^1.1.1",
"zod": "^3.23.8", "zod": "^3.23.8",
"zod-to-json-schema": "^3.23.0" "zod-to-json-schema": "^3.23.0"
@ -18,10 +17,11 @@
"@jest/globals": "^29.7.0", "@jest/globals": "^29.7.0",
"@types/axios": "^0.14.0", "@types/axios": "^0.14.0",
"@types/dotenv": "^8.2.0", "@types/dotenv": "^8.2.0",
"@types/jest": "^29.5.12", "@types/jest": "^29.5.14",
"@types/mocha": "^10.0.6", "@types/mocha": "^10.0.6",
"@types/node": "^20.12.12", "@types/node": "^20.17.30",
"@types/uuid": "^9.0.8", "@types/uuid": "^9.0.8",
"axios": "^1.8.4",
"dotenv": "^16.4.5", "dotenv": "^16.4.5",
"jest": "^29.7.0", "jest": "^29.7.0",
"ts-jest": "^29.2.2", "ts-jest": "^29.2.2",
@ -1812,10 +1812,11 @@
} }
}, },
"node_modules/@types/jest": { "node_modules/@types/jest": {
"version": "29.5.12", "version": "29.5.14",
"resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.12.tgz", "resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.14.tgz",
"integrity": "sha512-eDC8bTvT/QhYdxJAulQikueigY5AsdBRH2yDKW3yveW7svY3+DzN84/2NUgkw10RTiJbWqZrTtoGVdYlvFJdLw==", "integrity": "sha512-ZN+4sdnLUbo8EVvVc2ao0GFW6oVrQRPn4K2lglySj7APvSrgzxHiNNK99us4WDMi57xxA2yggblIAMNhXOotLQ==",
"dev": true, "dev": true,
"license": "MIT",
"dependencies": { "dependencies": {
"expect": "^29.0.0", "expect": "^29.0.0",
"pretty-format": "^29.0.0" "pretty-format": "^29.0.0"
@ -1949,12 +1950,15 @@
"node_modules/asynckit": { "node_modules/asynckit": {
"version": "0.4.0", "version": "0.4.0",
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
"dev": true
}, },
"node_modules/axios": { "node_modules/axios": {
"version": "1.6.8", "version": "1.8.4",
"resolved": "https://registry.npmjs.org/axios/-/axios-1.6.8.tgz", "resolved": "https://registry.npmjs.org/axios/-/axios-1.8.4.tgz",
"integrity": "sha512-v/ZHtJDU39mDpyBoFVkETcd/uNdxrWRrg3bKpOKzXFA6Bvqopts6ALSMU3y6ijYxbw2B+wPrIv46egTzJXCLGQ==", "integrity": "sha512-eBSYY4Y68NNlHbHBMdeDmKNtDgXWhQsJcGqzO3iLUM0GraQFSS9cVgPX5I9b3lbdFKyYoAEGAZF1DwhTaljNAw==",
"dev": true,
"license": "MIT",
"dependencies": { "dependencies": {
"follow-redirects": "^1.15.6", "follow-redirects": "^1.15.6",
"form-data": "^4.0.0", "form-data": "^4.0.0",
@ -2351,6 +2355,7 @@
"version": "1.0.8", "version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
"dev": true,
"dependencies": { "dependencies": {
"delayed-stream": "~1.0.0" "delayed-stream": "~1.0.0"
}, },
@ -2467,6 +2472,7 @@
"version": "1.0.0", "version": "1.0.0",
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
"dev": true,
"engines": { "engines": {
"node": ">=0.4.0" "node": ">=0.4.0"
} }
@ -2784,6 +2790,7 @@
"version": "1.15.6", "version": "1.15.6",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
"integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==", "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==",
"dev": true,
"funding": [ "funding": [
{ {
"type": "individual", "type": "individual",
@ -2831,6 +2838,7 @@
"version": "4.0.0", "version": "4.0.0",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
"dev": true,
"dependencies": { "dependencies": {
"asynckit": "^0.4.0", "asynckit": "^0.4.0",
"combined-stream": "^1.0.8", "combined-stream": "^1.0.8",
@ -4111,6 +4119,7 @@
"version": "1.52.0", "version": "1.52.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
"dev": true,
"engines": { "engines": {
"node": ">= 0.6" "node": ">= 0.6"
} }
@ -4119,6 +4128,7 @@
"version": "2.1.35", "version": "2.1.35",
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
"dev": true,
"dependencies": { "dependencies": {
"mime-db": "1.52.0" "mime-db": "1.52.0"
}, },
@ -4507,7 +4517,8 @@
"node_modules/proxy-from-env": { "node_modules/proxy-from-env": {
"version": "1.1.0", "version": "1.1.0",
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==",
"dev": true
}, },
"node_modules/punycode": { "node_modules/punycode": {
"version": "2.3.1", "version": "2.3.1",

View File

@ -1,6 +1,6 @@
{ {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "1.22.1", "version": "1.23.0",
"description": "JavaScript SDK for Firecrawl API", "description": "JavaScript SDK for Firecrawl API",
"main": "dist/index.js", "main": "dist/index.js",
"types": "dist/index.d.ts", "types": "dist/index.d.ts",
@ -25,7 +25,6 @@
"author": "Mendable.ai", "author": "Mendable.ai",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"axios": "^1.6.8",
"typescript-event-target": "^1.1.1", "typescript-event-target": "^1.1.1",
"zod": "^3.23.8", "zod": "^3.23.8",
"zod-to-json-schema": "^3.23.0" "zod-to-json-schema": "^3.23.0"
@ -38,10 +37,11 @@
"@jest/globals": "^29.7.0", "@jest/globals": "^29.7.0",
"@types/axios": "^0.14.0", "@types/axios": "^0.14.0",
"@types/dotenv": "^8.2.0", "@types/dotenv": "^8.2.0",
"@types/jest": "^29.5.12", "@types/jest": "^29.5.14",
"@types/mocha": "^10.0.6", "@types/mocha": "^10.0.6",
"@types/node": "^20.12.12", "@types/node": "^20.17.30",
"@types/uuid": "^9.0.8", "@types/uuid": "^9.0.8",
"axios": "^1.8.4",
"dotenv": "^16.4.5", "dotenv": "^16.4.5",
"jest": "^29.7.0", "jest": "^29.7.0",
"ts-jest": "^29.2.2", "ts-jest": "^29.2.2",

View File

@ -0,0 +1,105 @@
import axios from 'axios';
import FirecrawlApp from '../../../../src/index';
jest.mock('axios');

// Typed view of the auto-mocked axios module so `post` exposes jest's mock helpers.
const mockedAxios = axios as jest.Mocked<typeof axios>;

describe('Change Tracking Tests', () => {
  const TARGET_URL = 'https://example.com';
  const PREVIOUS_SCRAPE_AT = '2023-01-01T00:00:00Z';
  const DIFF_TEXT = '@@ -1,1 +1,1 @@\n-old content\n+new content';

  /** Queue a single successful (HTTP 200) response whose payload is `document`. */
  const queueScrapeResponse = (document: Record<string, unknown>) =>
    mockedAxios.post.mockResolvedValueOnce({
      status: 200,
      data: {
        success: true,
        data: document
      }
    });

  /** Build a client from TEST_API_KEY, falling back to a dummy key. */
  const createApp = () =>
    new FirecrawlApp({ apiKey: process.env.TEST_API_KEY || 'dummy-api-key-for-testing' });

  // Every test starts from reset mocks: no recorded calls, no queued responses.
  beforeEach(() => {
    jest.resetAllMocks();
  });

  it('should support basic change tracking format', async () => {
    queueScrapeResponse({
      markdown: 'Test markdown content',
      changeTracking: {
        previousScrapeAt: PREVIOUS_SCRAPE_AT,
        changeStatus: 'changed',
        visibility: 'visible'
      }
    });

    const app = createApp();
    const result = await app.scrapeUrl(TARGET_URL, {
      formats: ['markdown', 'changeTracking']
    });

    // The request body (second argument to axios.post) must carry the requested format.
    expect(mockedAxios.post).toHaveBeenCalledTimes(1);
    const requestBody = mockedAxios.post.mock.calls[0][1];
    expect(requestBody.formats).toContain('changeTracking');

    // The change-tracking payload must be surfaced on the result unchanged.
    expect(result).toHaveProperty('changeTracking');
    const tracking = result.changeTracking;
    expect(tracking?.previousScrapeAt).toBe(PREVIOUS_SCRAPE_AT);
    expect(tracking?.changeStatus).toBe('changed');
    expect(tracking?.visibility).toBe('visible');
  });

  it('should support change tracking options with git-diff and json modes', async () => {
    // One deletion and one addition on line 1, in parsed-diff form.
    const parsedDiff = {
      files: [{
        from: null,
        to: null,
        chunks: [{
          content: '@@ -1,1 +1,1 @@',
          changes: [
            { type: 'del', content: '-old content', del: true, ln: 1 },
            { type: 'add', content: '+new content', add: true, ln: 1 }
          ]
        }]
      }]
    };

    queueScrapeResponse({
      markdown: 'Test markdown content',
      changeTracking: {
        previousScrapeAt: PREVIOUS_SCRAPE_AT,
        changeStatus: 'changed',
        visibility: 'visible',
        diff: {
          text: DIFF_TEXT,
          json: parsedDiff
        },
        json: {
          title: {
            previous: 'Old Title',
            current: 'New Title'
          }
        }
      }
    });

    const app = createApp();
    const result = await app.scrapeUrl(TARGET_URL, {
      formats: ['markdown', 'changeTracking'],
      changeTrackingOptions: {
        modes: ['git-diff', 'json'],
        schema: { type: 'object', properties: { title: { type: 'string' } } }
      }
    });

    // Both the format and the change-tracking options must reach the request body.
    expect(mockedAxios.post).toHaveBeenCalledTimes(1);
    const requestBody = mockedAxios.post.mock.calls[0][1];
    expect(requestBody.formats).toContain('changeTracking');
    expect(requestBody.changeTrackingOptions.modes).toEqual(['git-diff', 'json']);

    // Diff text and the per-field JSON comparison must be readable from the result.
    expect(result).toHaveProperty('changeTracking');
    const tracking = result.changeTracking;
    expect(tracking?.diff?.text).toBe(DIFF_TEXT);
    expect(tracking?.json?.title.previous).toBe('Old Title');
    expect(tracking?.json?.title.current).toBe('New Title');
  });
});

View File

@ -74,7 +74,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
visibility: "visible" | "hidden"; visibility: "visible" | "hidden";
diff?: { diff?: {
text: string; text: string;
structured: { json: {
files: Array<{ files: Array<{
from: string | null; from: string | null;
to: string | null; to: string | null;
@ -92,6 +92,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
}>; }>;
}; };
}; };
json?: any;
}; };
// v1 search only // v1 search only
title?: string; title?: string;
@ -160,6 +161,11 @@ export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchem
schema?: LLMSchema; schema?: LLMSchema;
systemPrompt?: string; systemPrompt?: string;
} }
changeTrackingOptions?: {
prompt?: string;
schema?: any;
modes?: ("json" | "git-diff")[];
}
actions?: ActionsSchema; actions?: ActionsSchema;
} }

View File

@ -13,7 +13,7 @@ import os
from .firecrawl import FirecrawlApp # noqa from .firecrawl import FirecrawlApp # noqa
__version__ = "1.15.0" __version__ = "1.16.0"
# Define the logger for the Firecrawl project # Define the logger for the Firecrawl project
logger: logging.Logger = logging.getLogger("firecrawl") logger: logging.Logger = logging.getLogger("firecrawl")

View File

@ -75,6 +75,16 @@ class DeepResearchStatusResponse(pydantic.BaseModel):
sources: List[Dict[str, Any]] sources: List[Dict[str, Any]]
summaries: List[str] summaries: List[str]
class ChangeTrackingData(pydantic.BaseModel):
    """
    Data for the change tracking format.

    Describes the ``changeTracking`` entry of a scrape result. Only
    ``changeStatus`` and ``visibility`` are required; every other field
    defaults to ``None``.
    """
    # When the page was previously scraped, as a string; None when not provided.
    previousScrapeAt: Optional[str] = None
    changeStatus: str # "new" | "same" | "changed" | "removed"
    visibility: str # "visible" | "hidden"
    # Free-form diff payload.
    # NOTE(review): presumably holds "text" and "json" keys when the "git-diff" mode is requested - confirm against the API.
    diff: Optional[Dict[str, Any]] = None
    # Unvalidated payload (typed Any).
    # NOTE(review): presumably the result of the "json" mode - confirm against the API.
    # NOTE(review): `json` is also the name of a pydantic.BaseModel method; depending on the
    # pydantic version this field name may trigger a shadowing warning or error - confirm
    # against the pinned pydantic version before relying on it.
    json: Optional[Any] = None
class FirecrawlApp: class FirecrawlApp:
class SearchResponse(pydantic.BaseModel): class SearchResponse(pydantic.BaseModel):
""" """
@ -167,9 +177,13 @@ class FirecrawlApp:
json['schema'] = json['schema'].schema() json['schema'] = json['schema'].schema()
scrape_params['jsonOptions'] = json scrape_params['jsonOptions'] = json
change_tracking = params.get("changeTrackingOptions", {})
if change_tracking:
scrape_params['changeTrackingOptions'] = change_tracking
# Include any other params directly at the top level of scrape_params # Include any other params directly at the top level of scrape_params
for key, value in params.items(): for key, value in params.items():
if key not in ['jsonOptions']: if key not in ['jsonOptions', 'changeTrackingOptions']:
scrape_params[key] = value scrape_params[key] = value

View File

@ -0,0 +1,98 @@
import unittest
from unittest.mock import patch, MagicMock
import json
import os
from firecrawl import FirecrawlApp
class TestChangeTracking(unittest.TestCase):
    """Exercise ``FirecrawlApp.scrape_url`` with the ``changeTracking`` format.

    ``requests.post`` is patched in every test; each test inspects the ``json``
    keyword argument of the recorded call and the value returned by ``scrape_url``.
    """

    @staticmethod
    def _stub_response(document):
        # Stand-in HTTP response: status 200 with a JSON body that wraps `document`.
        response = MagicMock()
        response.status_code = 200
        response.json.return_value = {
            'success': True,
            'data': document,
        }
        return response

    @patch('requests.post')
    def test_change_tracking_format(self, mock_post):
        # Plain changeTracking request: no options, response has only the status fields.
        mock_post.return_value = self._stub_response({
            'markdown': 'Test markdown content',
            'changeTracking': {
                'previousScrapeAt': '2023-01-01T00:00:00Z',
                'changeStatus': 'changed',
                'visibility': 'visible',
            },
        })

        client = FirecrawlApp(api_key=os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing'))
        result = client.scrape_url('https://example.com', {
            'formats': ['markdown', 'changeTracking'],
        })

        # call_args is (positional args, keyword args); the request body is the `json` kwarg.
        sent_body = mock_post.call_args[1]['json']
        self.assertEqual(sent_body['formats'], ['markdown', 'changeTracking'])

        tracking = result['changeTracking']
        self.assertEqual(tracking['previousScrapeAt'], '2023-01-01T00:00:00Z')
        self.assertEqual(tracking['changeStatus'], 'changed')
        self.assertEqual(tracking['visibility'], 'visible')

    @patch('requests.post')
    def test_change_tracking_options(self, mock_post):
        # Request with changeTrackingOptions; response carries a diff and a JSON comparison.
        diff_text = '@@ -1,1 +1,1 @@\n-old content\n+new content'
        line_changes = [
            {'type': 'del', 'content': '-old content', 'del': True, 'ln': 1},
            {'type': 'add', 'content': '+new content', 'add': True, 'ln': 1},
        ]
        parsed_diff = {
            'files': [{
                'from': None,
                'to': None,
                'chunks': [{
                    'content': '@@ -1,1 +1,1 @@',
                    'changes': line_changes,
                }],
            }],
        }
        mock_post.return_value = self._stub_response({
            'markdown': 'Test markdown content',
            'changeTracking': {
                'previousScrapeAt': '2023-01-01T00:00:00Z',
                'changeStatus': 'changed',
                'visibility': 'visible',
                'diff': {
                    'text': diff_text,
                    'json': parsed_diff,
                },
                'json': {
                    'title': {
                        'previous': 'Old Title',
                        'current': 'New Title',
                    },
                },
            },
        })

        client = FirecrawlApp(api_key=os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing'))
        result = client.scrape_url('https://example.com', {
            'formats': ['markdown', 'changeTracking'],
            'changeTrackingOptions': {
                'modes': ['git-diff', 'json'],
                'schema': {'type': 'object', 'properties': {'title': {'type': 'string'}}},
            },
        })

        # Both the formats and the change-tracking options must be in the request body.
        sent_body = mock_post.call_args[1]['json']
        self.assertEqual(sent_body['formats'], ['markdown', 'changeTracking'])
        self.assertEqual(sent_body['changeTrackingOptions']['modes'], ['git-diff', 'json'])

        tracking = result['changeTracking']
        self.assertEqual(tracking['diff']['text'], diff_text)
        self.assertEqual(tracking['json']['title']['previous'], 'Old Title')
        self.assertEqual(tracking['json']['title']['current'], 'New Title')