mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-04-22 22:19:44 +08:00
Add change tracking support to Python and JS SDKs (#1448)
* Add change tracking support to Python and JS SDKs
  Co-Authored-By: Nicolas Camara <nick@sideguide.dev>
* Replace test API keys with TEST_API_KEY placeholder
  Co-Authored-By: Nicolas Camara <nick@sideguide.dev>
* Replace API keys with dummy values for testing
  Co-Authored-By: Nicolas Camara <nick@sideguide.dev>
* Use environment variables for API keys in tests
  Co-Authored-By: Nicolas Camara <nick@sideguide.dev>
* Move JS SDK test to correct location and add dependencies
  Co-Authored-By: Nicolas Camara <nick@sideguide.dev>
* Remove old test file location
  Co-Authored-By: Nicolas Camara <nick@sideguide.dev>
* Update test file to use TEST_API_KEY environment variable
  Co-Authored-By: Nicolas Camara <nick@sideguide.dev>
* Update Python SDK test to use TEST_API_KEY environment variable
  Co-Authored-By: Nicolas Camara <nick@sideguide.dev>
* Update package.json
* Update __init__.py

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Nicolas Camara <nick@sideguide.dev>
Co-authored-by: Nicolas <nicolascamara29@gmail.com>
This commit is contained in:
parent
138a9757ae
commit
ef341399f0
37
apps/js-sdk/firecrawl/package-lock.json
generated
37
apps/js-sdk/firecrawl/package-lock.json
generated
@ -1,15 +1,14 @@
|
|||||||
{
|
{
|
||||||
"name": "@mendable/firecrawl-js",
|
"name": "@mendable/firecrawl-js",
|
||||||
"version": "1.22.0",
|
"version": "1.22.1",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "@mendable/firecrawl-js",
|
"name": "@mendable/firecrawl-js",
|
||||||
"version": "1.22.0",
|
"version": "1.22.1",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"axios": "^1.6.8",
|
|
||||||
"typescript-event-target": "^1.1.1",
|
"typescript-event-target": "^1.1.1",
|
||||||
"zod": "^3.23.8",
|
"zod": "^3.23.8",
|
||||||
"zod-to-json-schema": "^3.23.0"
|
"zod-to-json-schema": "^3.23.0"
|
||||||
@ -18,10 +17,11 @@
|
|||||||
"@jest/globals": "^29.7.0",
|
"@jest/globals": "^29.7.0",
|
||||||
"@types/axios": "^0.14.0",
|
"@types/axios": "^0.14.0",
|
||||||
"@types/dotenv": "^8.2.0",
|
"@types/dotenv": "^8.2.0",
|
||||||
"@types/jest": "^29.5.12",
|
"@types/jest": "^29.5.14",
|
||||||
"@types/mocha": "^10.0.6",
|
"@types/mocha": "^10.0.6",
|
||||||
"@types/node": "^20.12.12",
|
"@types/node": "^20.17.30",
|
||||||
"@types/uuid": "^9.0.8",
|
"@types/uuid": "^9.0.8",
|
||||||
|
"axios": "^1.8.4",
|
||||||
"dotenv": "^16.4.5",
|
"dotenv": "^16.4.5",
|
||||||
"jest": "^29.7.0",
|
"jest": "^29.7.0",
|
||||||
"ts-jest": "^29.2.2",
|
"ts-jest": "^29.2.2",
|
||||||
@ -1812,10 +1812,11 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@types/jest": {
|
"node_modules/@types/jest": {
|
||||||
"version": "29.5.12",
|
"version": "29.5.14",
|
||||||
"resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.12.tgz",
|
"resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.14.tgz",
|
||||||
"integrity": "sha512-eDC8bTvT/QhYdxJAulQikueigY5AsdBRH2yDKW3yveW7svY3+DzN84/2NUgkw10RTiJbWqZrTtoGVdYlvFJdLw==",
|
"integrity": "sha512-ZN+4sdnLUbo8EVvVc2ao0GFW6oVrQRPn4K2lglySj7APvSrgzxHiNNK99us4WDMi57xxA2yggblIAMNhXOotLQ==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"expect": "^29.0.0",
|
"expect": "^29.0.0",
|
||||||
"pretty-format": "^29.0.0"
|
"pretty-format": "^29.0.0"
|
||||||
@ -1949,12 +1950,15 @@
|
|||||||
"node_modules/asynckit": {
|
"node_modules/asynckit": {
|
||||||
"version": "0.4.0",
|
"version": "0.4.0",
|
||||||
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
||||||
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
|
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
|
||||||
|
"dev": true
|
||||||
},
|
},
|
||||||
"node_modules/axios": {
|
"node_modules/axios": {
|
||||||
"version": "1.6.8",
|
"version": "1.8.4",
|
||||||
"resolved": "https://registry.npmjs.org/axios/-/axios-1.6.8.tgz",
|
"resolved": "https://registry.npmjs.org/axios/-/axios-1.8.4.tgz",
|
||||||
"integrity": "sha512-v/ZHtJDU39mDpyBoFVkETcd/uNdxrWRrg3bKpOKzXFA6Bvqopts6ALSMU3y6ijYxbw2B+wPrIv46egTzJXCLGQ==",
|
"integrity": "sha512-eBSYY4Y68NNlHbHBMdeDmKNtDgXWhQsJcGqzO3iLUM0GraQFSS9cVgPX5I9b3lbdFKyYoAEGAZF1DwhTaljNAw==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"follow-redirects": "^1.15.6",
|
"follow-redirects": "^1.15.6",
|
||||||
"form-data": "^4.0.0",
|
"form-data": "^4.0.0",
|
||||||
@ -2351,6 +2355,7 @@
|
|||||||
"version": "1.0.8",
|
"version": "1.0.8",
|
||||||
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
|
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
|
||||||
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
|
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
|
||||||
|
"dev": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"delayed-stream": "~1.0.0"
|
"delayed-stream": "~1.0.0"
|
||||||
},
|
},
|
||||||
@ -2467,6 +2472,7 @@
|
|||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
||||||
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
|
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
|
||||||
|
"dev": true,
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=0.4.0"
|
"node": ">=0.4.0"
|
||||||
}
|
}
|
||||||
@ -2784,6 +2790,7 @@
|
|||||||
"version": "1.15.6",
|
"version": "1.15.6",
|
||||||
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
|
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
|
||||||
"integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==",
|
"integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==",
|
||||||
|
"dev": true,
|
||||||
"funding": [
|
"funding": [
|
||||||
{
|
{
|
||||||
"type": "individual",
|
"type": "individual",
|
||||||
@ -2831,6 +2838,7 @@
|
|||||||
"version": "4.0.0",
|
"version": "4.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
||||||
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
|
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
|
||||||
|
"dev": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"asynckit": "^0.4.0",
|
"asynckit": "^0.4.0",
|
||||||
"combined-stream": "^1.0.8",
|
"combined-stream": "^1.0.8",
|
||||||
@ -4111,6 +4119,7 @@
|
|||||||
"version": "1.52.0",
|
"version": "1.52.0",
|
||||||
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
|
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
|
||||||
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
|
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
|
||||||
|
"dev": true,
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">= 0.6"
|
"node": ">= 0.6"
|
||||||
}
|
}
|
||||||
@ -4119,6 +4128,7 @@
|
|||||||
"version": "2.1.35",
|
"version": "2.1.35",
|
||||||
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
|
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
|
||||||
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
|
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
|
||||||
|
"dev": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"mime-db": "1.52.0"
|
"mime-db": "1.52.0"
|
||||||
},
|
},
|
||||||
@ -4507,7 +4517,8 @@
|
|||||||
"node_modules/proxy-from-env": {
|
"node_modules/proxy-from-env": {
|
||||||
"version": "1.1.0",
|
"version": "1.1.0",
|
||||||
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
|
||||||
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
|
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==",
|
||||||
|
"dev": true
|
||||||
},
|
},
|
||||||
"node_modules/punycode": {
|
"node_modules/punycode": {
|
||||||
"version": "2.3.1",
|
"version": "2.3.1",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@mendable/firecrawl-js",
|
"name": "@mendable/firecrawl-js",
|
||||||
"version": "1.22.1",
|
"version": "1.23.0",
|
||||||
"description": "JavaScript SDK for Firecrawl API",
|
"description": "JavaScript SDK for Firecrawl API",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"types": "dist/index.d.ts",
|
"types": "dist/index.d.ts",
|
||||||
@ -25,7 +25,6 @@
|
|||||||
"author": "Mendable.ai",
|
"author": "Mendable.ai",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"axios": "^1.6.8",
|
|
||||||
"typescript-event-target": "^1.1.1",
|
"typescript-event-target": "^1.1.1",
|
||||||
"zod": "^3.23.8",
|
"zod": "^3.23.8",
|
||||||
"zod-to-json-schema": "^3.23.0"
|
"zod-to-json-schema": "^3.23.0"
|
||||||
@ -38,10 +37,11 @@
|
|||||||
"@jest/globals": "^29.7.0",
|
"@jest/globals": "^29.7.0",
|
||||||
"@types/axios": "^0.14.0",
|
"@types/axios": "^0.14.0",
|
||||||
"@types/dotenv": "^8.2.0",
|
"@types/dotenv": "^8.2.0",
|
||||||
"@types/jest": "^29.5.12",
|
"@types/jest": "^29.5.14",
|
||||||
"@types/mocha": "^10.0.6",
|
"@types/mocha": "^10.0.6",
|
||||||
"@types/node": "^20.12.12",
|
"@types/node": "^20.17.30",
|
||||||
"@types/uuid": "^9.0.8",
|
"@types/uuid": "^9.0.8",
|
||||||
|
"axios": "^1.8.4",
|
||||||
"dotenv": "^16.4.5",
|
"dotenv": "^16.4.5",
|
||||||
"jest": "^29.7.0",
|
"jest": "^29.7.0",
|
||||||
"ts-jest": "^29.2.2",
|
"ts-jest": "^29.2.2",
|
||||||
|
@ -0,0 +1,105 @@
|
|||||||
|
import axios from 'axios';
|
||||||
|
import FirecrawlApp from '../../../../src/index';
|
||||||
|
|
||||||
|
jest.mock('axios');
|
||||||
|
const mockedAxios = axios as jest.Mocked<typeof axios>;
|
||||||
|
|
||||||
|
describe('Change Tracking Tests', () => {
|
||||||
|
beforeEach(() => {
|
||||||
|
jest.resetAllMocks();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should support basic change tracking format', async () => {
|
||||||
|
mockedAxios.post.mockResolvedValueOnce({
|
||||||
|
status: 200,
|
||||||
|
data: {
|
||||||
|
success: true,
|
||||||
|
data: {
|
||||||
|
markdown: 'Test markdown content',
|
||||||
|
changeTracking: {
|
||||||
|
previousScrapeAt: '2023-01-01T00:00:00Z',
|
||||||
|
changeStatus: 'changed',
|
||||||
|
visibility: 'visible'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
const app = new FirecrawlApp({ apiKey: process.env.TEST_API_KEY || 'dummy-api-key-for-testing' });
|
||||||
|
const result = await app.scrapeUrl('https://example.com', {
|
||||||
|
formats: ['markdown', 'changeTracking']
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(mockedAxios.post).toHaveBeenCalledTimes(1);
|
||||||
|
expect(mockedAxios.post.mock.calls[0][1].formats).toContain('changeTracking');
|
||||||
|
|
||||||
|
expect(result).toHaveProperty('changeTracking');
|
||||||
|
expect(result.changeTracking?.previousScrapeAt).toBe('2023-01-01T00:00:00Z');
|
||||||
|
expect(result.changeTracking?.changeStatus).toBe('changed');
|
||||||
|
expect(result.changeTracking?.visibility).toBe('visible');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should support change tracking options with git-diff and json modes', async () => {
|
||||||
|
mockedAxios.post.mockResolvedValueOnce({
|
||||||
|
status: 200,
|
||||||
|
data: {
|
||||||
|
success: true,
|
||||||
|
data: {
|
||||||
|
markdown: 'Test markdown content',
|
||||||
|
changeTracking: {
|
||||||
|
previousScrapeAt: '2023-01-01T00:00:00Z',
|
||||||
|
changeStatus: 'changed',
|
||||||
|
visibility: 'visible',
|
||||||
|
diff: {
|
||||||
|
text: '@@ -1,1 +1,1 @@\n-old content\n+new content',
|
||||||
|
json: {
|
||||||
|
files: [{
|
||||||
|
from: null,
|
||||||
|
to: null,
|
||||||
|
chunks: [{
|
||||||
|
content: '@@ -1,1 +1,1 @@',
|
||||||
|
changes: [{
|
||||||
|
type: 'del',
|
||||||
|
content: '-old content',
|
||||||
|
del: true,
|
||||||
|
ln: 1
|
||||||
|
}, {
|
||||||
|
type: 'add',
|
||||||
|
content: '+new content',
|
||||||
|
add: true,
|
||||||
|
ln: 1
|
||||||
|
}]
|
||||||
|
}]
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
json: {
|
||||||
|
title: {
|
||||||
|
previous: 'Old Title',
|
||||||
|
current: 'New Title'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
const app = new FirecrawlApp({ apiKey: process.env.TEST_API_KEY || 'dummy-api-key-for-testing' });
|
||||||
|
const result = await app.scrapeUrl('https://example.com', {
|
||||||
|
formats: ['markdown', 'changeTracking'],
|
||||||
|
changeTrackingOptions: {
|
||||||
|
modes: ['git-diff', 'json'],
|
||||||
|
schema: { type: 'object', properties: { title: { type: 'string' } } }
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(mockedAxios.post).toHaveBeenCalledTimes(1);
|
||||||
|
expect(mockedAxios.post.mock.calls[0][1].formats).toContain('changeTracking');
|
||||||
|
expect(mockedAxios.post.mock.calls[0][1].changeTrackingOptions.modes).toEqual(['git-diff', 'json']);
|
||||||
|
|
||||||
|
expect(result).toHaveProperty('changeTracking');
|
||||||
|
expect(result.changeTracking?.diff?.text).toBe('@@ -1,1 +1,1 @@\n-old content\n+new content');
|
||||||
|
expect(result.changeTracking?.json?.title.previous).toBe('Old Title');
|
||||||
|
expect(result.changeTracking?.json?.title.current).toBe('New Title');
|
||||||
|
});
|
||||||
|
});
|
@ -74,7 +74,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
|
|||||||
visibility: "visible" | "hidden";
|
visibility: "visible" | "hidden";
|
||||||
diff?: {
|
diff?: {
|
||||||
text: string;
|
text: string;
|
||||||
structured: {
|
json: {
|
||||||
files: Array<{
|
files: Array<{
|
||||||
from: string | null;
|
from: string | null;
|
||||||
to: string | null;
|
to: string | null;
|
||||||
@ -92,6 +92,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
|
|||||||
}>;
|
}>;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
json?: any;
|
||||||
};
|
};
|
||||||
// v1 search only
|
// v1 search only
|
||||||
title?: string;
|
title?: string;
|
||||||
@ -160,6 +161,11 @@ export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchem
|
|||||||
schema?: LLMSchema;
|
schema?: LLMSchema;
|
||||||
systemPrompt?: string;
|
systemPrompt?: string;
|
||||||
}
|
}
|
||||||
|
changeTrackingOptions?: {
|
||||||
|
prompt?: string;
|
||||||
|
schema?: any;
|
||||||
|
modes?: ("json" | "git-diff")[];
|
||||||
|
}
|
||||||
actions?: ActionsSchema;
|
actions?: ActionsSchema;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -13,7 +13,7 @@ import os
|
|||||||
|
|
||||||
from .firecrawl import FirecrawlApp # noqa
|
from .firecrawl import FirecrawlApp # noqa
|
||||||
|
|
||||||
__version__ = "1.15.0"
|
__version__ = "1.16.0"
|
||||||
|
|
||||||
# Define the logger for the Firecrawl project
|
# Define the logger for the Firecrawl project
|
||||||
logger: logging.Logger = logging.getLogger("firecrawl")
|
logger: logging.Logger = logging.getLogger("firecrawl")
|
||||||
|
@ -75,6 +75,16 @@ class DeepResearchStatusResponse(pydantic.BaseModel):
|
|||||||
sources: List[Dict[str, Any]]
|
sources: List[Dict[str, Any]]
|
||||||
summaries: List[str]
|
summaries: List[str]
|
||||||
|
|
||||||
|
class ChangeTrackingData(pydantic.BaseModel):
|
||||||
|
"""
|
||||||
|
Data for the change tracking format.
|
||||||
|
"""
|
||||||
|
previousScrapeAt: Optional[str] = None
|
||||||
|
changeStatus: str # "new" | "same" | "changed" | "removed"
|
||||||
|
visibility: str # "visible" | "hidden"
|
||||||
|
diff: Optional[Dict[str, Any]] = None
|
||||||
|
json: Optional[Any] = None
|
||||||
|
|
||||||
class FirecrawlApp:
|
class FirecrawlApp:
|
||||||
class SearchResponse(pydantic.BaseModel):
|
class SearchResponse(pydantic.BaseModel):
|
||||||
"""
|
"""
|
||||||
@ -167,9 +177,13 @@ class FirecrawlApp:
|
|||||||
json['schema'] = json['schema'].schema()
|
json['schema'] = json['schema'].schema()
|
||||||
scrape_params['jsonOptions'] = json
|
scrape_params['jsonOptions'] = json
|
||||||
|
|
||||||
|
change_tracking = params.get("changeTrackingOptions", {})
|
||||||
|
if change_tracking:
|
||||||
|
scrape_params['changeTrackingOptions'] = change_tracking
|
||||||
|
|
||||||
# Include any other params directly at the top level of scrape_params
|
# Include any other params directly at the top level of scrape_params
|
||||||
for key, value in params.items():
|
for key, value in params.items():
|
||||||
if key not in ['jsonOptions']:
|
if key not in ['jsonOptions', 'changeTrackingOptions']:
|
||||||
scrape_params[key] = value
|
scrape_params[key] = value
|
||||||
|
|
||||||
|
|
||||||
|
98
apps/python-sdk/tests/test_change_tracking.py
Normal file
98
apps/python-sdk/tests/test_change_tracking.py
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
import unittest
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from firecrawl import FirecrawlApp
|
||||||
|
|
||||||
|
class TestChangeTracking(unittest.TestCase):
|
||||||
|
@patch('requests.post')
|
||||||
|
def test_change_tracking_format(self, mock_post):
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.status_code = 200
|
||||||
|
mock_response.json.return_value = {
|
||||||
|
'success': True,
|
||||||
|
'data': {
|
||||||
|
'markdown': 'Test markdown content',
|
||||||
|
'changeTracking': {
|
||||||
|
'previousScrapeAt': '2023-01-01T00:00:00Z',
|
||||||
|
'changeStatus': 'changed',
|
||||||
|
'visibility': 'visible'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
mock_post.return_value = mock_response
|
||||||
|
|
||||||
|
app = FirecrawlApp(api_key=os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing'))
|
||||||
|
result = app.scrape_url('https://example.com', {
|
||||||
|
'formats': ['markdown', 'changeTracking']
|
||||||
|
})
|
||||||
|
|
||||||
|
args, kwargs = mock_post.call_args
|
||||||
|
self.assertEqual(kwargs['json']['formats'], ['markdown', 'changeTracking'])
|
||||||
|
|
||||||
|
self.assertEqual(result['changeTracking']['previousScrapeAt'], '2023-01-01T00:00:00Z')
|
||||||
|
self.assertEqual(result['changeTracking']['changeStatus'], 'changed')
|
||||||
|
self.assertEqual(result['changeTracking']['visibility'], 'visible')
|
||||||
|
|
||||||
|
@patch('requests.post')
|
||||||
|
def test_change_tracking_options(self, mock_post):
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.status_code = 200
|
||||||
|
mock_response.json.return_value = {
|
||||||
|
'success': True,
|
||||||
|
'data': {
|
||||||
|
'markdown': 'Test markdown content',
|
||||||
|
'changeTracking': {
|
||||||
|
'previousScrapeAt': '2023-01-01T00:00:00Z',
|
||||||
|
'changeStatus': 'changed',
|
||||||
|
'visibility': 'visible',
|
||||||
|
'diff': {
|
||||||
|
'text': '@@ -1,1 +1,1 @@\n-old content\n+new content',
|
||||||
|
'json': {
|
||||||
|
'files': [{
|
||||||
|
'from': None,
|
||||||
|
'to': None,
|
||||||
|
'chunks': [{
|
||||||
|
'content': '@@ -1,1 +1,1 @@',
|
||||||
|
'changes': [{
|
||||||
|
'type': 'del',
|
||||||
|
'content': '-old content',
|
||||||
|
'del': True,
|
||||||
|
'ln': 1
|
||||||
|
}, {
|
||||||
|
'type': 'add',
|
||||||
|
'content': '+new content',
|
||||||
|
'add': True,
|
||||||
|
'ln': 1
|
||||||
|
}]
|
||||||
|
}]
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
'json': {
|
||||||
|
'title': {
|
||||||
|
'previous': 'Old Title',
|
||||||
|
'current': 'New Title'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
mock_post.return_value = mock_response
|
||||||
|
|
||||||
|
app = FirecrawlApp(api_key=os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing'))
|
||||||
|
result = app.scrape_url('https://example.com', {
|
||||||
|
'formats': ['markdown', 'changeTracking'],
|
||||||
|
'changeTrackingOptions': {
|
||||||
|
'modes': ['git-diff', 'json'],
|
||||||
|
'schema': {'type': 'object', 'properties': {'title': {'type': 'string'}}}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
args, kwargs = mock_post.call_args
|
||||||
|
self.assertEqual(kwargs['json']['formats'], ['markdown', 'changeTracking'])
|
||||||
|
self.assertEqual(kwargs['json']['changeTrackingOptions']['modes'], ['git-diff', 'json'])
|
||||||
|
|
||||||
|
self.assertEqual(result['changeTracking']['diff']['text'], '@@ -1,1 +1,1 @@\n-old content\n+new content')
|
||||||
|
self.assertEqual(result['changeTracking']['json']['title']['previous'], 'Old Title')
|
||||||
|
self.assertEqual(result['changeTracking']['json']['title']['current'], 'New Title')
|
Loading…
x
Reference in New Issue
Block a user