diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 6668d33b..061f5c9c 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -69,7 +69,7 @@ export async function extractController( }); } - return res.status(202).json({ + return res.status(200).json({ success: true, id: extractId, urlTrace: [], diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 474eea83..644be7f6 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -887,18 +887,32 @@ export default class FirecrawlApp { { ...jsonData, schema: jsonSchema }, headers ); + if (response.status === 200) { - const responseData = response.data as ExtractResponse; - if (responseData.success) { - return { - success: true, - data: responseData.data, - warning: responseData.warning, - error: responseData.error - }; - } else { - throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status); - } + const jobId = response.data.id; + let extractStatus; + do { + const statusResponse: AxiosResponse = await this.getRequest( + `${this.apiUrl}/v1/extract/${jobId}`, + headers + ); + extractStatus = statusResponse.data; + if (extractStatus.status === "completed") { + if (extractStatus.success) { + return { + success: true, + data: extractStatus.data, + warning: extractStatus.warning, + error: extractStatus.error + }; + } else { + throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status); + } + } else if (extractStatus.status === "failed" || extractStatus.status === "cancelled") { + throw new FirecrawlError(`Extract job ${extractStatus.status}. Error: ${extractStatus.error}`, statusResponse.status); + } + await new Promise(resolve => setTimeout(resolve, 1000)); // Polling interval + } while (extractStatus.status !== "completed"); } else { this.handleError(response, "extract"); } diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index d3216405..e844b733 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -542,6 +542,7 @@ class FirecrawlApp: } try: + # Send the initial extract request response = self._post_request( f'{self.api_url}/v1/extract', request_data, @@ -550,7 +551,29 @@ class FirecrawlApp: if response.status_code == 200: data = response.json() if data['success']: - return data + job_id = data.get('id') + if not job_id: + raise Exception('Job ID not returned from extract request.') + + # Poll for the extract status + while True: + status_response = self._get_request( + f'{self.api_url}/v1/extract/{job_id}', + headers + ) + if status_response.status_code == 200: + status_data = status_response.json() + if status_data['status'] == 'completed': + if status_data['success']: + return status_data + else: + raise Exception(f'Failed to extract. Error: {status_data["error"]}') + elif status_data['status'] in ['failed', 'cancelled']: + raise Exception(f'Extract job {status_data["status"]}. Error: {status_data["error"]}') + else: + self._handle_error(status_response, "extract-status") + + time.sleep(2) # Polling interval else: raise Exception(f'Failed to extract. Error: {data["error"]}') else: