firecrawl/apps/api/src/lib/extract/helpers/merge-null-val-objs.ts
Nicolas 6634d236bf
(feat/fire-1) FIRE-1 (#1462)
* wip

* integrating smart-scrape

* integrate smartscrape into llmExtract

* wip

* smart scrape multiple links

* fixes

* fix

* wip

* it worked!

* wip. there's a bug on the batchExtract TypeError: Converting circular structure to JSON

* wip

* retry model

* retry models

* feat/scrape+json+extract interfaces ready

* vertex -> googleapi

* fix/transformArrayToObject. required params on schema is still a bug

* change model

* o3-mini -> gemini

* Update extractSmartScrape.ts

* sessionId

* sessionId

* Nick: f-0 start

* Update extraction-service-f0.ts

* Update types.ts

* Nick:

* Update queue-worker.ts

* Nick: new interface

* rename analyzeSchemaAndPrompt -> F0

* refactor: rename agent ID to model in types and extract logic

* agent

* id->model

* id->model

* refactor: standardize agent model handling and validation across extraction logic

* livecast agent

* (feat/f1) sdks (#1459)

* feat: add FIRE-1 agent support to Python and JavaScript SDKs

Co-Authored-By: hello@sideguide.dev <hello@sideguide.dev>

* feat: add FIRE-1 agent support to scrape methods in both SDKs

Co-Authored-By: hello@sideguide.dev <hello@sideguide.dev>

* feat: add prompt and sessionId to AgentOptions interface

Co-Authored-By: hello@sideguide.dev <hello@sideguide.dev>

* Update index.ts

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: hello@sideguide.dev <hello@sideguide.dev>
Co-authored-by: Nicolas <nicolascamara29@gmail.com>

* feat(v1): rate limits

* Update types.ts

* Update llmExtract.ts

* add cost tracking

* remove

* Update requests.http

* fix smart scrape cost calc

* log sm cost

* fix counts

* fix

* expose cost tracking

* models fix

* temp: skipLibcheck

* get rid of it

* fix ts

* dont skip lib check

* Update extractSmartScrape.ts

* Update queue-worker.ts

* Update smartScrape.ts

* Update requests.http

* fix(rate-limiter):

* types: fire-1 refine

* bill 150

* fix credits used on crawl

* ban from crawl

* route cost limit warning

* Update generic-ai.ts

* genres

* Update llmExtract.ts

* test server diff

* cletu

---------

Co-authored-by: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com>
Co-authored-by: Thomas Kosmas <thomas510111@gmail.com>
Co-authored-by: Ademílson F. Tonato <ademilsonft@outlook.com>
Co-authored-by: devin-ai-integration[bot] <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: hello@sideguide.dev <hello@sideguide.dev>
Co-authored-by: Gergő Móricz <mo.geryy@gmail.com>
2025-04-15 00:19:45 -07:00

159 lines
4.4 KiB
TypeScript

import { deduplicateObjectsArray } from "./deduplicate-objs-array";
/**
 * Normalizes a single value: the literal string "null" becomes a real `null`;
 * every other value is returned untouched.
 *
 * @param val - Any value.
 * @returns `null` when `val` is exactly the string "null", otherwise `val`.
 */
function unifyValue(val: any): any {
  if (val === "null") {
    return null;
  }
  return val;
}
/**
 * Builds a shallow copy of `item` in which every top-level property value has
 * been passed through `unifyValue`. Nested objects and arrays are carried over
 * by reference; only the top level is normalized.
 *
 * @param item - Object whose own enumerable properties are copied.
 * @returns A new object with the same keys and normalized values.
 */
function unifyItemValues<T extends object>(item: T): T {
  const normalized: any = {};
  Object.entries(item).forEach(([field, rawValue]) => {
    normalized[field] = unifyValue(rawValue);
  });
  return normalized;
}
/**
 * Structural equality for JSON-like values (primitives, arrays, plain objects).
 * Primitives are compared with `===`; arrays element by element; objects by
 * their own enumerable keys. Assumes acyclic data - a circular reference would
 * recurse without end.
 */
function isStructurallyEqual(a: unknown, b: unknown): boolean {
  if (a === b) {
    return true;
  }
  if (
    typeof a !== "object" ||
    a === null ||
    typeof b !== "object" ||
    b === null
  ) {
    return false;
  }
  if (Array.isArray(a) || Array.isArray(b)) {
    if (!Array.isArray(a) || !Array.isArray(b) || a.length !== b.length) {
      return false;
    }
    const otherItems: unknown[] = b;
    return a.every((entry, index) =>
      isStructurallyEqual(entry, otherItems[index]),
    );
  }
  const left = a as Record<string, unknown>;
  const right = b as Record<string, unknown>;
  const leftKeys = Object.keys(left);
  return (
    leftKeys.length === Object.keys(right).length &&
    leftKeys.every(
      (key) =>
        Object.prototype.hasOwnProperty.call(right, key) &&
        isStructurallyEqual(left[key], right[key]),
    )
  );
}
/**
 * Check if two objects are mergeable by comparing their non-null values.
 *
 * Two objects are mergeable when, over the union of their keys:
 *  - at least one key holds a present (non-null, non-undefined) value on BOTH
 *    sides, and
 *  - every such key holds equal values on both sides.
 *
 * Keys whose value is an array on either side are ignored. A key that is
 * `null` or missing on one side never blocks a merge. Nested objects are
 * compared structurally rather than by reference, so two separately-built
 * items with identical nested objects are still recognised as equal.
 *
 * @param obj1 - First object (must not be null/undefined).
 * @param obj2 - Second object (must not be null/undefined).
 * @returns `true` when the objects agree on every shared present value and
 *          share at least one.
 */
export function areMergeable(obj1: any, obj2: any): boolean {
  const allKeys = new Set([...Object.keys(obj1), ...Object.keys(obj2)]);
  let sharedValueFound = false;
  for (const key of allKeys) {
    const val1 = obj1[key];
    const val2 = obj2[key];
    // Skip array comparisons - they'll be merged separately
    if (Array.isArray(val1) || Array.isArray(val2)) {
      continue;
    }
    // A value that is null OR missing on either side is a gap to fill, not a
    // conflict (`== null` matches both null and undefined).
    if (val1 == null || val2 == null) {
      continue;
    }
    // Any disagreement on a shared present value rules the merge out.
    if (!isStructurallyEqual(val1, val2)) {
      return false;
    }
    sharedValueFound = true;
  }
  return sharedValueFound;
}
/**
 * Concatenates two arrays and removes duplicates, keeping the first occurrence
 * of each element in its original position.
 *
 * Two elements are duplicates when their `JSON.stringify` output is identical.
 * Consequently objects with the same properties in a different key order are
 * NOT duplicates, and all values that `JSON.stringify` cannot serialize
 * (it returns `undefined` for e.g. `undefined` or functions) collapse into a
 * single entry.
 *
 * Each element is serialized exactly once and looked up in a Set, so the merge
 * is linear in the number of elements.
 *
 * @param arr1 - Elements that come first in the result.
 * @param arr2 - Elements appended after `arr1`.
 * @returns A new array; neither input is modified.
 */
function mergeArrays(arr1: any[], arr2: any[]): any[] {
  const seen = new Set<string | undefined>();
  return [...arr1, ...arr2].filter((item) => {
    const serialized: string | undefined = JSON.stringify(item);
    if (seen.has(serialized)) {
      return false;
    }
    seen.add(serialized);
    return true;
  });
}
/**
 * Merge two objects, taking non-null values over null values.
 *
 * Starts from a shallow copy of `obj1` and applies each own property of `obj2`:
 *  - `null` / `undefined` in `obj2` never overrides what `obj1` has;
 *  - arrays: merged with `mergeArrays` when both sides are arrays, otherwise
 *    `obj2`'s array is copied in;
 *  - objects: merged recursively when `obj1` also holds a non-null, non-array
 *    object, otherwise `obj2`'s object is shallow-copied in;
 *  - primitives: `obj2`'s value wins.
 *
 * @param obj1 - Base object; not modified.
 * @param obj2 - Object whose present values take precedence; not modified.
 * @returns A new merged object.
 */
function mergeObjects(obj1: any, obj2: any): any {
  const result = { ...obj1 };
  for (const key in obj2) {
    // Call through the prototype so objects without one (or with their own
    // "hasOwnProperty" data key) are still handled correctly.
    if (!Object.prototype.hasOwnProperty.call(obj2, key)) {
      continue;
    }
    const incoming = obj2[key];
    // Absent values (null or undefined) must never clobber an existing value.
    if (incoming === null || incoming === undefined) {
      continue;
    }
    const current = result[key];
    if (Array.isArray(incoming)) {
      // If both are arrays, merge them; otherwise take a copy of obj2's array.
      result[key] = Array.isArray(current)
        ? mergeArrays(current, incoming)
        : [...incoming];
    } else if (typeof incoming === "object") {
      // `typeof null` is "object", so null must be excluded explicitly:
      // recursing with a null base would drop null-valued nested keys.
      const currentIsPlainObject =
        typeof current === "object" &&
        current !== null &&
        !Array.isArray(current);
      result[key] = currentIsPlainObject
        ? mergeObjects(current, incoming)
        : { ...incoming };
    } else {
      // For primitive values, obj2's non-null value always wins.
      result[key] = incoming;
    }
  }
  return result;
}
/**
 * Merges arrays of objects by combining those that are identical except for
 * null-equivalent fields, filling in null fields with the corresponding
 * non-null fields from the other object.
 *
 * For every key of `objArray`:
 *  - an array made up only of primitives/nulls is copied through unchanged;
 *  - otherwise each object entry has its "null" strings normalized
 *    (`unifyItemValues`) and is folded into the first earlier entry it is
 *    mergeable with (`areMergeable` + `mergeObjects`), or appended if none
 *    matches. Entries that are not plain objects (null, primitives, nested
 *    arrays) are kept as-is and never take part in merging. The outcome is
 *    then passed through `deduplicateObjectsArray` as a final pass;
 *  - a value that is not an array is reported with `console.warn` and passed
 *    through untouched (`undefined` becomes `[]`); processing continues with
 *    the remaining keys.
 *
 * The input object is never modified.
 *
 * @param objArray - Map from key to the array of extracted items for that key.
 * @returns A new map with the same keys and merged arrays.
 */
export function mergeNullValObjs(objArray: { [key: string]: any[] }): {
  [key: string]: any[];
} {
  const isPlainObject = (value: unknown): boolean =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  const result: { [key: string]: any[] } = {};
  for (const key in objArray) {
    const value: unknown = objArray[key];
    if (!Array.isArray(value)) {
      console.warn(
        `Expected an array at objArray[${key}], but found:`,
        value,
      );
      // Keep the caller's data under its key instead of aborting the whole
      // merge; the cast only satisfies the declared return type.
      result[key] = value === undefined ? [] : (value as any[]);
      continue;
    }
    // If array contains only primitive values, return as is
    if (value.every((item) => typeof item !== "object" || item === null)) {
      result[key] = [...value];
      continue;
    }
    const mergedItems: any[] = [];
    for (const rawItem of value) {
      // Only plain objects can be normalized and merged; anything else would
      // crash Object.keys (null) or be mangled into an index-keyed object.
      if (!isPlainObject(rawItem)) {
        mergedItems.push(rawItem);
        continue;
      }
      const item = unifyItemValues(rawItem);
      const targetIndex = mergedItems.findIndex(
        (candidate) => isPlainObject(candidate) && areMergeable(candidate, item),
      );
      if (targetIndex === -1) {
        mergedItems.push({ ...item });
      } else {
        mergedItems[targetIndex] = mergeObjects(mergedItems[targetIndex], item);
      }
    }
    // Final deduplication pass
    result[key] = deduplicateObjectsArray({ [key]: mergedItems })[key];
  }
  return result;
}