mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-02 22:50:37 +08:00

* wip * integrating smart-scrape * integrate smartscrape into llmExtract * wip * smart scrape multiple links * fixes * fix * wip * it worked! * wip. there's a bug on the batchExtract TypeError: Converting circular structure to JSON * wip * retry model * retry models * feat/scrape+json+extract interfaces ready * vertex -> googleapi * fix/transformArrayToObject. required params on schema is still a bug * change model * o3-mini -> gemini * Update extractSmartScrape.ts * sessionId * sessionId * Nick: f-0 start * Update extraction-service-f0.ts * Update types.ts * Nick: * Update queue-worker.ts * Nick: new interface * rename analyzeSchemaAndPrompt -> F0 * refactor: rename agent ID to model in types and extract logic * agent * id->model * id->model * refactor: standardize agent model handling and validation across extraction logic * livecast agent * (feat/f1) sdks (#1459) * feat: add FIRE-1 agent support to Python and JavaScript SDKs Co-Authored-By: hello@sideguide.dev <hello@sideguide.dev> * feat: add FIRE-1 agent support to scrape methods in both SDKs Co-Authored-By: hello@sideguide.dev <hello@sideguide.dev> * feat: add prompt and sessionId to AgentOptions interface Co-Authored-By: hello@sideguide.dev <hello@sideguide.dev> * Update index.ts --------- Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: hello@sideguide.dev <hello@sideguide.dev> Co-authored-by: Nicolas <nicolascamara29@gmail.com> * feat(v1): rate limits * Update types.ts * Update llmExtract.ts * add cost tracking * remove * Update requests.http * fix smart scrape cost calc * log sm cost * fix counts * fix * expose cost tracking * models fix * temp: skipLibcheck * get rid of it * fix ts * dont skip lib check * Update extractSmartScrape.ts * Update queue-worker.ts * Update smartScrape.ts * Update requests.http * fix(rate-limiter): * types: fire-1 refine * bill 150 * fix credits used on crawl * ban from crawl * route cost limit warning * Update 
generic-ai.ts * genres * Update llmExtract.ts * test server diff * cletu --------- Co-authored-by: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Co-authored-by: Thomas Kosmas <thomas510111@gmail.com> Co-authored-by: Ademílson F. Tonato <ademilsonft@outlook.com> Co-authored-by: devin-ai-integration[bot] <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: hello@sideguide.dev <hello@sideguide.dev> Co-authored-by: Gergő Móricz <mo.geryy@gmail.com>
159 lines
4.4 KiB
TypeScript
159 lines
4.4 KiB
TypeScript
import { deduplicateObjectsArray } from "./deduplicate-objs-array";
|
|
|
|
/**
 * Normalizes a single value: the literal string "null" becomes a real `null`;
 * any other value is handed back unchanged.
 */
function unifyValue(val: any): any {
  if (val === "null") {
    return null;
  }
  return val;
}
|
|
|
|
/**
 * Builds a shallow copy of `item` in which every own enumerable property is
 * passed through `unifyValue`, so "null" strings become real `null` values.
 * Only the top level is visited; nested values are copied by reference.
 */
function unifyItemValues<T extends object>(item: T): T {
  const source: any = item;
  const normalized: any = {};

  Object.keys(item).forEach((prop) => {
    normalized[prop] = unifyValue(source[prop]);
  });

  return normalized;
}
|
|
|
|
/**
 * Decides whether two records can be merged into one.
 *
 * Every key found on either object is inspected, with two exceptions:
 *  - keys whose value is an array on either side are ignored (arrays are
 *    combined during the merge itself, not compared here);
 *  - keys where either side is `null` or `undefined` (including keys that are
 *    simply absent from one object) are ignored, because a missing value
 *    cannot contradict a present one.
 *
 * The remaining keys are compared with `===`, so nested objects only match
 * when they are the very same reference.
 *
 * @param obj1 - First candidate record.
 * @param obj2 - Second candidate record.
 * @returns `true` when at least one key was compared and every compared key
 *   matched; `false` otherwise, including when either argument is not a
 *   non-null object.
 */
export function areMergeable(obj1: any, obj2: any): boolean {
  // Object.keys() throws on null/undefined; such inputs are never mergeable.
  if (
    typeof obj1 !== "object" ||
    obj1 === null ||
    typeof obj2 !== "object" ||
    obj2 === null
  ) {
    return false;
  }

  const allKeys = new Set([...Object.keys(obj1), ...Object.keys(obj2)]);
  let comparedCount = 0;

  for (const key of allKeys) {
    const val1 = obj1[key];
    const val2 = obj2[key];

    // Skip array comparisons - they'll be merged separately
    if (Array.isArray(val1) || Array.isArray(val2)) {
      continue;
    }

    // `== null` matches both null and undefined, so a key that is absent on
    // one side is treated the same as an explicit null instead of a mismatch.
    if (val1 == null || val2 == null) {
      continue;
    }

    // A single conflicting value is enough to rule the pair out.
    if (val1 !== val2) {
      return false;
    }
    comparedCount++;
  }

  // Require at least one shared, agreeing value; otherwise two unrelated
  // records with disjoint data would be considered mergeable.
  return comparedCount > 0;
}
|
|
|
|
/**
 * Concatenates two arrays and drops duplicate elements.
 *
 * Two elements are considered duplicates when `JSON.stringify` produces the
 * same output for both (so object key order matters, and values that
 * stringify to `undefined` are all treated as equal to each other). The first
 * occurrence is kept and the original relative order is preserved.
 *
 * @param arr1 - Elements that come first in the result.
 * @param arr2 - Elements appended after `arr1`.
 * @returns A new array; neither input is modified.
 */
function mergeArrays(arr1: any[], arr2: any[]): any[] {
  // Each element is stringified exactly once and looked up in a Set, instead
  // of re-stringifying all earlier elements for every item.
  const seen = new Set<string | undefined>();
  const merged: any[] = [];

  for (const item of [...arr1, ...arr2]) {
    // JSON.stringify returns undefined for undefined/functions/symbols.
    const fingerprint: string | undefined = JSON.stringify(item);
    if (!seen.has(fingerprint)) {
      seen.add(fingerprint);
      merged.push(item);
    }
  }

  return merged;
}
|
|
|
|
/**
 * Merges `obj2` into a shallow copy of `obj1`, preferring real values over
 * missing ones.
 *
 * For each own enumerable key of `obj2`:
 *  - `null`/`undefined` never overwrites anything already present in the
 *    result; it is only recorded when the result has no such key at all, so
 *    the merged object keeps the key regardless of merge order;
 *  - arrays are combined through `mergeArrays` when both sides are arrays,
 *    otherwise the incoming array is copied;
 *  - plain objects are merged recursively when the existing value is also a
 *    non-null, non-array object, otherwise the incoming object is shallow
 *    copied;
 *  - any other value replaces the existing one.
 *
 * @param obj1 - Base object; it is not mutated.
 * @param obj2 - Object whose values are layered on top of `obj1`.
 * @returns A new object containing the merged result.
 */
function mergeObjects(obj1: any, obj2: any): any {
  const result = { ...obj1 };

  // Nothing to layer on top; Object.keys() would throw on null/undefined.
  if (obj2 === null || obj2 === undefined) {
    return result;
  }

  // Object.keys() yields own enumerable keys only and, unlike calling
  // obj2.hasOwnProperty directly, also works for null-prototype objects.
  for (const key of Object.keys(obj2)) {
    const incoming = obj2[key];

    if (incoming === null || incoming === undefined) {
      // A missing value must not clobber real data, but the key itself is
      // kept when the base object did not have it.
      if (!Object.prototype.hasOwnProperty.call(result, key)) {
        result[key] = incoming;
      }
      continue;
    }

    if (Array.isArray(incoming)) {
      result[key] = Array.isArray(result[key])
        ? mergeArrays(result[key], incoming)
        : [...incoming];
      continue;
    }

    if (typeof incoming === "object") {
      const existing = result[key];
      // typeof null === "object", so null has to be excluded explicitly;
      // otherwise null sub-fields of `incoming` would be dropped by the
      // recursive call.
      const existingIsRecord =
        typeof existing === "object" &&
        existing !== null &&
        !Array.isArray(existing);

      result[key] = existingIsRecord
        ? mergeObjects(existing, incoming)
        : { ...incoming };
      continue;
    }

    // For primitive values, obj2's non-null value always wins
    result[key] = incoming;
  }

  return result;
}
|
|
|
|
/**
 * Merges arrays of objects by combining those that are identical except for
 * null-equivalent fields, filling in null fields with the corresponding
 * non-null fields from the other object.
 *
 * For every key of `objArray`:
 *  - an array holding only primitives/`null` is shallow-copied as is;
 *  - otherwise each record element has its "null" strings normalized via
 *    `unifyItemValues`, is merged into the first earlier record that
 *    `areMergeable` accepts (using `mergeObjects`), or is appended as a new
 *    entry. Elements that are not records (`null`, primitives, nested arrays)
 *    are kept untouched and never take part in merging;
 *  - the outcome is finally passed through `deduplicateObjectsArray`
 *    (imported from "./deduplicate-objs-array"; presumably removes exact
 *    duplicates — confirm against that module).
 *
 * If a value that is not an array is encountered, a warning is logged and the
 * function stops immediately, returning the ORIGINAL `objArray` rather than
 * the partially built result. An `undefined` value at that key is replaced
 * with `[]` on the input object first.
 *
 * @param objArray - Map from field name to the array of extracted items.
 * @returns A new map with merged arrays, or `objArray` itself in the fallback
 *   case described above.
 */
export function mergeNullValObjs(objArray: { [key: string]: any[] }): {
  [key: string]: any[];
} {
  // Only non-null, non-array objects can be normalized and merged key by key.
  const isRecord = (value: unknown): boolean =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  const result: { [key: string]: any[] } = {};

  for (const key in objArray) {
    if (!Array.isArray(objArray[key])) {
      console.warn(
        `Expected an array at objArray[${key}], but found:`,
        objArray[key],
      );

      // create an array if it doesn't exist
      if (objArray[key] === undefined) {
        objArray[key] = [];
      }
      return objArray;
    }

    const values: any[] = objArray[key];

    // If array contains only primitive values, return as is
    if (values.every((item) => typeof item !== "object" || item === null)) {
      result[key] = [...values];
      continue;
    }

    const mergedItems: any[] = [];

    for (const rawItem of values) {
      // null, primitives and nested arrays cannot be merged field by field;
      // feeding them to the helpers would throw or mangle them.
      if (!isRecord(rawItem)) {
        mergedItems.push(rawItem);
        continue;
      }

      const item = unifyItemValues(rawItem);
      const targetIndex = mergedItems.findIndex(
        (existing) => isRecord(existing) && areMergeable(existing, item),
      );

      if (targetIndex === -1) {
        mergedItems.push({ ...item });
      } else {
        mergedItems[targetIndex] = mergeObjects(mergedItems[targetIndex], item);
      }
    }

    // Final deduplication pass
    result[key] = deduplicateObjectsArray({ [key]: mergedItems })[key];
  }

  return result;
}
|