mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-19 19:09:08 +08:00
async saving to index
This commit is contained in:
parent
d1b5e2ef47
commit
60525220a2
@ -20,56 +20,64 @@ export async function sendDocumentToIndex(meta: Meta, document: Document) {
|
|||||||
return document;
|
return document;
|
||||||
}
|
}
|
||||||
|
|
||||||
const normalizedURL = normalizeURLForIndex(meta.url);
|
(async () => {
|
||||||
const urlHash = await hashURL(normalizedURL);
|
try {
|
||||||
|
const normalizedURL = normalizeURLForIndex(meta.url);
|
||||||
|
const urlHash = await hashURL(normalizedURL);
|
||||||
|
|
||||||
const urlSplits = generateURLSplits(normalizedURL);
|
const urlSplits = generateURLSplits(normalizedURL);
|
||||||
const urlSplitsHash = await Promise.all(urlSplits.map(split => hashURL(split)));
|
const urlSplitsHash = await Promise.all(urlSplits.map(split => hashURL(split)));
|
||||||
|
|
||||||
const indexId = crypto.randomUUID();
|
const indexId = crypto.randomUUID();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await saveIndexToGCS(indexId, {
|
await saveIndexToGCS(indexId, {
|
||||||
url: normalizedURL,
|
url: normalizedURL,
|
||||||
html: document.rawHtml!,
|
html: document.rawHtml!,
|
||||||
statusCode: document.metadata.statusCode,
|
statusCode: document.metadata.statusCode,
|
||||||
error: document.metadata.error,
|
error: document.metadata.error,
|
||||||
screenshot: document.screenshot,
|
screenshot: document.screenshot,
|
||||||
numPages: document.metadata.numPages,
|
numPages: document.metadata.numPages,
|
||||||
});
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
meta.logger.error("Failed to save document to index", {
|
meta.logger.error("Failed to save document to index", {
|
||||||
error,
|
error,
|
||||||
});
|
});
|
||||||
return document;
|
return document;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await addIndexInsertJob({
|
await addIndexInsertJob({
|
||||||
id: indexId,
|
id: indexId,
|
||||||
url: normalizedURL,
|
url: normalizedURL,
|
||||||
url_hash: urlHash,
|
url_hash: urlHash,
|
||||||
url_splits: urlSplits,
|
url_splits: urlSplits,
|
||||||
url_splits_hash: urlSplitsHash,
|
url_splits_hash: urlSplitsHash,
|
||||||
original_url: document.metadata.sourceURL ?? meta.url,
|
original_url: document.metadata.sourceURL ?? meta.url,
|
||||||
resolved_url: document.metadata.url ?? document.metadata.sourceURL ?? meta.url,
|
resolved_url: document.metadata.url ?? document.metadata.sourceURL ?? meta.url,
|
||||||
has_screenshot: document.screenshot !== undefined && meta.featureFlags.has("screenshot"),
|
has_screenshot: document.screenshot !== undefined && meta.featureFlags.has("screenshot"),
|
||||||
has_screenshot_fullscreen: document.screenshot !== undefined && meta.featureFlags.has("screenshot@fullScreen"),
|
has_screenshot_fullscreen: document.screenshot !== undefined && meta.featureFlags.has("screenshot@fullScreen"),
|
||||||
is_mobile: meta.options.mobile,
|
is_mobile: meta.options.mobile,
|
||||||
block_ads: meta.options.blockAds,
|
block_ads: meta.options.blockAds,
|
||||||
location_country: meta.options.location?.country ?? null,
|
location_country: meta.options.location?.country ?? null,
|
||||||
location_languages: meta.options.location?.languages ?? null,
|
location_languages: meta.options.location?.languages ?? null,
|
||||||
status: document.metadata.statusCode,
|
status: document.metadata.statusCode,
|
||||||
...(urlSplitsHash.slice(0, 10).reduce((a,x,i) => ({
|
...(urlSplitsHash.slice(0, 10).reduce((a,x,i) => ({
|
||||||
...a,
|
...a,
|
||||||
[`url_split_${i}_hash`]: x,
|
[`url_split_${i}_hash`]: x,
|
||||||
}), {})),
|
}), {})),
|
||||||
});
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
meta.logger.error("Failed to add document to index insert queue", {
|
meta.logger.error("Failed to add document to index insert queue", {
|
||||||
error,
|
error,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
} catch (error) {
|
||||||
|
meta.logger.error("Failed to save document to index (outer)", {
|
||||||
|
error,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
})();
|
||||||
|
|
||||||
return document;
|
return document;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user