diff --git a/backend/functions/src/cloud-functions/crawler.ts b/backend/functions/src/cloud-functions/crawler.ts index e24ffde..9cc12fc 100644 --- a/backend/functions/src/cloud-functions/crawler.ts +++ b/backend/functions/src/cloud-functions/crawler.ts @@ -49,6 +49,11 @@ export interface FormattedPage { pageshot?: Buffer; links?: { [k: string]: string; }; images?: { [k: string]: string; }; + usage?: { + total_tokens?: number; + totalTokens?: number; + tokens?: number; + }; toString: () => string; } @@ -743,7 +748,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`; } const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl); - chargeAmount = this.getChargeAmount(formatted); + chargeAmount = this.assignChargeAmount(formatted); sseStream.write({ event: 'data', data: formatted, @@ -771,7 +776,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`; } const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl); - chargeAmount = this.getChargeAmount(formatted); + chargeAmount = this.assignChargeAmount(formatted); if (crawlerOptions.timeout === undefined) { return formatted; @@ -783,7 +788,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`; } const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl); - chargeAmount = this.getChargeAmount(formatted); + chargeAmount = this.assignChargeAmount(formatted); return formatted; } @@ -795,7 +800,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`; } const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl); - chargeAmount = this.getChargeAmount(formatted); + chargeAmount = this.assignChargeAmount(formatted); if (crawlerOptions.timeout === undefined) { if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) { @@ -820,7 +825,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`; } const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl); - chargeAmount = this.getChargeAmount(formatted); + chargeAmount = this.assignChargeAmount(formatted); if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) { return assignTransferProtocolMeta(`${formatted}`, @@ -1005,25 +1010,31 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`; } } - getChargeAmount(formatted: FormattedPage) { + assignChargeAmount(formatted: FormattedPage) { if (!formatted) { return undefined; } const textContent = formatted?.content || formatted?.description || formatted?.text || formatted?.html; + let amount; + do { + if (typeof textContent === 'string') { + amount = estimateToken(textContent); + break; + } - if (typeof textContent === 'string') { - return estimateToken(textContent); - } + const imageContent = formatted.screenshotUrl || formatted.screenshot; - const imageContent = formatted.screenshotUrl || formatted.screenshot; + if (imageContent) { + // OpenAI image token count for 1024x1024 image + amount = 765; + break; + } + } while (false); - if (imageContent) { - // OpenAI image token count for 1024x1024 image - return 765; - } + Object.assign(formatted, { usage: { tokens: amount } }); - return undefined; + return amount; } diff --git a/backend/functions/src/cloud-functions/searcher.ts b/backend/functions/src/cloud-functions/searcher.ts index 00dd898..11e5afe 100644 --- a/backend/functions/src/cloud-functions/searcher.ts +++ b/backend/functions/src/cloud-functions/searcher.ts @@ -178,7 +178,7 @@ export class SearcherHost extends RPCHost { continue; } - chargeAmount = this.getChargeAmount(scrapped); + chargeAmount = this.assignChargeAmount(scrapped); sseStream.write({ event: 'data', data: scrapped, @@ -211,7 +211,7 @@ export class SearcherHost extends RPCHost { if (!lastScrapped) { return; } - chargeAmount = this.getChargeAmount(lastScrapped); + chargeAmount = this.assignChargeAmount(lastScrapped); rpcReflect.return(lastScrapped); earlyReturn = true; }, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs); @@ -228,7 +228,7 @@ export class SearcherHost extends RPCHost { if (earlyReturnTimer) { clearTimeout(earlyReturnTimer); } - chargeAmount = this.getChargeAmount(scrapped); + chargeAmount = this.assignChargeAmount(scrapped); return scrapped; } @@ -242,7 +242,7 @@ export class SearcherHost extends RPCHost { } if (!earlyReturn) { - chargeAmount = this.getChargeAmount(lastScrapped); + chargeAmount = this.assignChargeAmount(lastScrapped); } return lastScrapped; @@ -257,7 +257,7 @@ export class SearcherHost extends RPCHost { if (!lastScrapped) { return; } - chargeAmount = this.getChargeAmount(lastScrapped); + chargeAmount = this.assignChargeAmount(lastScrapped); rpcReflect.return(assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null })); earlyReturn = true; }, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs); @@ -278,7 +278,7 @@ export class SearcherHost extends RPCHost { clearTimeout(earlyReturnTimer); } - chargeAmount = this.getChargeAmount(scrapped); + chargeAmount = this.assignChargeAmount(scrapped); return assignTransferProtocolMeta(`${scrapped}`, { contentType: 'text/plain', envelope: null }); } @@ -292,7 +292,7 @@ export class SearcherHost extends RPCHost { } if (!earlyReturn) { - chargeAmount = this.getChargeAmount(lastScrapped); + chargeAmount = this.assignChargeAmount(lastScrapped); } return assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null }); @@ -423,9 +423,9 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n')}\n` : ''}`; return resultArray; } - getChargeAmount(formatted: FormattedPage[]) { + assignChargeAmount(formatted: FormattedPage[]) { return _.sum( - formatted.map((x) => this.crawler.getChargeAmount(x) || 0) + formatted.map((x) => this.crawler.assignChargeAmount(x) || 0) ); }