feat: return usage tokens in json

This commit is contained in:
Yanlong Wang 2024-08-16 20:32:38 +08:00
parent c7860e615c
commit fb5bd58ee4
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
2 changed files with 35 additions and 24 deletions

View File

@ -49,6 +49,11 @@ export interface FormattedPage {
pageshot?: Buffer; pageshot?: Buffer;
links?: { [k: string]: string; }; links?: { [k: string]: string; };
images?: { [k: string]: string; }; images?: { [k: string]: string; };
usage?: {
total_tokens?: number;
totalTokens?: number;
tokens?: number;
};
toString: () => string; toString: () => string;
} }
@ -743,7 +748,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
} }
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl); const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
chargeAmount = this.getChargeAmount(formatted); chargeAmount = this.assignChargeAmount(formatted);
sseStream.write({ sseStream.write({
event: 'data', event: 'data',
data: formatted, data: formatted,
@ -771,7 +776,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
} }
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl); const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
chargeAmount = this.getChargeAmount(formatted); chargeAmount = this.assignChargeAmount(formatted);
if (crawlerOptions.timeout === undefined) { if (crawlerOptions.timeout === undefined) {
return formatted; return formatted;
@ -783,7 +788,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
} }
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl); const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl);
chargeAmount = this.getChargeAmount(formatted); chargeAmount = this.assignChargeAmount(formatted);
return formatted; return formatted;
} }
@ -795,7 +800,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
} }
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl); const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
chargeAmount = this.getChargeAmount(formatted); chargeAmount = this.assignChargeAmount(formatted);
if (crawlerOptions.timeout === undefined) { if (crawlerOptions.timeout === undefined) {
if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) { if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
@ -820,7 +825,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
} }
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl); const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl);
chargeAmount = this.getChargeAmount(formatted); chargeAmount = this.assignChargeAmount(formatted);
if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) { if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
return assignTransferProtocolMeta(`${formatted}`, return assignTransferProtocolMeta(`${formatted}`,
@ -1005,25 +1010,31 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
} }
} }
getChargeAmount(formatted: FormattedPage) { assignChargeAmount(formatted: FormattedPage) {
if (!formatted) { if (!formatted) {
return undefined; return undefined;
} }
const textContent = formatted?.content || formatted?.description || formatted?.text || formatted?.html; const textContent = formatted?.content || formatted?.description || formatted?.text || formatted?.html;
let amount;
do {
if (typeof textContent === 'string') { if (typeof textContent === 'string') {
return estimateToken(textContent); amount = estimateToken(textContent);
break;
} }
const imageContent = formatted.screenshotUrl || formatted.screenshot; const imageContent = formatted.screenshotUrl || formatted.screenshot;
if (imageContent) { if (imageContent) {
// OpenAI image token count for 1024x1024 image // OpenAI image token count for 1024x1024 image
return 765; amount = 765;
break;
} }
} while (false);
return undefined; Object.assign(formatted, { usage: { tokens: amount } });
return amount;
} }

View File

@ -178,7 +178,7 @@ export class SearcherHost extends RPCHost {
continue; continue;
} }
chargeAmount = this.getChargeAmount(scrapped); chargeAmount = this.assignChargeAmount(scrapped);
sseStream.write({ sseStream.write({
event: 'data', event: 'data',
data: scrapped, data: scrapped,
@ -211,7 +211,7 @@ export class SearcherHost extends RPCHost {
if (!lastScrapped) { if (!lastScrapped) {
return; return;
} }
chargeAmount = this.getChargeAmount(lastScrapped); chargeAmount = this.assignChargeAmount(lastScrapped);
rpcReflect.return(lastScrapped); rpcReflect.return(lastScrapped);
earlyReturn = true; earlyReturn = true;
}, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs); }, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
@ -228,7 +228,7 @@ export class SearcherHost extends RPCHost {
if (earlyReturnTimer) { if (earlyReturnTimer) {
clearTimeout(earlyReturnTimer); clearTimeout(earlyReturnTimer);
} }
chargeAmount = this.getChargeAmount(scrapped); chargeAmount = this.assignChargeAmount(scrapped);
return scrapped; return scrapped;
} }
@ -242,7 +242,7 @@ export class SearcherHost extends RPCHost {
} }
if (!earlyReturn) { if (!earlyReturn) {
chargeAmount = this.getChargeAmount(lastScrapped); chargeAmount = this.assignChargeAmount(lastScrapped);
} }
return lastScrapped; return lastScrapped;
@ -257,7 +257,7 @@ export class SearcherHost extends RPCHost {
if (!lastScrapped) { if (!lastScrapped) {
return; return;
} }
chargeAmount = this.getChargeAmount(lastScrapped); chargeAmount = this.assignChargeAmount(lastScrapped);
rpcReflect.return(assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null })); rpcReflect.return(assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null }));
earlyReturn = true; earlyReturn = true;
}, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs); }, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
@ -278,7 +278,7 @@ export class SearcherHost extends RPCHost {
clearTimeout(earlyReturnTimer); clearTimeout(earlyReturnTimer);
} }
chargeAmount = this.getChargeAmount(scrapped); chargeAmount = this.assignChargeAmount(scrapped);
return assignTransferProtocolMeta(`${scrapped}`, { contentType: 'text/plain', envelope: null }); return assignTransferProtocolMeta(`${scrapped}`, { contentType: 'text/plain', envelope: null });
} }
@ -292,7 +292,7 @@ export class SearcherHost extends RPCHost {
} }
if (!earlyReturn) { if (!earlyReturn) {
chargeAmount = this.getChargeAmount(lastScrapped); chargeAmount = this.assignChargeAmount(lastScrapped);
} }
return assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null }); return assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null });
@ -423,9 +423,9 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n')}\n` : ''}`;
return resultArray; return resultArray;
} }
getChargeAmount(formatted: FormattedPage[]) { assignChargeAmount(formatted: FormattedPage[]) {
return _.sum( return _.sum(
formatted.map((x) => this.crawler.getChargeAmount(x) || 0) formatted.map((x) => this.crawler.assignChargeAmount(x) || 0)
); );
} }