feat: return usage tokens in json

This commit is contained in:
Yanlong Wang 2024-08-16 20:32:38 +08:00
parent c7860e615c
commit fb5bd58ee4
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
2 changed files with 35 additions and 24 deletions

View File

@ -49,6 +49,11 @@ export interface FormattedPage {
pageshot?: Buffer;
links?: { [k: string]: string; };
images?: { [k: string]: string; };
usage?: {
total_tokens?: number;
totalTokens?: number;
tokens?: number;
};
toString: () => string;
}
@ -743,7 +748,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
}
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
chargeAmount = this.getChargeAmount(formatted);
chargeAmount = this.assignChargeAmount(formatted);
sseStream.write({
event: 'data',
data: formatted,
@ -771,7 +776,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
}
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
chargeAmount = this.getChargeAmount(formatted);
chargeAmount = this.assignChargeAmount(formatted);
if (crawlerOptions.timeout === undefined) {
return formatted;
@ -783,7 +788,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
}
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl);
chargeAmount = this.getChargeAmount(formatted);
chargeAmount = this.assignChargeAmount(formatted);
return formatted;
}
@ -795,7 +800,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
}
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
chargeAmount = this.getChargeAmount(formatted);
chargeAmount = this.assignChargeAmount(formatted);
if (crawlerOptions.timeout === undefined) {
if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
@ -820,7 +825,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
}
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl);
chargeAmount = this.getChargeAmount(formatted);
chargeAmount = this.assignChargeAmount(formatted);
if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
return assignTransferProtocolMeta(`${formatted}`,
@ -1005,25 +1010,31 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
}
}
/**
 * Computes the charge amount (in tokens) for a formatted page and records
 * it on the page itself as `usage.tokens`.
 *
 * The text to be charged is the first truthy value among `content`,
 * `description`, `text` and `html`. When that value is a string, the amount
 * is whatever `estimateToken` returns for it. Otherwise, if the page carries
 * a `screenshotUrl` or `screenshot`, a flat image cost is used.
 *
 * Side effect: `formatted` is mutated — its `usage` property is replaced
 * with `{ tokens: amount }`. `tokens` is `undefined` when neither text nor
 * image content was found.
 *
 * @param formatted - The page to measure and annotate.
 * @returns The charge amount, or `undefined` when `formatted` is falsy or
 * has no chargeable content.
 */
assignChargeAmount(formatted: FormattedPage) {
    if (!formatted) {
        // Nothing to measure and nothing to annotate.
        return undefined;
    }

    const textContent = formatted?.content || formatted?.description || formatted?.text || formatted?.html;

    let amount;
    if (typeof textContent === 'string') {
        amount = estimateToken(textContent);
    } else if (formatted.screenshotUrl || formatted.screenshot) {
        // OpenAI image token count for 1024x1024 image
        amount = 765;
    }

    // Expose the figure to API consumers alongside the page payload.
    Object.assign(formatted, { usage: { tokens: amount } });

    return amount;
}

View File

@ -178,7 +178,7 @@ export class SearcherHost extends RPCHost {
continue;
}
chargeAmount = this.getChargeAmount(scrapped);
chargeAmount = this.assignChargeAmount(scrapped);
sseStream.write({
event: 'data',
data: scrapped,
@ -211,7 +211,7 @@ export class SearcherHost extends RPCHost {
if (!lastScrapped) {
return;
}
chargeAmount = this.getChargeAmount(lastScrapped);
chargeAmount = this.assignChargeAmount(lastScrapped);
rpcReflect.return(lastScrapped);
earlyReturn = true;
}, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
@ -228,7 +228,7 @@ export class SearcherHost extends RPCHost {
if (earlyReturnTimer) {
clearTimeout(earlyReturnTimer);
}
chargeAmount = this.getChargeAmount(scrapped);
chargeAmount = this.assignChargeAmount(scrapped);
return scrapped;
}
@ -242,7 +242,7 @@ export class SearcherHost extends RPCHost {
}
if (!earlyReturn) {
chargeAmount = this.getChargeAmount(lastScrapped);
chargeAmount = this.assignChargeAmount(lastScrapped);
}
return lastScrapped;
@ -257,7 +257,7 @@ export class SearcherHost extends RPCHost {
if (!lastScrapped) {
return;
}
chargeAmount = this.getChargeAmount(lastScrapped);
chargeAmount = this.assignChargeAmount(lastScrapped);
rpcReflect.return(assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null }));
earlyReturn = true;
}, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
@ -278,7 +278,7 @@ export class SearcherHost extends RPCHost {
clearTimeout(earlyReturnTimer);
}
chargeAmount = this.getChargeAmount(scrapped);
chargeAmount = this.assignChargeAmount(scrapped);
return assignTransferProtocolMeta(`${scrapped}`, { contentType: 'text/plain', envelope: null });
}
@ -292,7 +292,7 @@ export class SearcherHost extends RPCHost {
}
if (!earlyReturn) {
chargeAmount = this.getChargeAmount(lastScrapped);
chargeAmount = this.assignChargeAmount(lastScrapped);
}
return assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null });
@ -423,9 +423,9 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n')}\n` : ''}`;
return resultArray;
}
/**
 * Computes the total charge amount for a list of formatted pages.
 *
 * Each page is passed to `this.crawler.assignChargeAmount`; a falsy result
 * for a page is counted as 0.
 *
 * @param formatted - The pages to measure.
 * @returns The sum of the per-page charge amounts.
 */
assignChargeAmount(formatted: FormattedPage[]) {
    return _.sum(
        formatted.map((x) => this.crawler.assignChargeAmount(x) || 0)
    );
}