mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-16 03:35:54 +08:00
feat: return usage tokens in json
This commit is contained in:
parent
c7860e615c
commit
fb5bd58ee4
@ -49,6 +49,11 @@ export interface FormattedPage {
|
||||
pageshot?: Buffer;
|
||||
links?: { [k: string]: string; };
|
||||
images?: { [k: string]: string; };
|
||||
usage?: {
|
||||
total_tokens?: number;
|
||||
totalTokens?: number;
|
||||
tokens?: number;
|
||||
};
|
||||
|
||||
toString: () => string;
|
||||
}
|
||||
@ -743,7 +748,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
||||
}
|
||||
|
||||
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
|
||||
chargeAmount = this.getChargeAmount(formatted);
|
||||
chargeAmount = this.assignChargeAmount(formatted);
|
||||
sseStream.write({
|
||||
event: 'data',
|
||||
data: formatted,
|
||||
@ -771,7 +776,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
||||
}
|
||||
|
||||
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
|
||||
chargeAmount = this.getChargeAmount(formatted);
|
||||
chargeAmount = this.assignChargeAmount(formatted);
|
||||
|
||||
if (crawlerOptions.timeout === undefined) {
|
||||
return formatted;
|
||||
@ -783,7 +788,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
||||
}
|
||||
|
||||
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl);
|
||||
chargeAmount = this.getChargeAmount(formatted);
|
||||
chargeAmount = this.assignChargeAmount(formatted);
|
||||
|
||||
return formatted;
|
||||
}
|
||||
@ -795,7 +800,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
||||
}
|
||||
|
||||
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
|
||||
chargeAmount = this.getChargeAmount(formatted);
|
||||
chargeAmount = this.assignChargeAmount(formatted);
|
||||
|
||||
if (crawlerOptions.timeout === undefined) {
|
||||
if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
|
||||
@ -820,7 +825,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
||||
}
|
||||
|
||||
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl);
|
||||
chargeAmount = this.getChargeAmount(formatted);
|
||||
chargeAmount = this.assignChargeAmount(formatted);
|
||||
if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
|
||||
|
||||
return assignTransferProtocolMeta(`${formatted}`,
|
||||
@ -1005,25 +1010,31 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
||||
}
|
||||
}
|
||||
|
||||
getChargeAmount(formatted: FormattedPage) {
|
||||
assignChargeAmount(formatted: FormattedPage) {
|
||||
if (!formatted) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const textContent = formatted?.content || formatted?.description || formatted?.text || formatted?.html;
|
||||
|
||||
let amount;
|
||||
do {
|
||||
if (typeof textContent === 'string') {
|
||||
return estimateToken(textContent);
|
||||
amount = estimateToken(textContent);
|
||||
break;
|
||||
}
|
||||
|
||||
const imageContent = formatted.screenshotUrl || formatted.screenshot;
|
||||
|
||||
if (imageContent) {
|
||||
// OpenAI image token count for 1024x1024 image
|
||||
return 765;
|
||||
amount = 765;
|
||||
break;
|
||||
}
|
||||
} while (false);
|
||||
|
||||
return undefined;
|
||||
Object.assign(formatted, { usage: { tokens: amount } });
|
||||
|
||||
return amount;
|
||||
}
|
||||
|
||||
|
||||
|
@ -178,7 +178,7 @@ export class SearcherHost extends RPCHost {
|
||||
continue;
|
||||
}
|
||||
|
||||
chargeAmount = this.getChargeAmount(scrapped);
|
||||
chargeAmount = this.assignChargeAmount(scrapped);
|
||||
sseStream.write({
|
||||
event: 'data',
|
||||
data: scrapped,
|
||||
@ -211,7 +211,7 @@ export class SearcherHost extends RPCHost {
|
||||
if (!lastScrapped) {
|
||||
return;
|
||||
}
|
||||
chargeAmount = this.getChargeAmount(lastScrapped);
|
||||
chargeAmount = this.assignChargeAmount(lastScrapped);
|
||||
rpcReflect.return(lastScrapped);
|
||||
earlyReturn = true;
|
||||
}, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
|
||||
@ -228,7 +228,7 @@ export class SearcherHost extends RPCHost {
|
||||
if (earlyReturnTimer) {
|
||||
clearTimeout(earlyReturnTimer);
|
||||
}
|
||||
chargeAmount = this.getChargeAmount(scrapped);
|
||||
chargeAmount = this.assignChargeAmount(scrapped);
|
||||
|
||||
return scrapped;
|
||||
}
|
||||
@ -242,7 +242,7 @@ export class SearcherHost extends RPCHost {
|
||||
}
|
||||
|
||||
if (!earlyReturn) {
|
||||
chargeAmount = this.getChargeAmount(lastScrapped);
|
||||
chargeAmount = this.assignChargeAmount(lastScrapped);
|
||||
}
|
||||
|
||||
return lastScrapped;
|
||||
@ -257,7 +257,7 @@ export class SearcherHost extends RPCHost {
|
||||
if (!lastScrapped) {
|
||||
return;
|
||||
}
|
||||
chargeAmount = this.getChargeAmount(lastScrapped);
|
||||
chargeAmount = this.assignChargeAmount(lastScrapped);
|
||||
rpcReflect.return(assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null }));
|
||||
earlyReturn = true;
|
||||
}, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
|
||||
@ -278,7 +278,7 @@ export class SearcherHost extends RPCHost {
|
||||
clearTimeout(earlyReturnTimer);
|
||||
}
|
||||
|
||||
chargeAmount = this.getChargeAmount(scrapped);
|
||||
chargeAmount = this.assignChargeAmount(scrapped);
|
||||
|
||||
return assignTransferProtocolMeta(`${scrapped}`, { contentType: 'text/plain', envelope: null });
|
||||
}
|
||||
@ -292,7 +292,7 @@ export class SearcherHost extends RPCHost {
|
||||
}
|
||||
|
||||
if (!earlyReturn) {
|
||||
chargeAmount = this.getChargeAmount(lastScrapped);
|
||||
chargeAmount = this.assignChargeAmount(lastScrapped);
|
||||
}
|
||||
|
||||
return assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null });
|
||||
@ -423,9 +423,9 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n')}\n` : ''}`;
|
||||
return resultArray;
|
||||
}
|
||||
|
||||
getChargeAmount(formatted: FormattedPage[]) {
|
||||
assignChargeAmount(formatted: FormattedPage[]) {
|
||||
return _.sum(
|
||||
formatted.map((x) => this.crawler.getChargeAmount(x) || 0)
|
||||
formatted.map((x) => this.crawler.assignChargeAmount(x) || 0)
|
||||
);
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user