mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-16 14:16:00 +08:00
feat: return usage tokens in json
This commit is contained in:
parent
c7860e615c
commit
fb5bd58ee4
@ -49,6 +49,11 @@ export interface FormattedPage {
|
|||||||
pageshot?: Buffer;
|
pageshot?: Buffer;
|
||||||
links?: { [k: string]: string; };
|
links?: { [k: string]: string; };
|
||||||
images?: { [k: string]: string; };
|
images?: { [k: string]: string; };
|
||||||
|
usage?: {
|
||||||
|
total_tokens?: number;
|
||||||
|
totalTokens?: number;
|
||||||
|
tokens?: number;
|
||||||
|
};
|
||||||
|
|
||||||
toString: () => string;
|
toString: () => string;
|
||||||
}
|
}
|
||||||
@ -743,7 +748,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|||||||
}
|
}
|
||||||
|
|
||||||
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
|
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
|
||||||
chargeAmount = this.getChargeAmount(formatted);
|
chargeAmount = this.assignChargeAmount(formatted);
|
||||||
sseStream.write({
|
sseStream.write({
|
||||||
event: 'data',
|
event: 'data',
|
||||||
data: formatted,
|
data: formatted,
|
||||||
@ -771,7 +776,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|||||||
}
|
}
|
||||||
|
|
||||||
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
|
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
|
||||||
chargeAmount = this.getChargeAmount(formatted);
|
chargeAmount = this.assignChargeAmount(formatted);
|
||||||
|
|
||||||
if (crawlerOptions.timeout === undefined) {
|
if (crawlerOptions.timeout === undefined) {
|
||||||
return formatted;
|
return formatted;
|
||||||
@ -783,7 +788,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|||||||
}
|
}
|
||||||
|
|
||||||
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl);
|
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl);
|
||||||
chargeAmount = this.getChargeAmount(formatted);
|
chargeAmount = this.assignChargeAmount(formatted);
|
||||||
|
|
||||||
return formatted;
|
return formatted;
|
||||||
}
|
}
|
||||||
@ -795,7 +800,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|||||||
}
|
}
|
||||||
|
|
||||||
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
|
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
|
||||||
chargeAmount = this.getChargeAmount(formatted);
|
chargeAmount = this.assignChargeAmount(formatted);
|
||||||
|
|
||||||
if (crawlerOptions.timeout === undefined) {
|
if (crawlerOptions.timeout === undefined) {
|
||||||
if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
|
if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
|
||||||
@ -820,7 +825,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|||||||
}
|
}
|
||||||
|
|
||||||
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl);
|
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl);
|
||||||
chargeAmount = this.getChargeAmount(formatted);
|
chargeAmount = this.assignChargeAmount(formatted);
|
||||||
if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
|
if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
|
||||||
|
|
||||||
return assignTransferProtocolMeta(`${formatted}`,
|
return assignTransferProtocolMeta(`${formatted}`,
|
||||||
@ -1005,25 +1010,31 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
getChargeAmount(formatted: FormattedPage) {
|
assignChargeAmount(formatted: FormattedPage) {
|
||||||
if (!formatted) {
|
if (!formatted) {
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
const textContent = formatted?.content || formatted?.description || formatted?.text || formatted?.html;
|
const textContent = formatted?.content || formatted?.description || formatted?.text || formatted?.html;
|
||||||
|
let amount;
|
||||||
|
do {
|
||||||
|
if (typeof textContent === 'string') {
|
||||||
|
amount = estimateToken(textContent);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (typeof textContent === 'string') {
|
const imageContent = formatted.screenshotUrl || formatted.screenshot;
|
||||||
return estimateToken(textContent);
|
|
||||||
}
|
|
||||||
|
|
||||||
const imageContent = formatted.screenshotUrl || formatted.screenshot;
|
if (imageContent) {
|
||||||
|
// OpenAI image token count for 1024x1024 image
|
||||||
|
amount = 765;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} while (false);
|
||||||
|
|
||||||
if (imageContent) {
|
Object.assign(formatted, { usage: { tokens: amount } });
|
||||||
// OpenAI image token count for 1024x1024 image
|
|
||||||
return 765;
|
|
||||||
}
|
|
||||||
|
|
||||||
return undefined;
|
return amount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -178,7 +178,7 @@ export class SearcherHost extends RPCHost {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
chargeAmount = this.getChargeAmount(scrapped);
|
chargeAmount = this.assignChargeAmount(scrapped);
|
||||||
sseStream.write({
|
sseStream.write({
|
||||||
event: 'data',
|
event: 'data',
|
||||||
data: scrapped,
|
data: scrapped,
|
||||||
@ -211,7 +211,7 @@ export class SearcherHost extends RPCHost {
|
|||||||
if (!lastScrapped) {
|
if (!lastScrapped) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
chargeAmount = this.getChargeAmount(lastScrapped);
|
chargeAmount = this.assignChargeAmount(lastScrapped);
|
||||||
rpcReflect.return(lastScrapped);
|
rpcReflect.return(lastScrapped);
|
||||||
earlyReturn = true;
|
earlyReturn = true;
|
||||||
}, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
|
}, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
|
||||||
@ -228,7 +228,7 @@ export class SearcherHost extends RPCHost {
|
|||||||
if (earlyReturnTimer) {
|
if (earlyReturnTimer) {
|
||||||
clearTimeout(earlyReturnTimer);
|
clearTimeout(earlyReturnTimer);
|
||||||
}
|
}
|
||||||
chargeAmount = this.getChargeAmount(scrapped);
|
chargeAmount = this.assignChargeAmount(scrapped);
|
||||||
|
|
||||||
return scrapped;
|
return scrapped;
|
||||||
}
|
}
|
||||||
@ -242,7 +242,7 @@ export class SearcherHost extends RPCHost {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!earlyReturn) {
|
if (!earlyReturn) {
|
||||||
chargeAmount = this.getChargeAmount(lastScrapped);
|
chargeAmount = this.assignChargeAmount(lastScrapped);
|
||||||
}
|
}
|
||||||
|
|
||||||
return lastScrapped;
|
return lastScrapped;
|
||||||
@ -257,7 +257,7 @@ export class SearcherHost extends RPCHost {
|
|||||||
if (!lastScrapped) {
|
if (!lastScrapped) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
chargeAmount = this.getChargeAmount(lastScrapped);
|
chargeAmount = this.assignChargeAmount(lastScrapped);
|
||||||
rpcReflect.return(assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null }));
|
rpcReflect.return(assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null }));
|
||||||
earlyReturn = true;
|
earlyReturn = true;
|
||||||
}, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
|
}, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
|
||||||
@ -278,7 +278,7 @@ export class SearcherHost extends RPCHost {
|
|||||||
clearTimeout(earlyReturnTimer);
|
clearTimeout(earlyReturnTimer);
|
||||||
}
|
}
|
||||||
|
|
||||||
chargeAmount = this.getChargeAmount(scrapped);
|
chargeAmount = this.assignChargeAmount(scrapped);
|
||||||
|
|
||||||
return assignTransferProtocolMeta(`${scrapped}`, { contentType: 'text/plain', envelope: null });
|
return assignTransferProtocolMeta(`${scrapped}`, { contentType: 'text/plain', envelope: null });
|
||||||
}
|
}
|
||||||
@ -292,7 +292,7 @@ export class SearcherHost extends RPCHost {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!earlyReturn) {
|
if (!earlyReturn) {
|
||||||
chargeAmount = this.getChargeAmount(lastScrapped);
|
chargeAmount = this.assignChargeAmount(lastScrapped);
|
||||||
}
|
}
|
||||||
|
|
||||||
return assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null });
|
return assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null });
|
||||||
@ -423,9 +423,9 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n')}\n` : ''}`;
|
|||||||
return resultArray;
|
return resultArray;
|
||||||
}
|
}
|
||||||
|
|
||||||
getChargeAmount(formatted: FormattedPage[]) {
|
assignChargeAmount(formatted: FormattedPage[]) {
|
||||||
return _.sum(
|
return _.sum(
|
||||||
formatted.map((x) => this.crawler.getChargeAmount(x) || 0)
|
formatted.map((x) => this.crawler.assignChargeAmount(x) || 0)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user