mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-15 14:56:01 +08:00
fix: respond with markdown
This commit is contained in:
parent
69231ad59e
commit
a6a5b7c530
@ -74,7 +74,7 @@ export class CrawlerHost extends RPCHost {
|
|||||||
return turnDownService;
|
return turnDownService;
|
||||||
}
|
}
|
||||||
|
|
||||||
async formatSnapshot(mode: string | 'markdown' | 'full-markdown' | 'html' | 'text' | 'screenshot', snapshot: PageSnapshot & {
|
async formatSnapshot(mode: string | 'markdown' | 'html' | 'text' | 'screenshot', snapshot: PageSnapshot & {
|
||||||
screenshotUrl?: string;
|
screenshotUrl?: string;
|
||||||
}, nominalUrl?: URL) {
|
}, nominalUrl?: URL) {
|
||||||
if (mode === 'screenshot') {
|
if (mode === 'screenshot') {
|
||||||
@ -112,8 +112,8 @@ export class CrawlerHost extends RPCHost {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
const toBeTurnedToMd = mode === 'full-markdown' ? snapshot.html : snapshot.parsed?.content;
|
const toBeTurnedToMd = mode === 'markdown' ? snapshot.html : snapshot.parsed?.content;
|
||||||
let turnDownService = mode === 'markdown' ? this.getTurndown('without any rule') : this.getTurndown();
|
let turnDownService = mode === 'markdown' ? this.getTurndown() : this.getTurndown('without any rule');
|
||||||
for (const plugin of this.turnDownPlugins) {
|
for (const plugin of this.turnDownPlugins) {
|
||||||
turnDownService = turnDownService.use(plugin);
|
turnDownService = turnDownService.use(plugin);
|
||||||
}
|
}
|
||||||
@ -198,7 +198,7 @@ export class CrawlerHost extends RPCHost {
|
|||||||
mixins.push(`Published Time: ${this.publishedTime}`);
|
mixins.push(`Published Time: ${this.publishedTime}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mode === 'full-markdown') {
|
if (mode === 'markdown') {
|
||||||
return this.content;
|
return this.content;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -253,14 +253,12 @@ ${this.content}
|
|||||||
schema: { type: 'string' }
|
schema: { type: 'string' }
|
||||||
},
|
},
|
||||||
'X-Respond-With': {
|
'X-Respond-With': {
|
||||||
description: `Specifies the form factor of the crawled data you prefer. \n\n` +
|
description: `Specifies the (non-default) form factor of the crawled data you prefer. \n\n` +
|
||||||
`Supported formats:\n` +
|
`Supported formats:\n` +
|
||||||
`- markdown\n` +
|
`- markdown\n` +
|
||||||
`- full-markdown\n` +
|
|
||||||
`- html\n` +
|
`- html\n` +
|
||||||
`- text\n` +
|
`- text\n` +
|
||||||
`- screenshot\n\n` +
|
`- screenshot\n`
|
||||||
`Defaults to: markdown`
|
|
||||||
,
|
,
|
||||||
in: 'header',
|
in: 'header',
|
||||||
schema: { type: 'string' }
|
schema: { type: 'string' }
|
||||||
@ -322,7 +320,7 @@ ${this.content}
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
const customMode = ctx.req.get('x-respond-with') || 'markdown';
|
const customMode = ctx.req.get('x-respond-with') || 'default';
|
||||||
const noCache = Boolean(ctx.req.get('x-no-cache'));
|
const noCache = Boolean(ctx.req.get('x-no-cache'));
|
||||||
const cookies: CookieParam[] = [];
|
const cookies: CookieParam[] = [];
|
||||||
const setCookieHeaders = ctx.req.headers['x-set-cookie'];
|
const setCookieHeaders = ctx.req.headers['x-set-cookie'];
|
||||||
|
Loading…
x
Reference in New Issue
Block a user