This commit is contained in:
yanlong.wang 2024-04-10 19:43:53 +08:00
parent 89d6d49f06
commit b46e859a30
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
2 changed files with 5 additions and 9 deletions

View File

@@ -41,7 +41,6 @@ export class CrawlerHost extends RPCHost {
try { try {
for await (const scrapped of this.puppeteerControl.scrap(url)) { for await (const scrapped of this.puppeteerControl.scrap(url)) {
this.logger.info(`Scrapped: ${scrapped.snapshot}`);
const content = typeof scrapped.snapshot === 'string' ? scrapped.snapshot : (scrapped.snapshot as any)?.content; const content = typeof scrapped.snapshot === 'string' ? scrapped.snapshot : (scrapped.snapshot as any)?.content;
if (!content) { if (!content) {
continue; continue;

View File

@@ -42,7 +42,7 @@ export class PuppeteerControl extends AsyncService {
await this.browser.close(); await this.browser.close();
} }
this.browser = await puppeteer.launch({ this.browser = await puppeteer.launch({
headless: false, headless: true,
args: ['--no-sandbox', '--disable-setuid-sandbox'], args: ['--no-sandbox', '--disable-setuid-sandbox'],
}); });
this.browser.once('disconnected', () => { this.browser.once('disconnected', () => {
@@ -67,8 +67,7 @@ export class PuppeteerControl extends AsyncService {
await page.evaluateOnNewDocument(READABILITY_JS); await page.evaluateOnNewDocument(READABILITY_JS);
await page.evaluateOnNewDocument(() => { await page.evaluateOnNewDocument(() => {
// @ts-expect-error function giveSnapshot() {
window.giveSnapshot() = () => {
// @ts-expect-error // @ts-expect-error
return new Readability(document.cloneNode(true)).parse(); return new Readability(document.cloneNode(true)).parse();
}; };
@@ -79,9 +78,7 @@ export class PuppeteerControl extends AsyncService {
return; return;
} }
// @ts-expect-error const parsed = giveSnapshot();
const parsed = window.giveSnapshot();
console.log(parsed);
if (parsed) { if (parsed) {
// @ts-expect-error // @ts-expect-error
window.reportSnapshot(parsed); window.reportSnapshot(parsed);
@@ -91,7 +88,7 @@ export class PuppeteerControl extends AsyncService {
} }
aftershot = setTimeout(() => { aftershot = setTimeout(() => {
// @ts-expect-error // @ts-expect-error
window.reportSnapshot(window.giveSnapshot()); window.reportSnapshot(giveSnapshot());
}, 500); }, 500);
} }
}; };
@@ -130,7 +127,7 @@ export class PuppeteerControl extends AsyncService {
const screenshot = await page.screenshot(); const screenshot = await page.screenshot();
if (finalized) { if (finalized) {
await gotoPromise; await gotoPromise;
snapshot = await page.evaluate('window.giveSnapshot()'); snapshot = await page.evaluate('new Readability(document.cloneNode(true)).parse()');
yield { snapshot, screenshot }; yield { snapshot, screenshot };
break; break;
} }