Mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl (synced 2025-08-12 16:28:59 +08:00)
Merge pull request #1018 from mendableai/feat/add-favicon-metadata
[FIR-37] feat: extract and return favicon URL during scraping
This commit is contained in:
commit
05d5f84d87
@ -8,6 +8,7 @@ export function extractMetadata(
|
|||||||
): Partial<Document["metadata"]> {
|
): Partial<Document["metadata"]> {
|
||||||
let title: string | undefined = undefined;
|
let title: string | undefined = undefined;
|
||||||
let description: string | undefined = undefined;
|
let description: string | undefined = undefined;
|
||||||
|
let favicon: string | undefined = undefined;
|
||||||
let language: string | undefined = undefined;
|
let language: string | undefined = undefined;
|
||||||
let keywords: string | undefined = undefined;
|
let keywords: string | undefined = undefined;
|
||||||
let robots: string | undefined = undefined;
|
let robots: string | undefined = undefined;
|
||||||
@ -42,6 +43,12 @@ export function extractMetadata(
|
|||||||
try {
|
try {
|
||||||
title = soup("title").first().text().trim() || undefined;
|
title = soup("title").first().text().trim() || undefined;
|
||||||
description = soup('meta[name="description"]').attr("content") || undefined;
|
description = soup('meta[name="description"]').attr("content") || undefined;
|
||||||
|
|
||||||
|
const faviconLink = soup('link[rel="icon"]').attr("href") || soup('link[rel*="icon"]').first().attr("href") || undefined;
|
||||||
|
if (faviconLink) {
|
||||||
|
const baseUrl = new URL(meta.url).origin;
|
||||||
|
favicon = faviconLink.startsWith('http') ? faviconLink : `${baseUrl}${faviconLink}`;
|
||||||
|
}
|
||||||
|
|
||||||
// Assuming the language is part of the URL as per the regex pattern
|
// Assuming the language is part of the URL as per the regex pattern
|
||||||
language = soup("html").attr("lang") || undefined;
|
language = soup("html").attr("lang") || undefined;
|
||||||
@ -121,6 +128,7 @@ export function extractMetadata(
|
|||||||
return {
|
return {
|
||||||
title,
|
title,
|
||||||
description,
|
description,
|
||||||
|
favicon,
|
||||||
language,
|
language,
|
||||||
keywords,
|
keywords,
|
||||||
robots,
|
robots,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user