mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-10 21:09:01 +08:00
Update index.test.ts
This commit is contained in:
parent
60245343c9
commit
2eb81545fa
@ -1,96 +1,170 @@
|
||||
import { WebScraperDataProvider } from '../index';
|
||||
import { WebScraperDataProvider } from "../index";
|
||||
|
||||
describe('WebScraperDataProvider', () => {
|
||||
describe('replaceImgPathsWithAbsolutePaths', () => {
|
||||
it('should replace image paths with absolute paths', () => {
|
||||
describe("WebScraperDataProvider", () => {
|
||||
describe("replaceImgPathsWithAbsolutePaths", () => {
|
||||
it("should replace image paths with absolute paths", () => {
|
||||
const webScraperDataProvider = new WebScraperDataProvider();
|
||||
const documents = [
|
||||
{
|
||||
metadata: { sourceURL: 'https://example.com/page' },
|
||||
content: '',
|
||||
metadata: { sourceURL: "https://example.com/page" },
|
||||
content: "",
|
||||
},
|
||||
{
|
||||
metadata: { sourceURL: 'https://example.com/another-page' },
|
||||
content: '',
|
||||
metadata: { sourceURL: "https://example.com/another-page" },
|
||||
content: "",
|
||||
},
|
||||
{
|
||||
metadata: { sourceURL: 'https://example.com/data-image' },
|
||||
content: '',
|
||||
}
|
||||
metadata: { sourceURL: "https://example.com/data-image" },
|
||||
content: "",
|
||||
},
|
||||
];
|
||||
|
||||
const expectedDocuments = [
|
||||
{
|
||||
metadata: { sourceURL: 'https://example.com/page' },
|
||||
content: '',
|
||||
metadata: { sourceURL: "https://example.com/page" },
|
||||
content: "",
|
||||
},
|
||||
{
|
||||
metadata: { sourceURL: 'https://example.com/another-page' },
|
||||
content: '',
|
||||
metadata: { sourceURL: "https://example.com/another-page" },
|
||||
content: "",
|
||||
},
|
||||
{
|
||||
metadata: { sourceURL: 'https://example.com/data-image' },
|
||||
content: '',
|
||||
}
|
||||
metadata: { sourceURL: "https://example.com/data-image" },
|
||||
content: "",
|
||||
},
|
||||
];
|
||||
|
||||
const result = webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents);
|
||||
const result =
|
||||
webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents);
|
||||
expect(result).toEqual(expectedDocuments);
|
||||
});
|
||||
|
||||
it('should handle absolute URLs without modification', () => {
|
||||
it("should handle absolute URLs without modification", () => {
|
||||
const webScraperDataProvider = new WebScraperDataProvider();
|
||||
const documents = [
|
||||
{
|
||||
metadata: { sourceURL: 'https://example.com/page' },
|
||||
content: '',
|
||||
metadata: { sourceURL: "https://example.com/page" },
|
||||
content: "",
|
||||
},
|
||||
{
|
||||
metadata: { sourceURL: 'https://example.com/another-page' },
|
||||
content: '',
|
||||
}
|
||||
metadata: { sourceURL: "https://example.com/another-page" },
|
||||
content:
|
||||
"",
|
||||
},
|
||||
];
|
||||
|
||||
const expectedDocuments = [
|
||||
{
|
||||
metadata: { sourceURL: 'https://example.com/page' },
|
||||
content: '',
|
||||
metadata: { sourceURL: "https://example.com/page" },
|
||||
content: "",
|
||||
},
|
||||
{
|
||||
metadata: { sourceURL: 'https://example.com/another-page' },
|
||||
content: '',
|
||||
}
|
||||
metadata: { sourceURL: "https://example.com/another-page" },
|
||||
content:
|
||||
"",
|
||||
},
|
||||
];
|
||||
|
||||
const result = webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents);
|
||||
const result =
|
||||
webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents);
|
||||
expect(result).toEqual(expectedDocuments);
|
||||
});
|
||||
|
||||
it('should not replace non-image content within the documents', () => {
|
||||
it("should not replace non-image content within the documents", () => {
|
||||
const webScraperDataProvider = new WebScraperDataProvider();
|
||||
const documents = [
|
||||
{
|
||||
metadata: { sourceURL: 'https://example.com/page' },
|
||||
content: 'This is a test.  Here is a link: [Example](https://example.com).',
|
||||
metadata: { sourceURL: "https://example.com/page" },
|
||||
content:
|
||||
"This is a test.  Here is a link: [Example](https://example.com).",
|
||||
},
|
||||
{
|
||||
metadata: { sourceURL: 'https://example.com/another-page' },
|
||||
content: 'Another test.  Here is some **bold text**.',
|
||||
}
|
||||
metadata: { sourceURL: "https://example.com/another-page" },
|
||||
content:
|
||||
"Another test.  Here is some **bold text**.",
|
||||
},
|
||||
];
|
||||
|
||||
const expectedDocuments = [
|
||||
{
|
||||
metadata: { sourceURL: 'https://example.com/page' },
|
||||
content: 'This is a test.  Here is a link: [Example](https://example.com).',
|
||||
metadata: { sourceURL: "https://example.com/page" },
|
||||
content:
|
||||
"This is a test.  Here is a link: [Example](https://example.com).",
|
||||
},
|
||||
{
|
||||
metadata: { sourceURL: 'https://example.com/another-page' },
|
||||
content: 'Another test.  Here is some **bold text**.',
|
||||
}
|
||||
metadata: { sourceURL: "https://example.com/another-page" },
|
||||
content:
|
||||
"Another test.  Here is some **bold text**.",
|
||||
},
|
||||
];
|
||||
|
||||
const result = webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents);
|
||||
const result =
|
||||
webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents);
|
||||
expect(result).toEqual(expectedDocuments);
|
||||
});
|
||||
it("should replace multiple image paths within the documents", () => {
|
||||
const webScraperDataProvider = new WebScraperDataProvider();
|
||||
const documents = [
|
||||
{
|
||||
metadata: { sourceURL: "https://example.com/page" },
|
||||
content:
|
||||
"This is a test.  Here is a link: [Example](https://example.com). ",
|
||||
},
|
||||
{
|
||||
metadata: { sourceURL: "https://example.com/another-page" },
|
||||
content:
|
||||
"Another test.  Here is some **bold text**. ",
|
||||
},
|
||||
];
|
||||
|
||||
const expectedDocuments = [
|
||||
{
|
||||
metadata: { sourceURL: "https://example.com/page" },
|
||||
content:
|
||||
"This is a test.  Here is a link: [Example](https://example.com). ",
|
||||
},
|
||||
{
|
||||
metadata: { sourceURL: "https://example.com/another-page" },
|
||||
content:
|
||||
"Another test.  Here is some **bold text**. ",
|
||||
},
|
||||
];
|
||||
|
||||
const result =
|
||||
webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents);
|
||||
expect(result).toEqual(expectedDocuments);
|
||||
});
|
||||
|
||||
it("should replace image paths within the documents with complex URLs", () => {
|
||||
const webScraperDataProvider = new WebScraperDataProvider();
|
||||
const documents = [
|
||||
{
|
||||
metadata: { sourceURL: "https://example.com/page/subpage" },
|
||||
content:
|
||||
"This is a test.  Here is a link: [Example](https://example.com). ",
|
||||
},
|
||||
{
|
||||
metadata: { sourceURL: "https://example.com/another-page/subpage" },
|
||||
content:
|
||||
"Another test.  Here is some **bold text**. ",
|
||||
},
|
||||
];
|
||||
|
||||
const expectedDocuments = [
|
||||
{
|
||||
metadata: { sourceURL: "https://example.com/page/subpage" },
|
||||
content:
|
||||
"This is a test.  Here is a link: [Example](https://example.com). ",
|
||||
},
|
||||
{
|
||||
metadata: { sourceURL: "https://example.com/another-page/subpage" },
|
||||
content:
|
||||
"Another test.  Here is some **bold text**. ",
|
||||
},
|
||||
];
|
||||
|
||||
const result =
|
||||
webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents);
|
||||
expect(result).toEqual(expectedDocuments);
|
||||
});
|
||||
});
|
||||
|
Loading…
x
Reference in New Issue
Block a user