@@ -50,7 +56,7 @@ const Panel: FC
= ({
{t(`common.dataSource.${type}.title`)}
{isWebsite && (
- {t('common.dataSource.website.with')} { provider === DataSourceProvider.fireCrawl ? '🔥 Firecrawl' : 'Jina Reader'}
+ {t('common.dataSource.website.with')} {getProviderName()}
)}
diff --git a/web/i18n/en-US/dataset-creation.ts b/web/i18n/en-US/dataset-creation.ts
index 72eb44c3de..c55a939ea3 100644
--- a/web/i18n/en-US/dataset-creation.ts
+++ b/web/i18n/en-US/dataset-creation.ts
@@ -15,6 +15,11 @@ const translation = {
apiKeyPlaceholder: 'API key from firecrawl.dev',
getApiKeyLinkText: 'Get your API key from firecrawl.dev',
},
+ watercrawl: {
+ configWatercrawl: 'Configure Watercrawl',
+ apiKeyPlaceholder: 'API key from watercrawl.dev',
+ getApiKeyLinkText: 'Get your API key from watercrawl.dev',
+ },
jinaReader: {
configJinaReader: 'Configure Jina Reader',
apiKeyPlaceholder: 'API key from jina.ai',
@@ -64,15 +69,21 @@ const translation = {
chooseProvider: 'Select a provider',
fireCrawlNotConfigured: 'Firecrawl is not configured',
fireCrawlNotConfiguredDescription: 'Configure Firecrawl with API key to use it.',
+ watercrawlNotConfigured: 'Watercrawl is not configured',
+ watercrawlNotConfiguredDescription: 'Configure Watercrawl with API key to use it.',
jinaReaderNotConfigured: 'Jina Reader is not configured',
jinaReaderNotConfiguredDescription: 'Set up Jina Reader by entering your free API key for access.',
configure: 'Configure',
configureFirecrawl: 'Configure Firecrawl',
+ configureWatercrawl: 'Configure Watercrawl',
configureJinaReader: 'Configure Jina Reader',
run: 'Run',
firecrawlTitle: 'Extract web content with 🔥Firecrawl',
firecrawlDoc: 'Firecrawl docs',
firecrawlDocLink: 'https://docs.dify.ai/guides/knowledge-base/sync-from-website',
+ watercrawlTitle: 'Extract web content with Watercrawl',
+ watercrawlDoc: 'Watercrawl docs',
+ watercrawlDocLink: 'https://docs.dify.ai/guides/knowledge-base/sync-from-website',
jinaReaderTitle: 'Convert the entire site to Markdown',
jinaReaderDoc: 'Learn more about Jina Reader',
jinaReaderDocLink: 'https://jina.ai/reader',
diff --git a/web/models/common.ts b/web/models/common.ts
index 4086220e2e..0ee164aad8 100644
--- a/web/models/common.ts
+++ b/web/models/common.ts
@@ -178,6 +178,7 @@ export enum DataSourceCategory {
export enum DataSourceProvider {
fireCrawl = 'firecrawl',
jinaReader = 'jinareader',
+ waterCrawl = 'watercrawl', // Watercrawl provider id; the crawl service helpers added in this patch send it as the `provider` field
}
export type FirecrawlConfig = {
@@ -185,6 +186,11 @@ export type FirecrawlConfig = {
base_url: string
}
+export type WatercrawlConfig = { // Configuration shape for the Watercrawl data source provider: two string fields
+ api_key: string // presumably the key the user obtains from watercrawl.dev — confirm against the config form
+ base_url: string // NOTE(review): presumably the Watercrawl API endpoint — confirm whether an empty value is allowed
+}
+
export type DataSourceItem = {
id: string
category: DataSourceCategory
diff --git a/web/service/datasets.ts b/web/service/datasets.ts
index 53b55b375b..f9edb2eeaf 100644
--- a/web/service/datasets.ts
+++ b/web/service/datasets.ts
@@ -253,6 +253,25 @@ export const checkJinaReaderTaskStatus: Fetcher
= (jobId
})
}
+export const createWatercrawlTask: Fetcher> = (body) => { // POSTs `body` to 'website/crawl' tagged with the Watercrawl provider and returns the result of `post`. NOTE(review): `Fetcher>` looks like its generic arguments were lost in extraction — restore them from the upstream file
+ return post('website/crawl', {
+ body: {
+ ...body,
+ provider: DataSourceProvider.waterCrawl, // listed after the spread, so it overrides any `provider` key already present in `body`
+ },
+ })
+}
+
+export const checkWatercrawlTaskStatus: Fetcher = (jobId: string) => { // GETs the status endpoint for `jobId` with the Watercrawl provider and returns the result of `get`. NOTE(review): `Fetcher` has no type arguments here — possibly stripped in extraction, confirm upstream
+ return get(`website/crawl/status/${jobId}`, { // `jobId` is interpolated into the path as-is (no encoding applied here)
+ params: {
+ provider: DataSourceProvider.waterCrawl, // provider is passed under `params`, not in the path
+ },
+ }, {
+ silent: true, // NOTE(review): presumably suppresses the default error notification — confirm against `get`'s options
+ })
+}
+
type FileTypesRes = {
allowed_extensions: string[]
}