mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-12 03:39:01 +08:00
feat: add switches for jina firecrawl watercrawl (#18153)
This commit is contained in:
parent
b247ef85bf
commit
e1455cecd8
@ -174,6 +174,12 @@ CELERY_MIN_WORKERS=
|
||||
API_TOOL_DEFAULT_CONNECT_TIMEOUT=10
|
||||
API_TOOL_DEFAULT_READ_TIMEOUT=60
|
||||
|
||||
# ------------------------------
|
||||
# Datasource Configuration
|
||||
# ------------------------------
|
||||
ENABLE_WEBSITE_JINAREADER=true
|
||||
ENABLE_WEBSITE_FIRECRAWL=true
|
||||
ENABLE_WEBSITE_WATERCRAWL=true
|
||||
|
||||
# ------------------------------
|
||||
# Database Configuration
|
||||
|
@ -75,7 +75,9 @@ services:
|
||||
MAX_TOOLS_NUM: ${MAX_TOOLS_NUM:-10}
|
||||
MAX_PARALLEL_LIMIT: ${MAX_PARALLEL_LIMIT:-10}
|
||||
MAX_ITERATIONS_NUM: ${MAX_ITERATIONS_NUM:-5}
|
||||
|
||||
ENABLE_WEBSITE_JINAREADER: ${ENABLE_WEBSITE_JINAREADER:-true}
|
||||
ENABLE_WEBSITE_FIRECRAWL: ${ENABLE_WEBSITE_FIRECRAWL:-true}
|
||||
ENABLE_WEBSITE_WATERCRAWL: ${ENABLE_WEBSITE_WATERCRAWL:-true}
|
||||
# The postgres database.
|
||||
db:
|
||||
image: postgres:15-alpine
|
||||
|
@ -43,6 +43,9 @@ x-shared-env: &shared-api-worker-env
|
||||
CELERY_MIN_WORKERS: ${CELERY_MIN_WORKERS:-}
|
||||
API_TOOL_DEFAULT_CONNECT_TIMEOUT: ${API_TOOL_DEFAULT_CONNECT_TIMEOUT:-10}
|
||||
API_TOOL_DEFAULT_READ_TIMEOUT: ${API_TOOL_DEFAULT_READ_TIMEOUT:-60}
|
||||
ENABLE_WEBSITE_JINAREADER: ${ENABLE_WEBSITE_JINAREADER:-true}
|
||||
ENABLE_WEBSITE_FIRECRAWL: ${ENABLE_WEBSITE_FIRECRAWL:-true}
|
||||
ENABLE_WEBSITE_WATERCRAWL: ${ENABLE_WEBSITE_WATERCRAWL:-true}
|
||||
DB_USERNAME: ${DB_USERNAME:-postgres}
|
||||
DB_PASSWORD: ${DB_PASSWORD:-difyai123456}
|
||||
DB_HOST: ${DB_HOST:-db}
|
||||
@ -543,7 +546,9 @@ services:
|
||||
MAX_TOOLS_NUM: ${MAX_TOOLS_NUM:-10}
|
||||
MAX_PARALLEL_LIMIT: ${MAX_PARALLEL_LIMIT:-10}
|
||||
MAX_ITERATIONS_NUM: ${MAX_ITERATIONS_NUM:-5}
|
||||
|
||||
ENABLE_WEBSITE_JINAREADER: ${ENABLE_WEBSITE_JINAREADER:-true}
|
||||
ENABLE_WEBSITE_FIRECRAWL: ${ENABLE_WEBSITE_FIRECRAWL:-true}
|
||||
ENABLE_WEBSITE_WATERCRAWL: ${ENABLE_WEBSITE_WATERCRAWL:-true}
|
||||
# The postgres database.
|
||||
db:
|
||||
image: postgres:15-alpine
|
||||
|
@ -49,3 +49,8 @@ NEXT_PUBLIC_MAX_PARALLEL_LIMIT=10
|
||||
|
||||
# The maximum number of iterations for agent setting
|
||||
NEXT_PUBLIC_MAX_ITERATIONS_NUM=5
|
||||
|
||||
NEXT_PUBLIC_ENABLE_WEBSITE_JINAREADER=true
|
||||
NEXT_PUBLIC_ENABLE_WEBSITE_FIRECRAWL=true
|
||||
NEXT_PUBLIC_ENABLE_WEBSITE_WATERCRAWL=true
|
||||
|
||||
|
@ -20,7 +20,7 @@ import { useProviderContext } from '@/context/provider-context'
|
||||
import VectorSpaceFull from '@/app/components/billing/vector-space-full'
|
||||
import classNames from '@/utils/classnames'
|
||||
import { Icon3Dots } from '@/app/components/base/icons/src/vender/line/others'
|
||||
|
||||
import { ENABLE_WEBSITE_FIRECRAWL, ENABLE_WEBSITE_JINAREADER, ENABLE_WEBSITE_WATERCRAWL } from '@/config'
|
||||
type IStepOneProps = {
|
||||
datasetId?: string
|
||||
dataSourceType?: DataSourceType
|
||||
@ -126,9 +126,7 @@ const StepOne = ({
|
||||
return true
|
||||
if (files.some(file => !file.file.id))
|
||||
return true
|
||||
if (isShowVectorSpaceFull)
|
||||
return true
|
||||
return false
|
||||
return isShowVectorSpaceFull
|
||||
}, [files, isShowVectorSpaceFull])
|
||||
|
||||
return (
|
||||
@ -193,7 +191,8 @@ const StepOne = ({
|
||||
{t('datasetCreation.stepOne.dataSourceType.notion')}
|
||||
</span>
|
||||
</div>
|
||||
<div
|
||||
{(ENABLE_WEBSITE_FIRECRAWL || ENABLE_WEBSITE_JINAREADER || ENABLE_WEBSITE_WATERCRAWL) && (
|
||||
<div
|
||||
className={cn(
|
||||
s.dataSourceItem,
|
||||
'system-sm-medium',
|
||||
@ -201,7 +200,7 @@ const StepOne = ({
|
||||
dataSourceTypeDisable && dataSourceType !== DataSourceType.WEB && s.disabled,
|
||||
)}
|
||||
onClick={() => changeType(DataSourceType.WEB)}
|
||||
>
|
||||
>
|
||||
<span className={cn(s.datasetIcon, s.web)} />
|
||||
<span
|
||||
title={t('datasetCreation.stepOne.dataSourceType.web')}
|
||||
@ -209,7 +208,8 @@ const StepOne = ({
|
||||
>
|
||||
{t('datasetCreation.stepOne.dataSourceType.web')}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
@ -12,6 +12,7 @@ import { useModalContext } from '@/context/modal-context'
|
||||
import type { CrawlOptions, CrawlResultItem } from '@/models/datasets'
|
||||
import { fetchDataSources } from '@/service/datasets'
|
||||
import { type DataSourceItem, DataSourceProvider } from '@/models/common'
|
||||
import { ENABLE_WEBSITE_FIRECRAWL, ENABLE_WEBSITE_JINAREADER, ENABLE_WEBSITE_WATERCRAWL } from '@/config'
|
||||
|
||||
type Props = {
|
||||
onPreview: (payload: CrawlResultItem) => void
|
||||
@ -84,7 +85,7 @@ const Website: FC<Props> = ({
|
||||
{t('datasetCreation.stepOne.website.chooseProvider')}
|
||||
</div>
|
||||
<div className="flex space-x-2">
|
||||
<button
|
||||
{ENABLE_WEBSITE_JINAREADER && <button
|
||||
className={cn('flex items-center justify-center rounded-lg px-4 py-2',
|
||||
selectedProvider === DataSourceProvider.jinaReader
|
||||
? 'system-sm-medium border-[1.5px] border-components-option-card-option-selected-border bg-components-option-card-option-selected-bg text-text-primary'
|
||||
@ -95,8 +96,8 @@ const Website: FC<Props> = ({
|
||||
>
|
||||
<span className={cn(s.jinaLogo, 'mr-2')}/>
|
||||
<span>Jina Reader</span>
|
||||
</button>
|
||||
<button
|
||||
</button>}
|
||||
{ENABLE_WEBSITE_FIRECRAWL && <button
|
||||
className={cn('rounded-lg px-4 py-2',
|
||||
selectedProvider === DataSourceProvider.fireCrawl
|
||||
? 'system-sm-medium border-[1.5px] border-components-option-card-option-selected-border bg-components-option-card-option-selected-bg text-text-primary'
|
||||
@ -106,8 +107,8 @@ const Website: FC<Props> = ({
|
||||
onClick={() => setSelectedProvider(DataSourceProvider.fireCrawl)}
|
||||
>
|
||||
🔥 Firecrawl
|
||||
</button>
|
||||
<button
|
||||
</button>}
|
||||
{ENABLE_WEBSITE_WATERCRAWL && <button
|
||||
className={cn('flex items-center justify-center rounded-lg px-4 py-2',
|
||||
selectedProvider === DataSourceProvider.waterCrawl
|
||||
? 'system-sm-medium border-[1.5px] border-components-option-card-option-selected-border bg-components-option-card-option-selected-bg text-text-primary'
|
||||
@ -118,7 +119,7 @@ const Website: FC<Props> = ({
|
||||
>
|
||||
<span className={cn(s.watercrawlLogo, 'mr-2')}/>
|
||||
<span>WaterCrawl</span>
|
||||
</button>
|
||||
</button>}
|
||||
</div>
|
||||
</div>
|
||||
{source && selectedProvider === DataSourceProvider.fireCrawl && (
|
||||
|
@ -6,6 +6,7 @@ import s from './index.module.css'
|
||||
import { Icon3Dots } from '@/app/components/base/icons/src/vender/line/others'
|
||||
import Button from '@/app/components/base/button'
|
||||
import { DataSourceProvider } from '@/models/common'
|
||||
import { ENABLE_WEBSITE_FIRECRAWL, ENABLE_WEBSITE_JINAREADER, ENABLE_WEBSITE_WATERCRAWL } from '@/config'
|
||||
|
||||
const I18N_PREFIX = 'datasetCreation.stepOne.website'
|
||||
|
||||
@ -16,29 +17,30 @@ type Props = {
|
||||
|
||||
const NoData: FC<Props> = ({
|
||||
onConfig,
|
||||
provider,
|
||||
}) => {
|
||||
const { t } = useTranslation()
|
||||
|
||||
const providerConfig = {
|
||||
[DataSourceProvider.jinaReader]: {
|
||||
[DataSourceProvider.jinaReader]: ENABLE_WEBSITE_JINAREADER ? {
|
||||
emoji: <span className={s.jinaLogo} />,
|
||||
title: t(`${I18N_PREFIX}.jinaReaderNotConfigured`),
|
||||
description: t(`${I18N_PREFIX}.jinaReaderNotConfiguredDescription`),
|
||||
},
|
||||
[DataSourceProvider.fireCrawl]: {
|
||||
} : null,
|
||||
[DataSourceProvider.fireCrawl]: ENABLE_WEBSITE_FIRECRAWL ? {
|
||||
emoji: '🔥',
|
||||
title: t(`${I18N_PREFIX}.fireCrawlNotConfigured`),
|
||||
description: t(`${I18N_PREFIX}.fireCrawlNotConfiguredDescription`),
|
||||
},
|
||||
[DataSourceProvider.waterCrawl]: {
|
||||
emoji: <span className={s.watercrawlLogo} />,
|
||||
} : null,
|
||||
[DataSourceProvider.waterCrawl]: ENABLE_WEBSITE_WATERCRAWL ? {
|
||||
emoji: '💧',
|
||||
title: t(`${I18N_PREFIX}.waterCrawlNotConfigured`),
|
||||
description: t(`${I18N_PREFIX}.waterCrawlNotConfiguredDescription`),
|
||||
},
|
||||
} : null,
|
||||
}
|
||||
|
||||
const currentProvider = providerConfig[provider]
|
||||
const currentProvider = Object.values(providerConfig).find(provider => provider !== null) || providerConfig[DataSourceProvider.jinaReader]
|
||||
|
||||
if (!currentProvider) return null
|
||||
|
||||
return (
|
||||
<>
|
||||
|
@ -3,6 +3,7 @@ import DataSourceNotion from './data-source-notion'
|
||||
import DataSourceWebsite from './data-source-website'
|
||||
import { fetchDataSource } from '@/service/common'
|
||||
import { DataSourceProvider } from '@/models/common'
|
||||
import { ENABLE_WEBSITE_FIRECRAWL, ENABLE_WEBSITE_JINAREADER, ENABLE_WEBSITE_WATERCRAWL } from '@/config'
|
||||
|
||||
export default function DataSourcePage() {
|
||||
const { data } = useSWR({ url: 'data-source/integrates' }, fetchDataSource)
|
||||
@ -11,9 +12,9 @@ export default function DataSourcePage() {
|
||||
return (
|
||||
<div className='mb-8'>
|
||||
<DataSourceNotion workspaces={notionWorkspaces} />
|
||||
<DataSourceWebsite provider={DataSourceProvider.jinaReader} />
|
||||
<DataSourceWebsite provider={DataSourceProvider.fireCrawl} />
|
||||
<DataSourceWebsite provider={DataSourceProvider.waterCrawl} />
|
||||
{ENABLE_WEBSITE_JINAREADER && <DataSourceWebsite provider={DataSourceProvider.jinaReader} />}
|
||||
{ENABLE_WEBSITE_FIRECRAWL && <DataSourceWebsite provider={DataSourceProvider.fireCrawl} />}
|
||||
{ENABLE_WEBSITE_WATERCRAWL && <DataSourceWebsite provider={DataSourceProvider.waterCrawl} />}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
@ -302,3 +302,15 @@ else if (globalThis.document?.body?.getAttribute('data-public-max-iterations-num
|
||||
maxIterationsNum = Number.parseInt(globalThis.document.body.getAttribute('data-public-max-iterations-num') as string)
|
||||
|
||||
export const MAX_ITERATIONS_NUM = maxIterationsNum
|
||||
|
||||
/**
 * Interprets the raw value of a website-crawler feature switch.
 *
 * @param rawValue - The environment value, or `undefined` when the variable is not set.
 * @returns `true` when the variable is unset (every provider is enabled by default);
 *          otherwise `true` only for the exact, case-sensitive string `'true'`.
 */
const parseWebsiteProviderSwitch = (rawValue: string | undefined): boolean =>
  rawValue === undefined || rawValue === 'true'

// NOTE: each variable is read through a literal `process.env.NEXT_PUBLIC_*`
// expression on purpose. Next.js only substitutes literal references when it
// bundles code for the browser, so the variable *name* must not be handed to
// the helper and looked up dynamically.

/** Whether the Jina Reader website data source is offered in the UI. */
export const ENABLE_WEBSITE_JINAREADER = parseWebsiteProviderSwitch(
  process.env.NEXT_PUBLIC_ENABLE_WEBSITE_JINAREADER,
)

/** Whether the Firecrawl website data source is offered in the UI. */
export const ENABLE_WEBSITE_FIRECRAWL = parseWebsiteProviderSwitch(
  process.env.NEXT_PUBLIC_ENABLE_WEBSITE_FIRECRAWL,
)

/** Whether the WaterCrawl website data source is offered in the UI. */
export const ENABLE_WEBSITE_WATERCRAWL = parseWebsiteProviderSwitch(
  process.env.NEXT_PUBLIC_ENABLE_WEBSITE_WATERCRAWL,
)
|
||||
|
@ -28,5 +28,7 @@ export NEXT_PUBLIC_CSP_WHITELIST=${CSP_WHITELIST}
|
||||
export NEXT_PUBLIC_TOP_K_MAX_VALUE=${TOP_K_MAX_VALUE}
|
||||
export NEXT_PUBLIC_INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH=${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH}
|
||||
export NEXT_PUBLIC_MAX_TOOLS_NUM=${MAX_TOOLS_NUM}
|
||||
|
||||
export NEXT_PUBLIC_ENABLE_WEBSITE_JINAREADER=${ENABLE_WEBSITE_JINAREADER:-true}
|
||||
export NEXT_PUBLIC_ENABLE_WEBSITE_FIRECRAWL=${ENABLE_WEBSITE_FIRECRAWL:-true}
|
||||
export NEXT_PUBLIC_ENABLE_WEBSITE_WATERCRAWL=${ENABLE_WEBSITE_WATERCRAWL:-true}
|
||||
pm2 start /app/web/server.js --name dify-web --cwd /app/web -i ${PM2_INSTANCES} --no-daemon
|
||||
|
Loading…
x
Reference in New Issue
Block a user